Compare commits
No commits in common. "730b9775a30097c6513be1a82b0c2f3ba7bdf747" and "f306baad355946cf4befec64c6ae09397c907520" have entirely different histories.
730b9775a3
...
f306baad35
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,8 +1,2 @@
|
|||||||
*.md
|
*.md
|
||||||
!README.md
|
!README.md
|
||||||
!knowledge/**/*.md
|
|
||||||
!services/comms/knowledge/**/*.md
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
.pytest_cache
|
|
||||||
.venv
|
|
||||||
|
|||||||
@ -5,9 +5,8 @@ resources:
|
|||||||
- ../../services/crypto
|
- ../../services/crypto
|
||||||
- ../../services/gitea
|
- ../../services/gitea
|
||||||
- ../../services/jellyfin
|
- ../../services/jellyfin
|
||||||
- ../../services/comms
|
- ../../services/jitsi
|
||||||
- ../../services/monitoring
|
- ../../services/monitoring
|
||||||
- ../../services/logging
|
|
||||||
- ../../services/pegasus
|
- ../../services/pegasus
|
||||||
- ../../services/vault
|
- ../../services/vault
|
||||||
- ../../services/bstein-dev-home
|
- ../../services/bstein-dev-home
|
||||||
|
|||||||
@ -1,23 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: ai-llm
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/ai-llm
|
|
||||||
targetNamespace: ai
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
wait: true
|
|
||||||
healthChecks:
|
|
||||||
- apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
name: ollama
|
|
||||||
namespace: ai
|
|
||||||
dependsOn:
|
|
||||||
- name: core
|
|
||||||
@ -0,0 +1,26 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1
|
||||||
|
kind: ImageUpdateAutomation
|
||||||
|
metadata:
|
||||||
|
name: ci-demo
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 1m0s
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
git:
|
||||||
|
checkout:
|
||||||
|
ref:
|
||||||
|
branch: feature/ci-gitops
|
||||||
|
commit:
|
||||||
|
author:
|
||||||
|
email: ops@bstein.dev
|
||||||
|
name: flux-bot
|
||||||
|
messageTemplate: "chore(ci-demo): apply image updates"
|
||||||
|
push:
|
||||||
|
branch: feature/ci-gitops
|
||||||
|
update:
|
||||||
|
strategy: Setters
|
||||||
|
path: services/ci-demo
|
||||||
@ -1,14 +1,17 @@
|
|||||||
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
|
# clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
metadata:
|
metadata:
|
||||||
name: maintenance
|
name: ci-demo
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
spec:
|
spec:
|
||||||
interval: 10m
|
interval: 10m
|
||||||
path: ./services/maintenance
|
path: ./services/ci-demo
|
||||||
prune: true
|
prune: true
|
||||||
sourceRef:
|
sourceRef:
|
||||||
kind: GitRepository
|
kind: GitRepository
|
||||||
name: flux-system
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
dependsOn:
|
||||||
|
- name: core
|
||||||
wait: false
|
wait: false
|
||||||
@ -1,17 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/communication/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: comms
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
path: ./services/comms
|
|
||||||
targetNamespace: comms
|
|
||||||
timeout: 2m
|
|
||||||
dependsOn:
|
|
||||||
- name: traefik
|
|
||||||
@ -15,6 +15,5 @@ spec:
|
|||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
- name: openldap
|
|
||||||
wait: true
|
wait: true
|
||||||
timeout: 5m
|
timeout: 5m
|
||||||
|
|||||||
@ -16,12 +16,8 @@ spec:
|
|||||||
- name: helm
|
- name: helm
|
||||||
- name: traefik
|
- name: traefik
|
||||||
healthChecks:
|
healthChecks:
|
||||||
- apiVersion: apps/v1
|
- apiVersion: helm.toolkit.fluxcd.io/v2
|
||||||
kind: Deployment
|
kind: HelmRelease
|
||||||
name: jenkins
|
|
||||||
namespace: jenkins
|
|
||||||
- apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
name: jenkins
|
name: jenkins
|
||||||
namespace: jenkins
|
namespace: jenkins
|
||||||
wait: false
|
wait: false
|
||||||
|
|||||||
@ -1,18 +1,18 @@
|
|||||||
# clusters/atlas/flux-system/applications/openldap/kustomization.yaml
|
# clusters/atlas/flux-system/applications/jitsi/kustomization.yaml
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
metadata:
|
metadata:
|
||||||
name: openldap
|
name: jitsi
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
spec:
|
spec:
|
||||||
interval: 10m
|
interval: 10m
|
||||||
|
path: ./services/jitsi
|
||||||
|
targetNamespace: jitsi
|
||||||
prune: true
|
prune: true
|
||||||
sourceRef:
|
sourceRef:
|
||||||
kind: GitRepository
|
kind: GitRepository
|
||||||
name: flux-system
|
name: flux-system
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
path: ./services/openldap
|
|
||||||
targetNamespace: sso
|
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
wait: true
|
wait: true
|
||||||
@ -4,8 +4,7 @@ kind: Kustomization
|
|||||||
resources:
|
resources:
|
||||||
- gitea/kustomization.yaml
|
- gitea/kustomization.yaml
|
||||||
- vault/kustomization.yaml
|
- vault/kustomization.yaml
|
||||||
- vaultwarden/kustomization.yaml
|
- jitsi/kustomization.yaml
|
||||||
- comms/kustomization.yaml
|
|
||||||
- crypto/kustomization.yaml
|
- crypto/kustomization.yaml
|
||||||
- monerod/kustomization.yaml
|
- monerod/kustomization.yaml
|
||||||
- pegasus/kustomization.yaml
|
- pegasus/kustomization.yaml
|
||||||
@ -17,14 +16,9 @@ resources:
|
|||||||
- jellyfin/kustomization.yaml
|
- jellyfin/kustomization.yaml
|
||||||
- xmr-miner/kustomization.yaml
|
- xmr-miner/kustomization.yaml
|
||||||
- sui-metrics/kustomization.yaml
|
- sui-metrics/kustomization.yaml
|
||||||
- openldap/kustomization.yaml
|
|
||||||
- keycloak/kustomization.yaml
|
- keycloak/kustomization.yaml
|
||||||
- oauth2-proxy/kustomization.yaml
|
- oauth2-proxy/kustomization.yaml
|
||||||
- mailu/kustomization.yaml
|
- mailu/kustomization.yaml
|
||||||
- jenkins/kustomization.yaml
|
- jenkins/kustomization.yaml
|
||||||
- ai-llm/kustomization.yaml
|
- ci-demo/kustomization.yaml
|
||||||
- nextcloud/kustomization.yaml
|
- ci-demo/image-automation.yaml
|
||||||
- nextcloud-mail-sync/kustomization.yaml
|
|
||||||
- postgres/kustomization.yaml
|
|
||||||
- outline/kustomization.yaml
|
|
||||||
- planka/kustomization.yaml
|
|
||||||
|
|||||||
@ -1,17 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: nextcloud-mail-sync
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
path: ./services/nextcloud-mail-sync
|
|
||||||
targetNamespace: nextcloud
|
|
||||||
timeout: 2m
|
|
||||||
dependsOn:
|
|
||||||
- name: keycloak
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: nextcloud
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/nextcloud
|
|
||||||
targetNamespace: nextcloud
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
wait: true
|
|
||||||
@ -1,28 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/outline/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: outline
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/outline
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
targetNamespace: outline
|
|
||||||
dependsOn:
|
|
||||||
- name: keycloak
|
|
||||||
- name: mailu
|
|
||||||
- name: traefik
|
|
||||||
healthChecks:
|
|
||||||
- apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
name: outline
|
|
||||||
namespace: outline
|
|
||||||
- apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
name: outline
|
|
||||||
namespace: outline
|
|
||||||
wait: false
|
|
||||||
@ -1,28 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/planka/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: planka
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/planka
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
targetNamespace: planka
|
|
||||||
dependsOn:
|
|
||||||
- name: keycloak
|
|
||||||
- name: mailu
|
|
||||||
- name: traefik
|
|
||||||
healthChecks:
|
|
||||||
- apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
name: planka
|
|
||||||
namespace: planka
|
|
||||||
- apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
name: planka
|
|
||||||
namespace: planka
|
|
||||||
wait: false
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/postgres/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: postgres
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/postgres
|
|
||||||
prune: true
|
|
||||||
force: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
targetNamespace: postgres
|
|
||||||
dependsOn:
|
|
||||||
- name: vault
|
|
||||||
- name: vault-csi
|
|
||||||
healthChecks:
|
|
||||||
- apiVersion: apps/v1
|
|
||||||
kind: StatefulSet
|
|
||||||
name: postgres
|
|
||||||
namespace: postgres
|
|
||||||
wait: true
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: vaultwarden
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
suspend: false
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
path: ./services/vaultwarden
|
|
||||||
targetNamespace: vaultwarden
|
|
||||||
prune: true
|
|
||||||
wait: true
|
|
||||||
dependsOn:
|
|
||||||
- name: helm
|
|
||||||
- name: traefik
|
|
||||||
@ -8,7 +8,7 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
interval: 1m0s
|
interval: 1m0s
|
||||||
ref:
|
ref:
|
||||||
branch: feature/sso-hardening
|
branch: main
|
||||||
secretRef:
|
secretRef:
|
||||||
name: flux-system-gitea
|
name: flux-system-gitea
|
||||||
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
||||||
|
|||||||
@ -4,11 +4,7 @@ kind: Kustomization
|
|||||||
resources:
|
resources:
|
||||||
- core/kustomization.yaml
|
- core/kustomization.yaml
|
||||||
- helm/kustomization.yaml
|
- helm/kustomization.yaml
|
||||||
- metallb/kustomization.yaml
|
|
||||||
- traefik/kustomization.yaml
|
- traefik/kustomization.yaml
|
||||||
- gitops-ui/kustomization.yaml
|
- gitops-ui/kustomization.yaml
|
||||||
- monitoring/kustomization.yaml
|
- monitoring/kustomization.yaml
|
||||||
- logging/kustomization.yaml
|
|
||||||
- maintenance/kustomization.yaml
|
|
||||||
- longhorn-ui/kustomization.yaml
|
- longhorn-ui/kustomization.yaml
|
||||||
- ../platform/vault-csi/kustomization.yaml
|
|
||||||
|
|||||||
@ -1,14 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/platform/logging/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: logging
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 10m
|
|
||||||
path: ./services/logging
|
|
||||||
prune: true
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
wait: false
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/platform/metallb/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: metallb
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 30m
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
path: ./infrastructure/metallb
|
|
||||||
prune: true
|
|
||||||
wait: true
|
|
||||||
targetNamespace: metallb-system
|
|
||||||
@ -15,5 +15,4 @@ spec:
|
|||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
- name: metallb
|
|
||||||
wait: true
|
wait: true
|
||||||
|
|||||||
@ -1,16 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml
|
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
||||||
kind: Kustomization
|
|
||||||
metadata:
|
|
||||||
name: vault-csi
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 30m
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
path: ./infrastructure/vault-csi
|
|
||||||
prune: true
|
|
||||||
wait: true
|
|
||||||
targetNamespace: kube-system
|
|
||||||
@ -5,4 +5,3 @@ resources:
|
|||||||
- ../../../infrastructure/modules/base
|
- ../../../infrastructure/modules/base
|
||||||
- ../../../infrastructure/modules/profiles/atlas-ha
|
- ../../../infrastructure/modules/profiles/atlas-ha
|
||||||
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
|
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
|
||||||
- ../../../infrastructure/metallb
|
|
||||||
|
|||||||
@ -1,16 +0,0 @@
|
|||||||
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
|
|
||||||
|
|
||||||
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
|
|
||||||
|
|
||||||
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
|
|
||||||
|
|
||||||
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
|
|
||||||
&& mkdir -p /var/log/data-prepper
|
|
||||||
|
|
||||||
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
|
|
||||||
|
|
||||||
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
|
|
||||||
|
|
||||||
USER 10001
|
|
||||||
WORKDIR /usr/share/data-prepper
|
|
||||||
CMD ["bin/data-prepper"]
|
|
||||||
@ -1,18 +1,5 @@
|
|||||||
# hosts/roles/titan_jh/tasks/main.yaml
|
# hosts/roles/titan_jh/tasks/main.yaml
|
||||||
---
|
---
|
||||||
- name: Install node exporter
|
|
||||||
ansible.builtin.package:
|
|
||||||
name: prometheus-node-exporter
|
|
||||||
state: present
|
|
||||||
tags: ['jumphost', 'monitoring']
|
|
||||||
|
|
||||||
- name: Enable node exporter
|
|
||||||
ansible.builtin.service:
|
|
||||||
name: prometheus-node-exporter
|
|
||||||
enabled: true
|
|
||||||
state: started
|
|
||||||
tags: ['jumphost', 'monitoring']
|
|
||||||
|
|
||||||
- name: Placeholder for jumphost hardening
|
- name: Placeholder for jumphost hardening
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
|
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
|
||||||
|
|||||||
@ -1,20 +0,0 @@
|
|||||||
# infrastructure/metallb/ippool.yaml
|
|
||||||
apiVersion: metallb.io/v1beta1
|
|
||||||
kind: IPAddressPool
|
|
||||||
metadata:
|
|
||||||
name: communication-pool
|
|
||||||
namespace: metallb-system
|
|
||||||
spec:
|
|
||||||
addresses:
|
|
||||||
- 192.168.22.4-192.168.22.6
|
|
||||||
- 192.168.22.9-192.168.22.9
|
|
||||||
autoAssign: true
|
|
||||||
---
|
|
||||||
apiVersion: metallb.io/v1beta1
|
|
||||||
kind: L2Advertisement
|
|
||||||
metadata:
|
|
||||||
name: communication-adv
|
|
||||||
namespace: metallb-system
|
|
||||||
spec:
|
|
||||||
ipAddressPools:
|
|
||||||
- communication-pool
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
# infrastructure/metallb/kustomization.yaml
|
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
resources:
|
|
||||||
- namespace.yaml
|
|
||||||
- metallb-rendered.yaml
|
|
||||||
- ippool.yaml
|
|
||||||
patchesStrategicMerge:
|
|
||||||
- patches/node-placement.yaml
|
|
||||||
- patches/speaker-loglevel.yaml
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +0,0 @@
|
|||||||
# infrastructure/metallb/namespace.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: metallb-system
|
|
||||||
@ -1,27 +0,0 @@
|
|||||||
# infrastructure/metallb/patches/node-placement.yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: metallb-controller
|
|
||||||
namespace: metallb-system
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: controller
|
|
||||||
args:
|
|
||||||
- --port=7472
|
|
||||||
- --log-level=info
|
|
||||||
- --webhook-mode=enabled
|
|
||||||
- --tls-min-version=VersionTLS12
|
|
||||||
- --lb-class=metallb
|
|
||||||
affinity:
|
|
||||||
nodeAffinity:
|
|
||||||
requiredDuringSchedulingIgnoredDuringExecution:
|
|
||||||
nodeSelectorTerms:
|
|
||||||
- matchExpressions:
|
|
||||||
- key: hardware
|
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- rpi4
|
|
||||||
- rpi5
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
# infrastructure/metallb/patches/speaker-loglevel.yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: DaemonSet
|
|
||||||
metadata:
|
|
||||||
name: metallb-speaker
|
|
||||||
namespace: metallb-system
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: speaker
|
|
||||||
args:
|
|
||||||
- --port=7472
|
|
||||||
- --log-level=info
|
|
||||||
- --lb-class=metallb
|
|
||||||
@ -2,7 +2,6 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
- ../components/device-plugin-config
|
|
||||||
- ../components/device-plugin-jetson
|
- ../components/device-plugin-jetson
|
||||||
- ../components/device-plugin-minipc
|
- ../components/device-plugin-minipc
|
||||||
- ../components/device-plugin-tethys
|
- ../components/device-plugin-tethys
|
||||||
|
|||||||
@ -1,15 +0,0 @@
|
|||||||
# infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: nvidia-device-plugin-config
|
|
||||||
namespace: kube-system
|
|
||||||
data:
|
|
||||||
config.yaml: |
|
|
||||||
version: v1
|
|
||||||
sharing:
|
|
||||||
timeSlicing:
|
|
||||||
renameByDefault: true
|
|
||||||
resources:
|
|
||||||
- name: nvidia.com/gpu
|
|
||||||
replicas: 4
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
# infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml
|
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
resources:
|
|
||||||
- configmap.yaml
|
|
||||||
@ -30,8 +30,7 @@ spec:
|
|||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
args:
|
args:
|
||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar"
|
- "--device-list-strategy=envvar,cdi"
|
||||||
- "--config-file=/config/config.yaml"
|
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -42,12 +41,7 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
mountPath: /config
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
configMap:
|
|
||||||
name: nvidia-device-plugin-config
|
|
||||||
|
|||||||
@ -32,7 +32,6 @@ spec:
|
|||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar"
|
- "--device-list-strategy=envvar"
|
||||||
- "--mig-strategy=none"
|
- "--mig-strategy=none"
|
||||||
- "--config-file=/config/config.yaml"
|
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -43,12 +42,7 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
mountPath: /config
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
configMap:
|
|
||||||
name: nvidia-device-plugin-config
|
|
||||||
|
|||||||
@ -33,7 +33,6 @@ spec:
|
|||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar"
|
- "--device-list-strategy=envvar"
|
||||||
- "--mig-strategy=none"
|
- "--mig-strategy=none"
|
||||||
- "--config-file=/config/config.yaml"
|
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -44,12 +43,7 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
mountPath: /config
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
- name: config
|
|
||||||
configMap:
|
|
||||||
name: nvidia-device-plugin-config
|
|
||||||
|
|||||||
@ -2,5 +2,4 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
- ../components/device-plugin-config
|
|
||||||
- ../components/device-plugin-tethys
|
- ../components/device-plugin-tethys
|
||||||
|
|||||||
@ -1,9 +0,0 @@
|
|||||||
# infrastructure/sources/helm/fluent-bit.yaml
|
|
||||||
apiVersion: source.toolkit.fluxcd.io/v1
|
|
||||||
kind: HelmRepository
|
|
||||||
metadata:
|
|
||||||
name: fluent
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 1h
|
|
||||||
url: https://fluent.github.io/helm-charts
|
|
||||||
@ -2,15 +2,11 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
- fluent-bit.yaml
|
|
||||||
- grafana.yaml
|
- grafana.yaml
|
||||||
- hashicorp.yaml
|
- hashicorp.yaml
|
||||||
- jetstack.yaml
|
- jetstack.yaml
|
||||||
- jenkins.yaml
|
- jenkins.yaml
|
||||||
- mailu.yaml
|
- mailu.yaml
|
||||||
- opentelemetry.yaml
|
|
||||||
- opensearch.yaml
|
|
||||||
- harbor.yaml
|
- harbor.yaml
|
||||||
- prometheus.yaml
|
- prometheus.yaml
|
||||||
- victoria-metrics.yaml
|
- victoria-metrics.yaml
|
||||||
- secrets-store-csi.yaml
|
|
||||||
|
|||||||
@ -1,9 +0,0 @@
|
|||||||
# infrastructure/sources/helm/opensearch.yaml
|
|
||||||
apiVersion: source.toolkit.fluxcd.io/v1
|
|
||||||
kind: HelmRepository
|
|
||||||
metadata:
|
|
||||||
name: opensearch
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 1h
|
|
||||||
url: https://opensearch-project.github.io/helm-charts
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
# infrastructure/sources/helm/opentelemetry.yaml
|
|
||||||
apiVersion: source.toolkit.fluxcd.io/v1
|
|
||||||
kind: HelmRepository
|
|
||||||
metadata:
|
|
||||||
name: opentelemetry
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 1h
|
|
||||||
url: https://open-telemetry.github.io/opentelemetry-helm-charts
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
# infrastructure/sources/helm/secrets-store-csi.yaml
|
|
||||||
apiVersion: source.toolkit.fluxcd.io/v1
|
|
||||||
kind: HelmRepository
|
|
||||||
metadata:
|
|
||||||
name: secrets-store-csi-driver
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 1h
|
|
||||||
url: https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts
|
|
||||||
@ -71,10 +71,9 @@ rules:
|
|||||||
- tlsoptions
|
- tlsoptions
|
||||||
- tlsstores
|
- tlsstores
|
||||||
- serverstransports
|
- serverstransports
|
||||||
- serverstransporttcps
|
|
||||||
- traefikservices
|
- traefikservices
|
||||||
- middlewaretcps
|
|
||||||
verbs:
|
verbs:
|
||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
|
||||||
|
|||||||
@ -10,4 +10,3 @@ resources:
|
|||||||
- clusterrole.yaml
|
- clusterrole.yaml
|
||||||
- clusterrolebinding.yaml
|
- clusterrolebinding.yaml
|
||||||
- service.yaml
|
- service.yaml
|
||||||
- traefik-service-lb.yaml
|
|
||||||
|
|||||||
@ -1,24 +0,0 @@
|
|||||||
# infrastructure/traefik/traefik-service-lb.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: traefik
|
|
||||||
namespace: kube-system
|
|
||||||
annotations:
|
|
||||||
metallb.universe.tf/address-pool: communication-pool
|
|
||||||
spec:
|
|
||||||
type: LoadBalancer
|
|
||||||
loadBalancerClass: metallb
|
|
||||||
loadBalancerIP: 192.168.22.9
|
|
||||||
ports:
|
|
||||||
- name: web
|
|
||||||
port: 80
|
|
||||||
targetPort: web
|
|
||||||
protocol: TCP
|
|
||||||
- name: websecure
|
|
||||||
port: 443
|
|
||||||
targetPort: websecure
|
|
||||||
protocol: TCP
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/instance: traefik-kube-system
|
|
||||||
app.kubernetes.io/name: traefik
|
|
||||||
@ -1,6 +0,0 @@
|
|||||||
# infrastructure/vault-csi/kustomization.yaml
|
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
resources:
|
|
||||||
- secrets-store-csi-driver.yaml
|
|
||||||
- vault-csi-provider.yaml
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
# infrastructure/vault-csi/secrets-store-csi-driver.yaml
|
|
||||||
apiVersion: helm.toolkit.fluxcd.io/v2
|
|
||||||
kind: HelmRelease
|
|
||||||
metadata:
|
|
||||||
name: secrets-store-csi-driver
|
|
||||||
namespace: kube-system
|
|
||||||
spec:
|
|
||||||
interval: 15m
|
|
||||||
chart:
|
|
||||||
spec:
|
|
||||||
chart: secrets-store-csi-driver
|
|
||||||
version: "~1.3.0"
|
|
||||||
sourceRef:
|
|
||||||
kind: HelmRepository
|
|
||||||
name: secrets-store-csi-driver
|
|
||||||
namespace: flux-system
|
|
||||||
values:
|
|
||||||
syncSecret:
|
|
||||||
enabled: true
|
|
||||||
enableSecretRotation: false
|
|
||||||
@ -1,111 +0,0 @@
|
|||||||
# infrastructure/vault-csi/vault-csi-provider.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider
|
|
||||||
namespace: kube-system
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider-clusterrole
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["serviceaccounts/token"]
|
|
||||||
verbs: ["create"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider-clusterrolebinding
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: vault-csi-provider-clusterrole
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: vault-csi-provider
|
|
||||||
namespace: kube-system
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider-role
|
|
||||||
namespace: kube-system
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["secrets"]
|
|
||||||
verbs: ["get"]
|
|
||||||
resourceNames: ["vault-csi-provider-hmac-key"]
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["secrets"]
|
|
||||||
verbs: ["create"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider-rolebinding
|
|
||||||
namespace: kube-system
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: vault-csi-provider-role
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: vault-csi-provider
|
|
||||||
namespace: kube-system
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: DaemonSet
|
|
||||||
metadata:
|
|
||||||
name: vault-csi-provider
|
|
||||||
namespace: kube-system
|
|
||||||
labels: { app.kubernetes.io/name: vault-csi-provider }
|
|
||||||
spec:
|
|
||||||
updateStrategy:
|
|
||||||
type: RollingUpdate
|
|
||||||
selector:
|
|
||||||
matchLabels: { app.kubernetes.io/name: vault-csi-provider }
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels: { app.kubernetes.io/name: vault-csi-provider }
|
|
||||||
spec:
|
|
||||||
serviceAccountName: vault-csi-provider
|
|
||||||
containers:
|
|
||||||
- name: provider-vault-installer
|
|
||||||
image: hashicorp/vault-csi-provider:1.7.0
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
args:
|
|
||||||
- -endpoint=/provider/vault.sock
|
|
||||||
- -log-level=info
|
|
||||||
resources:
|
|
||||||
requests: { cpu: 50m, memory: 100Mi }
|
|
||||||
limits: { cpu: 50m, memory: 100Mi }
|
|
||||||
volumeMounts:
|
|
||||||
- { name: providervol, mountPath: "/provider" }
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: "/health/ready"
|
|
||||||
port: 8080
|
|
||||||
scheme: "HTTP"
|
|
||||||
failureThreshold: 2
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
successThreshold: 1
|
|
||||||
timeoutSeconds: 3
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: "/health/ready"
|
|
||||||
port: 8080
|
|
||||||
scheme: "HTTP"
|
|
||||||
failureThreshold: 2
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
successThreshold: 1
|
|
||||||
timeoutSeconds: 3
|
|
||||||
volumes:
|
|
||||||
- name: providervol
|
|
||||||
hostPath:
|
|
||||||
path: "/var/run/secrets-store-csi-providers"
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
Atlas Knowledge Base (KB)
|
|
||||||
|
|
||||||
This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be:
|
|
||||||
- Accurate (grounded in GitOps + read-only cluster tools)
|
|
||||||
- Maintainable (small docs + deterministic generators)
|
|
||||||
- Safe (no secrets; refer to Secret/Vault paths by name only)
|
|
||||||
|
|
||||||
Layout
|
|
||||||
- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown).
|
|
||||||
- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON).
|
|
||||||
- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog.
|
|
||||||
|
|
||||||
Regeneration
|
|
||||||
- After updating manifests or docs, regenerate the derived artifacts:
|
|
||||||
- `python scripts/knowledge_render_atlas.py --write`
|
|
||||||
|
|
||||||
Authoring rules
|
|
||||||
- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`.
|
|
||||||
- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths.
|
|
||||||
- Keep each runbook small; one topic per file; use headings.
|
|
||||||
- When in doubt, link to the exact file path in this repo that configures the behavior.
|
|
||||||
|
|
||||||
@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
"counts": {
|
|
||||||
"helmrelease_host_hints": 7,
|
|
||||||
"http_endpoints": 35,
|
|
||||||
"services": 44,
|
|
||||||
"workloads": 49
|
|
||||||
}
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,89 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"path": "runbooks/ci-gitea-jenkins.md",
|
|
||||||
"title": "CI: Gitea \u2192 Jenkins pipeline",
|
|
||||||
"tags": [
|
|
||||||
"atlas",
|
|
||||||
"ci",
|
|
||||||
"gitea",
|
|
||||||
"jenkins"
|
|
||||||
],
|
|
||||||
"entrypoints": [
|
|
||||||
"scm.bstein.dev",
|
|
||||||
"ci.bstein.dev"
|
|
||||||
],
|
|
||||||
"source_paths": [
|
|
||||||
"services/gitea",
|
|
||||||
"services/jenkins",
|
|
||||||
"scripts/jenkins_cred_sync.sh",
|
|
||||||
"scripts/gitea_cred_sync.sh"
|
|
||||||
],
|
|
||||||
"body": "# CI: Gitea \u2192 Jenkins pipeline\n\n## What this is\nAtlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).\n\n## Where it is configured\n- Gitea manifests: `services/gitea/`\n- Jenkins manifests: `services/jenkins/`\n- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`\n\n## What users do (typical flow)\n- Create a repo in Gitea.\n- Create/update a Jenkins job/pipeline that can fetch the repo.\n- Configure a webhook (or SCM polling) so pushes trigger builds.\n\n## Troubleshooting (common)\n- \u201cWebhook not firing\u201d: confirm ingress host, webhook URL, and Jenkins job is reachable.\n- \u201cAuth denied cloning\u201d: confirm Keycloak group membership and that Jenkins has a valid token/credential configured."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"path": "runbooks/comms-verify.md",
|
|
||||||
"title": "Othrys verification checklist",
|
|
||||||
"tags": [
|
|
||||||
"comms",
|
|
||||||
"matrix",
|
|
||||||
"element",
|
|
||||||
"livekit"
|
|
||||||
],
|
|
||||||
"entrypoints": [
|
|
||||||
"https://live.bstein.dev",
|
|
||||||
"https://matrix.live.bstein.dev"
|
|
||||||
],
|
|
||||||
"source_paths": [],
|
|
||||||
"body": "1) Guest join:\n- Open a private window and visit:\n `https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join`\n- Confirm the guest join flow works and the displayname becomes `<word>-<word>`.\n\n2) Keycloak login:\n- Log in from `https://live.bstein.dev` and confirm MAS -> Keycloak -> Element redirect.\n\n3) Video rooms:\n- Start an Element Call room and confirm audio/video with a second account.\n- Check that guests can read public rooms but cannot start calls.\n\n4) Well-known:\n- `https://live.bstein.dev/.well-known/matrix/client` returns JSON.\n- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON.\n\n5) TURN reachability:\n- Confirm `turn.live.bstein.dev:3478` and `turns:5349` are reachable from WAN."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"path": "runbooks/kb-authoring.md",
|
|
||||||
"title": "KB authoring: what to write (and what not to)",
|
|
||||||
"tags": [
|
|
||||||
"atlas",
|
|
||||||
"kb",
|
|
||||||
"runbooks"
|
|
||||||
],
|
|
||||||
"entrypoints": [],
|
|
||||||
"source_paths": [
|
|
||||||
"knowledge/runbooks",
|
|
||||||
"scripts/knowledge_render_atlas.py"
|
|
||||||
],
|
|
||||||
"body": "# KB authoring: what to write (and what not to)\n\n## The goal\nGive Atlas assistants enough grounded, Atlas-specific context to answer \u201chow do I\u2026?\u201d questions without guessing.\n\n## What to capture (high value)\n- User workflows: \u201cclick here, set X, expected result\u201d\n- Operator workflows: \u201cedit these files, reconcile this kustomization, verify with these commands\u201d\n- Wiring: \u201cthis host routes to this service; this service depends on Postgres/Vault/etc\u201d\n- Failure modes: exact error messages + the 2\u20135 checks that usually resolve them\n- Permissions: Keycloak groups/roles and what they unlock\n\n## What to avoid (low value / fluff)\n- Generic Kubernetes explanations (link to upstream docs instead)\n- Copy-pasting large manifests (prefer file paths + small snippets)\n- Anything that will drift quickly (render it from GitOps instead)\n- Any secret values (reference Secret/Vault locations by name only)\n\n## Document pattern (recommended)\nEach runbook should answer:\n- \u201cWhat is this?\u201d\n- \u201cWhat do users do?\u201d\n- \u201cWhat do operators change (where in Git)?\u201d\n- \u201cHow do we verify it works?\u201d\n- \u201cWhat breaks and how to debug it?\u201d"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"path": "runbooks/observability.md",
|
|
||||||
"title": "Observability: Grafana + VictoriaMetrics (how to query safely)",
|
|
||||||
"tags": [
|
|
||||||
"atlas",
|
|
||||||
"monitoring",
|
|
||||||
"grafana",
|
|
||||||
"victoriametrics"
|
|
||||||
],
|
|
||||||
"entrypoints": [
|
|
||||||
"metrics.bstein.dev",
|
|
||||||
"alerts.bstein.dev"
|
|
||||||
],
|
|
||||||
"source_paths": [
|
|
||||||
"services/monitoring"
|
|
||||||
],
|
|
||||||
"body": "# Observability: Grafana + VictoriaMetrics (how to query safely)\n\n## Where it is configured\n- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values)\n- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)\n\n## Using metrics as a \u201ctool\u201d for Atlas assistants\nThe safest pattern is: map a small set of intents \u2192 fixed PromQL queries, then summarize results.\n\nExamples (intents)\n- \u201cIs the cluster healthy?\u201d \u2192 node readiness + pod restart rate\n- \u201cWhy is Element Call failing?\u201d \u2192 LiveKit/coturn pod restarts + synapse errors + ingress 5xx\n- \u201cIs Jenkins slow?\u201d \u2192 pod CPU/memory + HTTP latency metrics (if exported)\n\n## Why dashboards are not the KB\nDashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the\nKB focused on wiring, runbooks, and stable conventions."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"path": "runbooks/template.md",
|
|
||||||
"title": "<short title>",
|
|
||||||
"tags": [
|
|
||||||
"atlas",
|
|
||||||
"<service>",
|
|
||||||
"<topic>"
|
|
||||||
],
|
|
||||||
"entrypoints": [
|
|
||||||
"<hostnames if relevant>"
|
|
||||||
],
|
|
||||||
"source_paths": [
|
|
||||||
"services/<svc>",
|
|
||||||
"clusters/atlas/<...>"
|
|
||||||
],
|
|
||||||
"body": "# <Short title>\n\n## What this is\n\n## For users (how to)\n\n## For operators (where configured)\n\n## Troubleshooting (symptoms \u2192 checks)"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
@ -1,189 +0,0 @@
|
|||||||
flowchart LR
|
|
||||||
host_auth_bstein_dev["auth.bstein.dev"]
|
|
||||||
svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"]
|
|
||||||
host_auth_bstein_dev --> svc_sso_oauth2_proxy
|
|
||||||
wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"]
|
|
||||||
svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy
|
|
||||||
host_bstein_dev["bstein.dev"]
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"]
|
|
||||||
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend
|
|
||||||
wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"]
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend
|
|
||||||
svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"]
|
|
||||||
host_bstein_dev --> svc_comms_matrix_wellknown
|
|
||||||
wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"]
|
|
||||||
svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"]
|
|
||||||
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
|
|
||||||
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
|
|
||||||
host_call_live_bstein_dev["call.live.bstein.dev"]
|
|
||||||
svc_comms_element_call["comms/element-call (Service)"]
|
|
||||||
host_call_live_bstein_dev --> svc_comms_element_call
|
|
||||||
wl_comms_element_call["comms/element-call (Deployment)"]
|
|
||||||
svc_comms_element_call --> wl_comms_element_call
|
|
||||||
host_chat_ai_bstein_dev["chat.ai.bstein.dev"]
|
|
||||||
svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"]
|
|
||||||
host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway
|
|
||||||
wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"]
|
|
||||||
svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway
|
|
||||||
host_ci_bstein_dev["ci.bstein.dev"]
|
|
||||||
svc_jenkins_jenkins["jenkins/jenkins (Service)"]
|
|
||||||
host_ci_bstein_dev --> svc_jenkins_jenkins
|
|
||||||
wl_jenkins_jenkins["jenkins/jenkins (Deployment)"]
|
|
||||||
svc_jenkins_jenkins --> wl_jenkins_jenkins
|
|
||||||
host_cloud_bstein_dev["cloud.bstein.dev"]
|
|
||||||
svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"]
|
|
||||||
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
|
|
||||||
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
|
|
||||||
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
|
|
||||||
host_kit_live_bstein_dev["kit.live.bstein.dev"]
|
|
||||||
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
|
|
||||||
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
|
|
||||||
wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"]
|
|
||||||
svc_comms_livekit_token_service --> wl_comms_livekit_token_service
|
|
||||||
svc_comms_livekit["comms/livekit (Service)"]
|
|
||||||
host_kit_live_bstein_dev --> svc_comms_livekit
|
|
||||||
wl_comms_livekit["comms/livekit (Deployment)"]
|
|
||||||
svc_comms_livekit --> wl_comms_livekit
|
|
||||||
host_live_bstein_dev["live.bstein.dev"]
|
|
||||||
svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"]
|
|
||||||
host_live_bstein_dev --> svc_comms_othrys_element_element_web
|
|
||||||
wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"]
|
|
||||||
svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web
|
|
||||||
host_live_bstein_dev --> svc_comms_matrix_wellknown
|
|
||||||
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
|
|
||||||
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
|
||||||
wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"]
|
|
||||||
svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse
|
|
||||||
host_longhorn_bstein_dev["longhorn.bstein.dev"]
|
|
||||||
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
|
|
||||||
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
|
|
||||||
wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"]
|
|
||||||
svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn
|
|
||||||
host_mail_bstein_dev["mail.bstein.dev"]
|
|
||||||
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
|
|
||||||
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
|
|
||||||
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
|
|
||||||
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
|
||||||
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
|
|
||||||
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
|
||||||
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
|
||||||
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
|
|
||||||
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
|
||||||
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
|
||||||
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
|
|
||||||
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
|
||||||
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
|
||||||
host_monero_bstein_dev["monero.bstein.dev"]
|
|
||||||
svc_crypto_monerod["crypto/monerod (Service)"]
|
|
||||||
host_monero_bstein_dev --> svc_crypto_monerod
|
|
||||||
wl_crypto_monerod["crypto/monerod (Deployment)"]
|
|
||||||
svc_crypto_monerod --> wl_crypto_monerod
|
|
||||||
host_office_bstein_dev["office.bstein.dev"]
|
|
||||||
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
|
|
||||||
host_office_bstein_dev --> svc_nextcloud_collabora
|
|
||||||
wl_nextcloud_collabora["nextcloud/collabora (Deployment)"]
|
|
||||||
svc_nextcloud_collabora --> wl_nextcloud_collabora
|
|
||||||
host_pegasus_bstein_dev["pegasus.bstein.dev"]
|
|
||||||
svc_jellyfin_pegasus["jellyfin/pegasus (Service)"]
|
|
||||||
host_pegasus_bstein_dev --> svc_jellyfin_pegasus
|
|
||||||
wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"]
|
|
||||||
svc_jellyfin_pegasus --> wl_jellyfin_pegasus
|
|
||||||
host_scm_bstein_dev["scm.bstein.dev"]
|
|
||||||
svc_gitea_gitea["gitea/gitea (Service)"]
|
|
||||||
host_scm_bstein_dev --> svc_gitea_gitea
|
|
||||||
wl_gitea_gitea["gitea/gitea (Deployment)"]
|
|
||||||
svc_gitea_gitea --> wl_gitea_gitea
|
|
||||||
host_secret_bstein_dev["secret.bstein.dev"]
|
|
||||||
svc_vault_vault["vault/vault (Service)"]
|
|
||||||
host_secret_bstein_dev --> svc_vault_vault
|
|
||||||
wl_vault_vault["vault/vault (StatefulSet)"]
|
|
||||||
svc_vault_vault --> wl_vault_vault
|
|
||||||
host_sso_bstein_dev["sso.bstein.dev"]
|
|
||||||
svc_sso_keycloak["sso/keycloak (Service)"]
|
|
||||||
host_sso_bstein_dev --> svc_sso_keycloak
|
|
||||||
wl_sso_keycloak["sso/keycloak (Deployment)"]
|
|
||||||
svc_sso_keycloak --> wl_sso_keycloak
|
|
||||||
host_stream_bstein_dev["stream.bstein.dev"]
|
|
||||||
svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"]
|
|
||||||
host_stream_bstein_dev --> svc_jellyfin_jellyfin
|
|
||||||
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
|
|
||||||
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
|
|
||||||
host_vault_bstein_dev["vault.bstein.dev"]
|
|
||||||
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
|
|
||||||
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
|
|
||||||
wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"]
|
|
||||||
svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden
|
|
||||||
|
|
||||||
subgraph bstein_dev_home[bstein-dev-home]
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_frontend
|
|
||||||
wl_bstein_dev_home_bstein_dev_home_frontend
|
|
||||||
svc_bstein_dev_home_bstein_dev_home_backend
|
|
||||||
wl_bstein_dev_home_bstein_dev_home_backend
|
|
||||||
svc_bstein_dev_home_chat_ai_gateway
|
|
||||||
wl_bstein_dev_home_chat_ai_gateway
|
|
||||||
end
|
|
||||||
subgraph comms[comms]
|
|
||||||
svc_comms_matrix_wellknown
|
|
||||||
wl_comms_matrix_wellknown
|
|
||||||
svc_comms_element_call
|
|
||||||
wl_comms_element_call
|
|
||||||
svc_comms_livekit_token_service
|
|
||||||
wl_comms_livekit_token_service
|
|
||||||
svc_comms_livekit
|
|
||||||
wl_comms_livekit
|
|
||||||
svc_comms_othrys_element_element_web
|
|
||||||
wl_comms_othrys_element_element_web
|
|
||||||
svc_comms_othrys_synapse_matrix_synapse
|
|
||||||
wl_comms_othrys_synapse_matrix_synapse
|
|
||||||
svc_comms_matrix_authentication_service
|
|
||||||
wl_comms_matrix_authentication_service
|
|
||||||
svc_comms_matrix_guest_register
|
|
||||||
wl_comms_matrix_guest_register
|
|
||||||
end
|
|
||||||
subgraph crypto[crypto]
|
|
||||||
svc_crypto_monerod
|
|
||||||
wl_crypto_monerod
|
|
||||||
end
|
|
||||||
subgraph gitea[gitea]
|
|
||||||
svc_gitea_gitea
|
|
||||||
wl_gitea_gitea
|
|
||||||
end
|
|
||||||
subgraph jellyfin[jellyfin]
|
|
||||||
svc_jellyfin_pegasus
|
|
||||||
wl_jellyfin_pegasus
|
|
||||||
svc_jellyfin_jellyfin
|
|
||||||
wl_jellyfin_jellyfin
|
|
||||||
end
|
|
||||||
subgraph jenkins[jenkins]
|
|
||||||
svc_jenkins_jenkins
|
|
||||||
wl_jenkins_jenkins
|
|
||||||
end
|
|
||||||
subgraph longhorn_system[longhorn-system]
|
|
||||||
svc_longhorn_system_oauth2_proxy_longhorn
|
|
||||||
wl_longhorn_system_oauth2_proxy_longhorn
|
|
||||||
end
|
|
||||||
subgraph mailu_mailserver[mailu-mailserver]
|
|
||||||
svc_mailu_mailserver_mailu_front
|
|
||||||
end
|
|
||||||
subgraph nextcloud[nextcloud]
|
|
||||||
svc_nextcloud_nextcloud
|
|
||||||
wl_nextcloud_nextcloud
|
|
||||||
svc_nextcloud_collabora
|
|
||||||
wl_nextcloud_collabora
|
|
||||||
end
|
|
||||||
subgraph sso[sso]
|
|
||||||
svc_sso_oauth2_proxy
|
|
||||||
wl_sso_oauth2_proxy
|
|
||||||
svc_sso_keycloak
|
|
||||||
wl_sso_keycloak
|
|
||||||
end
|
|
||||||
subgraph vault[vault]
|
|
||||||
svc_vault_vault
|
|
||||||
wl_vault_vault
|
|
||||||
end
|
|
||||||
subgraph vaultwarden[vaultwarden]
|
|
||||||
svc_vaultwarden_vaultwarden_service
|
|
||||||
wl_vaultwarden_vaultwarden
|
|
||||||
end
|
|
||||||
@ -1,26 +0,0 @@
|
|||||||
# Metis (node recovery)
|
|
||||||
|
|
||||||
## Node classes (current map)
|
|
||||||
- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11,20,21 (Ubuntu 24.04.3, k3s agent)
|
|
||||||
- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint)
|
|
||||||
- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks)
|
|
||||||
- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent)
|
|
||||||
- amd64 agents: titan-22/24 (Debian 13, k3s agent)
|
|
||||||
- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21 (when added), plus any newcomers.
|
|
||||||
|
|
||||||
## Longhorn disk UUIDs (critical nodes)
|
|
||||||
- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4)
|
|
||||||
- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4)
|
|
||||||
- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4)
|
|
||||||
- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4)
|
|
||||||
|
|
||||||
## Metis repo (~/Development/metis)
|
|
||||||
- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`).
|
|
||||||
- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints).
|
|
||||||
- `AGENTS.md` in repo is untracked and holds raw notes.
|
|
||||||
|
|
||||||
## Next implementation steps
|
|
||||||
- Add per-class golden image refs and checksums (Harbor or file://) when ready.
|
|
||||||
- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths.
|
|
||||||
- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection.
|
|
||||||
- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited.
|
|
||||||
@ -1,27 +0,0 @@
|
|||||||
---
|
|
||||||
title: "CI: Gitea → Jenkins pipeline"
|
|
||||||
tags: ["atlas", "ci", "gitea", "jenkins"]
|
|
||||||
owners: ["brad"]
|
|
||||||
entrypoints: ["scm.bstein.dev", "ci.bstein.dev"]
|
|
||||||
source_paths: ["services/gitea", "services/jenkins", "scripts/jenkins_cred_sync.sh", "scripts/gitea_cred_sync.sh"]
|
|
||||||
---
|
|
||||||
|
|
||||||
# CI: Gitea → Jenkins pipeline
|
|
||||||
|
|
||||||
## What this is
|
|
||||||
Atlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).
|
|
||||||
|
|
||||||
## Where it is configured
|
|
||||||
- Gitea manifests: `services/gitea/`
|
|
||||||
- Jenkins manifests: `services/jenkins/`
|
|
||||||
- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`
|
|
||||||
|
|
||||||
## What users do (typical flow)
|
|
||||||
- Create a repo in Gitea.
|
|
||||||
- Create/update a Jenkins job/pipeline that can fetch the repo.
|
|
||||||
- Configure a webhook (or SCM polling) so pushes trigger builds.
|
|
||||||
|
|
||||||
## Troubleshooting (common)
|
|
||||||
- “Webhook not firing”: confirm ingress host, webhook URL, and Jenkins job is reachable.
|
|
||||||
- “Auth denied cloning”: confirm Keycloak group membership and that Jenkins has a valid token/credential configured.
|
|
||||||
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
---
|
|
||||||
title: Othrys verification checklist
|
|
||||||
tags:
|
|
||||||
- comms
|
|
||||||
- matrix
|
|
||||||
- element
|
|
||||||
- livekit
|
|
||||||
entrypoints:
|
|
||||||
- https://live.bstein.dev
|
|
||||||
- https://matrix.live.bstein.dev
|
|
||||||
---
|
|
||||||
|
|
||||||
1) Guest join:
|
|
||||||
- Open a private window and visit:
|
|
||||||
`https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join`
|
|
||||||
- Confirm the guest join flow works and the displayname becomes `<word>-<word>`.
|
|
||||||
|
|
||||||
2) Keycloak login:
|
|
||||||
- Log in from `https://live.bstein.dev` and confirm the MAS (Matrix Authentication Service) -> Keycloak -> Element redirect chain completes.
|
|
||||||
|
|
||||||
3) Video rooms:
|
|
||||||
- Start an Element Call room and confirm audio/video with a second account.
|
|
||||||
- Check that guests can read public rooms but cannot start calls.
|
|
||||||
|
|
||||||
4) Well-known:
|
|
||||||
- `https://live.bstein.dev/.well-known/matrix/client` returns JSON.
|
|
||||||
- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON.
|
|
||||||
|
|
||||||
5) TURN reachability:
|
|
||||||
- Confirm TURN (`turn.live.bstein.dev:3478`) and TURNS, i.e. TURN over TLS (port `5349`), are reachable from WAN.
|
|
||||||
@ -1,34 +0,0 @@
|
|||||||
---
|
|
||||||
title: "KB authoring: what to write (and what not to)"
|
|
||||||
tags: ["atlas", "kb", "runbooks"]
|
|
||||||
owners: ["brad"]
|
|
||||||
entrypoints: []
|
|
||||||
source_paths: ["knowledge/runbooks", "scripts/knowledge_render_atlas.py"]
|
|
||||||
---
|
|
||||||
|
|
||||||
# KB authoring: what to write (and what not to)
|
|
||||||
|
|
||||||
## The goal
|
|
||||||
Give Atlas assistants enough grounded, Atlas-specific context to answer “how do I…?” questions without guessing.
|
|
||||||
|
|
||||||
## What to capture (high value)
|
|
||||||
- User workflows: “click here, set X, expected result”
|
|
||||||
- Operator workflows: “edit these files, reconcile this kustomization, verify with these commands”
|
|
||||||
- Wiring: “this host routes to this service; this service depends on Postgres/Vault/etc”
|
|
||||||
- Failure modes: exact error messages + the 2–5 checks that usually resolve them
|
|
||||||
- Permissions: Keycloak groups/roles and what they unlock
|
|
||||||
|
|
||||||
## What to avoid (low value / fluff)
|
|
||||||
- Generic Kubernetes explanations (link to upstream docs instead)
|
|
||||||
- Copy-pasting large manifests (prefer file paths + small snippets)
|
|
||||||
- Anything that will drift quickly (render it from GitOps instead)
|
|
||||||
- Any secret values (reference Secret/Vault locations by name only)
|
|
||||||
|
|
||||||
## Document pattern (recommended)
|
|
||||||
Each runbook should answer:
|
|
||||||
- “What is this?”
|
|
||||||
- “What do users do?”
|
|
||||||
- “What do operators change (where in Git)?”
|
|
||||||
- “How do we verify it works?”
|
|
||||||
- “What breaks and how to debug it?”
|
|
||||||
|
|
||||||
@ -1,26 +0,0 @@
|
|||||||
---
|
|
||||||
title: "Observability: Grafana + VictoriaMetrics (how to query safely)"
|
|
||||||
tags: ["atlas", "monitoring", "grafana", "victoriametrics"]
|
|
||||||
owners: ["brad"]
|
|
||||||
entrypoints: ["metrics.bstein.dev", "alerts.bstein.dev"]
|
|
||||||
source_paths: ["services/monitoring"]
|
|
||||||
---
|
|
||||||
|
|
||||||
# Observability: Grafana + VictoriaMetrics (how to query safely)
|
|
||||||
|
|
||||||
## Where it is configured
|
|
||||||
- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values)
|
|
||||||
- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)
|
|
||||||
|
|
||||||
## Using metrics as a “tool” for Atlas assistants
|
|
||||||
The safest pattern is: map a small set of intents → fixed PromQL queries, then summarize results.
|
|
||||||
|
|
||||||
Examples (intents)
|
|
||||||
- “Is the cluster healthy?” → node readiness + pod restart rate
|
|
||||||
- “Why is Element Call failing?” → LiveKit/coturn pod restarts + synapse errors + ingress 5xx
|
|
||||||
- “Is Jenkins slow?” → pod CPU/memory + HTTP latency metrics (if exported)
|
|
||||||
|
|
||||||
## Why dashboards are not the KB
|
|
||||||
Dashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the
|
|
||||||
KB focused on wiring, runbooks, and stable conventions.
|
|
||||||
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
---
|
|
||||||
title: "<short title>"
|
|
||||||
tags: ["atlas", "<service>", "<topic>"]
|
|
||||||
owners: ["brad"]
|
|
||||||
entrypoints: ["<hostnames if relevant>"]
|
|
||||||
source_paths: ["services/<svc>", "clusters/atlas/<...>"]
|
|
||||||
---
|
|
||||||
|
|
||||||
# <Short title>
|
|
||||||
|
|
||||||
## What this is
|
|
||||||
|
|
||||||
## For users (how to)
|
|
||||||
|
|
||||||
## For operators (where configured)
|
|
||||||
|
|
||||||
## Troubleshooting (symptoms → checks)
|
|
||||||
|
|
||||||
@ -1,73 +0,0 @@
|
|||||||
# Metis (node recovery)
|
|
||||||
|
|
||||||
## Node classes (current map)
|
|
||||||
- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11 (Ubuntu 24.04.3, k3s agent); titan-20,21 are also k3s agents but are Jetson nodes on Ubuntu 20.04.6 (see "Jetson nodes" below)
|
|
||||||
- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint)
|
|
||||||
- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks)
|
|
||||||
- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent)
|
|
||||||
- amd64 agents: titan-22/24 (Debian 13, k3s agent)
|
|
||||||
- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, plus any newcomers.
|
|
||||||
|
|
||||||
### Jetson nodes (titan-20/21)
|
|
||||||
- Ubuntu 20.04.6 (Focal), kernel 5.10.104-tegra, CRI containerd 2.0.5-k3s2, arch arm64.
|
|
||||||
- Storage: NVMe 232G at / (ext4); onboard mmc partitions present but root on NVMe; 1.9T sda present (unused).
|
|
||||||
- k3s agent with drop-in 99-nofile.conf.
|
|
||||||
|
|
||||||
## Longhorn disk UUIDs (critical nodes)
|
|
||||||
- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4)
|
|
||||||
- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4)
|
|
||||||
- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4)
|
|
||||||
- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4)
|
|
||||||
|
|
||||||
## Metis repo (~/Development/metis)
|
|
||||||
- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`).
|
|
||||||
- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints).
|
|
||||||
- `AGENTS.md` in repo is untracked and holds raw notes.
|
|
||||||
|
|
||||||
## Next implementation steps
|
|
||||||
- Add per-class golden image refs and checksums (Harbor or file://) when ready.
|
|
||||||
- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths.
|
|
||||||
- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection.
|
|
||||||
- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited.
|
|
||||||
|
|
||||||
## Node OS/Kernel/CRI snapshot (Jan 2026)
|
|
||||||
- titan-04: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-05: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-06: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-07: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-08: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-09: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-0a: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-0b: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-0c: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-10: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-11: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-12: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-13: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-14: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-15: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-17: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-18: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-19: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
|
||||||
- titan-20: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-21: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64
|
|
||||||
- titan-22: Debian 13 (trixie), kernel 6.12.41+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64
|
|
||||||
- titan-24: Debian 13 (trixie), kernel 6.12.57+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64
|
|
||||||
|
|
||||||
|
|
||||||
### External hosts
|
|
||||||
- titan-db: Ubuntu 24.10, kernel 6.11.0-1015-raspi, root on /dev/sda2 ext4 (465G), boot vfat /dev/sda1; PostgreSQL service enabled.
|
|
||||||
- titan-jh: Arch Linux ARM (rolling), kernel 6.18.4-2-rpi, NVMe root ext4 238G (/), boot vfat 512M; ~495 packages installed (pacman -Q).
|
|
||||||
- titan-23/oceanus: TODO audit (future).
|
|
||||||
|
|
||||||
|
|
||||||
### Control plane Pis (titan-0a/0b/0c)
|
|
||||||
- Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, containerd 2.0.5-k3s2.
|
|
||||||
- Storage: 477G SSD root (/dev/sda2 ext4), /boot/firmware vfat (/dev/sda1). fstab uses LABEL=writable and LABEL=system-boot.
|
|
||||||
- k3s server (control-plane taint expected); etcd snapshots not yet cataloged (TODO).
|
|
||||||
|
|
||||||
|
|
||||||
## k3s versions
|
|
||||||
- rpi5 workers/control-plane: k3s v1.33.3+k3s1 (crictl v1.31.0-k3s2)
|
|
||||||
- rpi4 nodes: k3s v1.31.5+k3s1 (crictl v1.31.0-k3s2)
|
|
||||||
- Jetson titan-20/21: k3s v1.33.3+k3s1 (per node info), crictl v1.31.0-k3s2
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
python scripts/knowledge_render_atlas.py --write
|
|
||||||
python scripts/knowledge_render_atlas.py --write --out services/comms/knowledge
|
|
||||||
@ -9,7 +9,6 @@ Usage:
|
|||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import textwrap
|
import textwrap
|
||||||
import urllib.parse
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -46,14 +45,12 @@ PERCENT_THRESHOLDS = {
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_CPU_WINDOW = "1m"
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Cluster metadata
|
# Cluster metadata
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
|
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
|
||||||
CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"]
|
CONTROL_DEPENDENCIES = ["titan-db"]
|
||||||
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
|
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
|
||||||
WORKER_NODES = [
|
WORKER_NODES = [
|
||||||
"titan-04",
|
"titan-04",
|
||||||
@ -64,12 +61,11 @@ WORKER_NODES = [
|
|||||||
"titan-09",
|
"titan-09",
|
||||||
"titan-10",
|
"titan-10",
|
||||||
"titan-11",
|
"titan-11",
|
||||||
"titan-20",
|
|
||||||
"titan-21",
|
|
||||||
"titan-12",
|
"titan-12",
|
||||||
"titan-13",
|
"titan-13",
|
||||||
"titan-14",
|
"titan-14",
|
||||||
"titan-15",
|
"titan-15",
|
||||||
|
"titan-16",
|
||||||
"titan-17",
|
"titan-17",
|
||||||
"titan-18",
|
"titan-18",
|
||||||
"titan-19",
|
"titan-19",
|
||||||
@ -84,22 +80,7 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
|
|||||||
WORKER_TOTAL = len(WORKER_NODES)
|
WORKER_TOTAL = len(WORKER_NODES)
|
||||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||||
# Namespaces considered infrastructure (excluded from workload counts)
|
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system"
|
||||||
INFRA_NAMESPACES = [
|
|
||||||
"kube-system",
|
|
||||||
"longhorn-system",
|
|
||||||
"metallb-system",
|
|
||||||
"monitoring",
|
|
||||||
"logging",
|
|
||||||
"cert-manager",
|
|
||||||
"flux-system",
|
|
||||||
"traefik",
|
|
||||||
"maintenance",
|
|
||||||
"postgres",
|
|
||||||
]
|
|
||||||
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$"
|
|
||||||
# Namespaces allowed on control plane without counting as workloads
|
|
||||||
CP_ALLOWED_NS = INFRA_REGEX
|
|
||||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||||
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
|
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
|
||||||
CONTROL_WORKLOADS_EXPR = (
|
CONTROL_WORKLOADS_EXPR = (
|
||||||
@ -189,48 +170,22 @@ def node_io_expr(scope=""):
|
|||||||
return scoped_node_expr(base, scope)
|
return scoped_node_expr(base, scope)
|
||||||
|
|
||||||
|
|
||||||
def namespace_selector(scope_var):
|
|
||||||
return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}'
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_gpu_selector(scope_var):
|
|
||||||
return f'namespace!="",pod!="",{scope_var}'
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_cpu_raw(scope_var):
|
|
||||||
return (
|
|
||||||
"sum(rate(container_cpu_usage_seconds_total"
|
|
||||||
f"{{{namespace_selector(scope_var)}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_ram_raw(scope_var):
|
|
||||||
return f"sum(container_memory_working_set_bytes{{{namespace_selector(scope_var)}}}) by (namespace)"
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_gpu_usage_instant(scope_var):
|
|
||||||
return f"sum(DCGM_FI_DEV_GPU_UTIL{{{namespace_gpu_selector(scope_var)}}}) by (namespace)"
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_share_expr(resource_expr):
|
def namespace_share_expr(resource_expr):
|
||||||
total = f"clamp_min(sum( {resource_expr} ), 1)"
|
selected = f"( {resource_expr} ) and on(namespace) ( {NAMESPACE_TOP_FILTER} )"
|
||||||
return f"100 * ( {resource_expr} ) / {total}"
|
total = f"clamp_min(sum( {selected} ), 1)"
|
||||||
|
return f"100 * ( {selected} ) / {total}"
|
||||||
|
|
||||||
|
|
||||||
def namespace_cpu_share_expr(scope_var):
|
def namespace_cpu_share_expr():
|
||||||
return namespace_share_expr(namespace_cpu_raw(scope_var))
|
return namespace_share_expr(NAMESPACE_CPU_RAW)
|
||||||
|
|
||||||
|
|
||||||
def namespace_ram_share_expr(scope_var):
|
def namespace_ram_share_expr():
|
||||||
return namespace_share_expr(namespace_ram_raw(scope_var))
|
return namespace_share_expr(NAMESPACE_RAM_RAW)
|
||||||
|
|
||||||
|
|
||||||
def namespace_gpu_share_expr(scope_var):
|
def namespace_gpu_share_expr():
|
||||||
usage = namespace_gpu_usage_instant(scope_var)
|
return namespace_share_expr(NAMESPACE_GPU_RAW)
|
||||||
total = f"(sum({usage}) or on() vector(0))"
|
|
||||||
share = f"100 * ({usage}) / clamp_min({total}, 1)"
|
|
||||||
idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)"
|
|
||||||
return f"({share}) or ({idle})"
|
|
||||||
|
|
||||||
|
|
||||||
PROBLEM_PODS_EXPR = (
|
PROBLEM_PODS_EXPR = (
|
||||||
@ -315,12 +270,46 @@ STUCK_TABLE_EXPR = (
|
|||||||
")"
|
")"
|
||||||
)
|
)
|
||||||
|
|
||||||
NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
|
NAMESPACE_CPU_RAW = (
|
||||||
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
|
'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)'
|
||||||
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
|
)
|
||||||
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
|
NAMESPACE_RAM_RAW = (
|
||||||
|
'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
|
||||||
|
)
|
||||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||||
|
NAMESPACE_GPU_ALLOC = (
|
||||||
|
'sum((kube_pod_container_resource_requests{namespace!="",resource="nvidia.com/gpu"}'
|
||||||
|
' or kube_pod_container_resource_limits{namespace!="",resource="nvidia.com/gpu"})) by (namespace)'
|
||||||
|
)
|
||||||
|
NAMESPACE_GPU_USAGE_SHARE = (
|
||||||
|
'sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))'
|
||||||
|
)
|
||||||
|
NAMESPACE_GPU_USAGE_INSTANT = 'sum(DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}) by (namespace)'
|
||||||
|
NAMESPACE_GPU_RAW = (
|
||||||
|
"("
|
||||||
|
+ NAMESPACE_GPU_USAGE_SHARE
|
||||||
|
+ ") or on(namespace) ("
|
||||||
|
+ NAMESPACE_CPU_RAW
|
||||||
|
+ " * 0)"
|
||||||
|
)
|
||||||
|
NAMESPACE_GPU_WEIGHT = (
|
||||||
|
"("
|
||||||
|
+ NAMESPACE_GPU_ALLOC
|
||||||
|
+ ") or on(namespace) ("
|
||||||
|
+ NAMESPACE_CPU_RAW
|
||||||
|
+ " * 0)"
|
||||||
|
)
|
||||||
|
NAMESPACE_ACTIVITY_SCORE = (
|
||||||
|
"( "
|
||||||
|
+ NAMESPACE_CPU_RAW
|
||||||
|
+ " ) + ("
|
||||||
|
+ NAMESPACE_RAM_RAW
|
||||||
|
+ " / 1e9) + ("
|
||||||
|
+ NAMESPACE_GPU_WEIGHT
|
||||||
|
+ " * 100)"
|
||||||
|
)
|
||||||
|
NAMESPACE_TOP_FILTER = "(topk(10, " + NAMESPACE_ACTIVITY_SCORE + ") >= bool 0)"
|
||||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||||
TRAEFIK_NET_INGRESS = (
|
TRAEFIK_NET_INGRESS = (
|
||||||
'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
|
'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
|
||||||
@ -571,9 +560,9 @@ def table_panel(
|
|||||||
return panel
|
return panel
|
||||||
|
|
||||||
|
|
||||||
def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
|
def pie_panel(panel_id, title, expr, grid):
|
||||||
"""Return a pie chart panel with readable namespace labels."""
|
"""Return a pie chart panel with readable namespace labels."""
|
||||||
panel = {
|
return {
|
||||||
"id": panel_id,
|
"id": panel_id,
|
||||||
"type": "piechart",
|
"type": "piechart",
|
||||||
"title": title,
|
"title": title,
|
||||||
@ -597,71 +586,6 @@ def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
|
|||||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if links:
|
|
||||||
panel["links"] = links
|
|
||||||
if description:
|
|
||||||
panel["description"] = description
|
|
||||||
return panel
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_scope_variable(var_name, label):
|
|
||||||
options = [
|
|
||||||
{
|
|
||||||
"text": "workload namespaces only",
|
|
||||||
"value": NAMESPACE_SCOPE_WORKLOAD,
|
|
||||||
"selected": True,
|
|
||||||
},
|
|
||||||
{"text": "all namespaces", "value": NAMESPACE_SCOPE_ALL, "selected": False},
|
|
||||||
{
|
|
||||||
"text": "infrastructure namespaces only",
|
|
||||||
"value": NAMESPACE_SCOPE_INFRA,
|
|
||||||
"selected": False,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
query = (
|
|
||||||
"workload namespaces only : "
|
|
||||||
+ NAMESPACE_SCOPE_WORKLOAD
|
|
||||||
+ ",all namespaces : "
|
|
||||||
+ NAMESPACE_SCOPE_ALL
|
|
||||||
+ ",infrastructure namespaces only : "
|
|
||||||
+ NAMESPACE_SCOPE_INFRA
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"name": var_name,
|
|
||||||
"label": label,
|
|
||||||
"type": "custom",
|
|
||||||
"query": query,
|
|
||||||
"current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True},
|
|
||||||
"options": options,
|
|
||||||
"hide": 2,
|
|
||||||
"multi": False,
|
|
||||||
"includeAll": False,
|
|
||||||
"refresh": 1,
|
|
||||||
"sort": 0,
|
|
||||||
"skipUrlSync": False,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_scope_links(var_name):
|
|
||||||
def with_value(value):
|
|
||||||
encoded = urllib.parse.quote(value, safe="")
|
|
||||||
params = []
|
|
||||||
for other in NAMESPACE_SCOPE_VARS:
|
|
||||||
if other == var_name:
|
|
||||||
params.append(f"var-{other}={encoded}")
|
|
||||||
else:
|
|
||||||
params.append(f"var-{other}=${{{other}}}")
|
|
||||||
return "?" + "&".join(params)
|
|
||||||
|
|
||||||
return [
|
|
||||||
{"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False},
|
|
||||||
{"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False},
|
|
||||||
{
|
|
||||||
"title": "Infrastructure namespaces only",
|
|
||||||
"url": with_value(NAMESPACE_SCOPE_INFRA),
|
|
||||||
"targetBlank": False,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def bargauge_panel(
|
def bargauge_panel(
|
||||||
@ -933,115 +857,6 @@ def build_overview():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
mail_bounce_rate_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 5},
|
|
||||||
{"color": "orange", "value": 8},
|
|
||||||
{"color": "red", "value": 10},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
mail_limit_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 70},
|
|
||||||
{"color": "orange", "value": 85},
|
|
||||||
{"color": "red", "value": 95},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
mail_success_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "red", "value": None},
|
|
||||||
{"color": "orange", "value": 90},
|
|
||||||
{"color": "yellow", "value": 95},
|
|
||||||
{"color": "green", "value": 98},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
30,
|
|
||||||
"Mail Sent (1d)",
|
|
||||||
'max(postmark_outbound_sent{window="1d"})',
|
|
||||||
{"h": 2, "w": 6, "x": 0, "y": 8},
|
|
||||||
unit="none",
|
|
||||||
links=link_to("atlas-mail"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
{
|
|
||||||
"id": 31,
|
|
||||||
"type": "stat",
|
|
||||||
"title": "Mail Bounces (1d)",
|
|
||||||
"datasource": PROM_DS,
|
|
||||||
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8},
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
|
||||||
"refId": "A",
|
|
||||||
"legendFormat": "Rate",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"expr": 'max(postmark_outbound_bounced{window="1d"})',
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Count",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"color": {"mode": "thresholds"},
|
|
||||||
"custom": {"displayMode": "auto"},
|
|
||||||
"thresholds": mail_bounce_rate_thresholds,
|
|
||||||
"unit": "none",
|
|
||||||
},
|
|
||||||
"overrides": [
|
|
||||||
{
|
|
||||||
"matcher": {"id": "byName", "options": "Rate"},
|
|
||||||
"properties": [{"id": "unit", "value": "percent"}],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"matcher": {"id": "byName", "options": "Count"},
|
|
||||||
"properties": [{"id": "unit", "value": "none"}],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"options": {
|
|
||||||
"colorMode": "value",
|
|
||||||
"graphMode": "area",
|
|
||||||
"justifyMode": "center",
|
|
||||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
|
||||||
"textMode": "name_and_value",
|
|
||||||
},
|
|
||||||
"links": link_to("atlas-mail"),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
32,
|
|
||||||
"Mail Success Rate (1d)",
|
|
||||||
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
|
||||||
{"h": 2, "w": 6, "x": 6, "y": 8},
|
|
||||||
unit="percent",
|
|
||||||
thresholds=mail_success_thresholds,
|
|
||||||
decimals=1,
|
|
||||||
links=link_to("atlas-mail"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
33,
|
|
||||||
"Mail Limit Used (30d)",
|
|
||||||
"max(postmark_sending_limit_used_percent)",
|
|
||||||
{"h": 2, "w": 6, "x": 18, "y": 8},
|
|
||||||
unit="percent",
|
|
||||||
thresholds=mail_limit_thresholds,
|
|
||||||
decimals=1,
|
|
||||||
links=link_to("atlas-mail"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
storage_panels = [
|
storage_panels = [
|
||||||
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
||||||
(24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"),
|
(24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"),
|
||||||
@ -1061,38 +876,28 @@ def build_overview():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
cpu_scope = "$namespace_scope_cpu"
|
|
||||||
gpu_scope = "$namespace_scope_gpu"
|
|
||||||
ram_scope = "$namespace_scope_ram"
|
|
||||||
|
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
11,
|
11,
|
||||||
"Namespace CPU Share",
|
"Namespace CPU Share",
|
||||||
namespace_cpu_share_expr(cpu_scope),
|
namespace_cpu_share_expr(),
|
||||||
{"h": 9, "w": 8, "x": 0, "y": 16},
|
{"h": 9, "w": 8, "x": 0, "y": 16},
|
||||||
links=namespace_scope_links("namespace_scope_cpu"),
|
|
||||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
12,
|
12,
|
||||||
"Namespace GPU Share",
|
"Namespace GPU Share",
|
||||||
namespace_gpu_share_expr(gpu_scope),
|
namespace_gpu_share_expr(),
|
||||||
{"h": 9, "w": 8, "x": 8, "y": 16},
|
{"h": 9, "w": 8, "x": 8, "y": 16},
|
||||||
links=namespace_scope_links("namespace_scope_gpu"),
|
|
||||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
13,
|
13,
|
||||||
"Namespace RAM Share",
|
"Namespace RAM Share",
|
||||||
namespace_ram_share_expr(ram_scope),
|
namespace_ram_share_expr(),
|
||||||
{"h": 9, "w": 8, "x": 16, "y": 16},
|
{"h": 9, "w": 8, "x": 16, "y": 16},
|
||||||
links=namespace_scope_links("namespace_scope_ram"),
|
|
||||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1247,6 +1052,7 @@ def build_overview():
|
|||||||
links=link_to("atlas-storage"),
|
links=link_to("atlas-storage"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"uid": "atlas-overview",
|
"uid": "atlas-overview",
|
||||||
"title": "Atlas Overview",
|
"title": "Atlas Overview",
|
||||||
@ -1257,13 +1063,7 @@ def build_overview():
|
|||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"style": "dark",
|
"style": "dark",
|
||||||
"tags": ["atlas", "overview"],
|
"tags": ["atlas", "overview"],
|
||||||
"templating": {
|
"templating": {"list": []},
|
||||||
"list": [
|
|
||||||
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
|
||||||
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
|
||||||
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"time": {"from": "now-1h", "to": "now"},
|
"time": {"from": "now-1h", "to": "now"},
|
||||||
"refresh": "1m",
|
"refresh": "1m",
|
||||||
"links": [],
|
"links": [],
|
||||||
@ -1713,33 +1513,6 @@ def build_storage_dashboard():
|
|||||||
time_from="90d",
|
time_from="90d",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
30,
|
|
||||||
"Maintenance Sweepers Ready",
|
|
||||||
'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100',
|
|
||||||
{"h": 4, "w": 12, "x": 0, "y": 44},
|
|
||||||
unit="percent",
|
|
||||||
thresholds=PERCENT_THRESHOLDS,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
31,
|
|
||||||
"Maintenance Cron Freshness (s)",
|
|
||||||
'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})',
|
|
||||||
{"h": 4, "w": 12, "x": 12, "y": 44},
|
|
||||||
unit="s",
|
|
||||||
thresholds={
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 3600},
|
|
||||||
{"color": "red", "value": 10800},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return {
|
return {
|
||||||
"uid": "atlas-storage",
|
"uid": "atlas-storage",
|
||||||
"title": "Atlas Storage",
|
"title": "Atlas Storage",
|
||||||
@ -1929,231 +1702,21 @@ def build_network_dashboard():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def build_mail_dashboard():
|
|
||||||
panels = []
|
|
||||||
|
|
||||||
bounce_rate_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 5},
|
|
||||||
{"color": "orange", "value": 8},
|
|
||||||
{"color": "red", "value": 10},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
limit_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 70},
|
|
||||||
{"color": "orange", "value": 85},
|
|
||||||
{"color": "red", "value": 95},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
success_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "red", "value": None},
|
|
||||||
{"color": "orange", "value": 90},
|
|
||||||
{"color": "yellow", "value": 95},
|
|
||||||
{"color": "green", "value": 98},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
1,
|
|
||||||
"Sent (1d)",
|
|
||||||
'max(postmark_outbound_sent{window="1d"})',
|
|
||||||
{"h": 4, "w": 6, "x": 0, "y": 0},
|
|
||||||
decimals=0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
2,
|
|
||||||
"Sent (7d)",
|
|
||||||
'max(postmark_outbound_sent{window="7d"})',
|
|
||||||
{"h": 4, "w": 6, "x": 6, "y": 0},
|
|
||||||
decimals=0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"type": "stat",
|
|
||||||
"title": "Mail Bounces (1d)",
|
|
||||||
"datasource": PROM_DS,
|
|
||||||
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
|
||||||
"refId": "A",
|
|
||||||
"legendFormat": "Rate",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"expr": 'max(postmark_outbound_bounced{window="1d"})',
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Count",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"color": {"mode": "thresholds"},
|
|
||||||
"custom": {"displayMode": "auto"},
|
|
||||||
"thresholds": bounce_rate_thresholds,
|
|
||||||
"unit": "none",
|
|
||||||
},
|
|
||||||
"overrides": [
|
|
||||||
{
|
|
||||||
"matcher": {"id": "byName", "options": "Rate"},
|
|
||||||
"properties": [{"id": "unit", "value": "percent"}],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"matcher": {"id": "byName", "options": "Count"},
|
|
||||||
"properties": [{"id": "unit", "value": "none"}],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"options": {
|
|
||||||
"colorMode": "value",
|
|
||||||
"graphMode": "area",
|
|
||||||
"justifyMode": "center",
|
|
||||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
|
||||||
"textMode": "name_and_value",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
4,
|
|
||||||
"Success Rate (1d)",
|
|
||||||
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
|
||||||
{"h": 4, "w": 6, "x": 18, "y": 0},
|
|
||||||
unit="percent",
|
|
||||||
thresholds=success_thresholds,
|
|
||||||
decimals=1,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
5,
|
|
||||||
"Limit Used (30d)",
|
|
||||||
"max(postmark_sending_limit_used_percent)",
|
|
||||||
{"h": 4, "w": 6, "x": 0, "y": 4},
|
|
||||||
thresholds=limit_thresholds,
|
|
||||||
unit="percent",
|
|
||||||
decimals=1,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
6,
|
|
||||||
"Send Limit (30d)",
|
|
||||||
"max(postmark_sending_limit)",
|
|
||||||
{"h": 4, "w": 6, "x": 6, "y": 4},
|
|
||||||
decimals=0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
7,
|
|
||||||
"Last Success",
|
|
||||||
"max(postmark_last_success_timestamp_seconds)",
|
|
||||||
{"h": 4, "w": 6, "x": 12, "y": 4},
|
|
||||||
unit="dateTimeAsIso",
|
|
||||||
decimals=0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
stat_panel(
|
|
||||||
8,
|
|
||||||
"Exporter Errors",
|
|
||||||
"sum(postmark_request_errors_total)",
|
|
||||||
{"h": 4, "w": 6, "x": 18, "y": 4},
|
|
||||||
decimals=0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
panels.append(
|
|
||||||
timeseries_panel(
|
|
||||||
13,
|
|
||||||
"Bounce Rate (1d vs 7d)",
|
|
||||||
"max by (window) (postmark_outbound_bounce_rate)",
|
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 12},
|
|
||||||
unit="percent",
|
|
||||||
legend="{{window}}",
|
|
||||||
legend_display="table",
|
|
||||||
legend_placement="right",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
timeseries_panel(
|
|
||||||
14,
|
|
||||||
"Bounced (1d vs 7d)",
|
|
||||||
"max by (window) (postmark_outbound_bounced)",
|
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 12},
|
|
||||||
unit="none",
|
|
||||||
legend="{{window}}",
|
|
||||||
legend_display="table",
|
|
||||||
legend_placement="right",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
timeseries_panel(
|
|
||||||
15,
|
|
||||||
"Sent (1d vs 7d)",
|
|
||||||
"max by (window) (postmark_outbound_sent)",
|
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 20},
|
|
||||||
unit="none",
|
|
||||||
legend="{{window}}",
|
|
||||||
legend_display="table",
|
|
||||||
legend_placement="right",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
timeseries_panel(
|
|
||||||
16,
|
|
||||||
"Exporter Errors",
|
|
||||||
"sum(postmark_request_errors_total)",
|
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 20},
|
|
||||||
unit="none",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"uid": "atlas-mail",
|
|
||||||
"title": "Atlas Mail",
|
|
||||||
"folderUid": PRIVATE_FOLDER,
|
|
||||||
"editable": True,
|
|
||||||
"panels": panels,
|
|
||||||
"time": {"from": "now-30d", "to": "now"},
|
|
||||||
"annotations": {"list": []},
|
|
||||||
"schemaVersion": 39,
|
|
||||||
"style": "dark",
|
|
||||||
"tags": ["atlas", "mail"],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def build_gpu_dashboard():
|
def build_gpu_dashboard():
|
||||||
panels = []
|
panels = []
|
||||||
gpu_scope = "$namespace_scope_gpu"
|
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
1,
|
1,
|
||||||
"Namespace GPU Share",
|
"Namespace GPU Share",
|
||||||
namespace_gpu_share_expr(gpu_scope),
|
namespace_gpu_share_expr(),
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 0},
|
{"h": 8, "w": 12, "x": 0, "y": 0},
|
||||||
links=namespace_scope_links("namespace_scope_gpu"),
|
|
||||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
timeseries_panel(
|
timeseries_panel(
|
||||||
2,
|
2,
|
||||||
"GPU Util by Namespace",
|
"GPU Util by Namespace",
|
||||||
namespace_gpu_usage_instant(gpu_scope),
|
NAMESPACE_GPU_USAGE_INSTANT,
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 0},
|
{"h": 8, "w": 12, "x": 12, "y": 0},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
legend="{{namespace}}",
|
legend="{{namespace}}",
|
||||||
@ -2194,13 +1757,6 @@ def build_gpu_dashboard():
|
|||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"style": "dark",
|
"style": "dark",
|
||||||
"tags": ["atlas", "gpu"],
|
"tags": ["atlas", "gpu"],
|
||||||
"templating": {
|
|
||||||
"list": [
|
|
||||||
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
|
||||||
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
|
||||||
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
|
||||||
]
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2225,10 +1781,6 @@ DASHBOARDS = {
|
|||||||
"builder": build_network_dashboard,
|
"builder": build_network_dashboard,
|
||||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml",
|
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml",
|
||||||
},
|
},
|
||||||
"atlas-mail": {
|
|
||||||
"builder": build_mail_dashboard,
|
|
||||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
|
|
||||||
},
|
|
||||||
"atlas-gpu": {
|
"atlas-gpu": {
|
||||||
"builder": build_gpu_dashboard,
|
"builder": build_gpu_dashboard,
|
||||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
||||||
|
|||||||
@ -1,445 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Generate OpenSearch Dashboards saved objects and render them into ConfigMaps.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
scripts/dashboards_render_logs.py --build # rebuild NDJSON + ConfigMap
|
|
||||||
scripts/dashboards_render_logs.py # re-render ConfigMap from NDJSON
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import textwrap
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
ROOT = Path(__file__).resolve().parents[1]
|
|
||||||
DASHBOARD_DIR = ROOT / "services" / "logging" / "dashboards"
|
|
||||||
NDJSON_PATH = DASHBOARD_DIR / "logs.ndjson"
|
|
||||||
CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-dashboards-objects.yaml"
|
|
||||||
|
|
||||||
CONFIG_TEMPLATE = textwrap.dedent(
|
|
||||||
"""# {relative_path}
|
|
||||||
# Generated by scripts/dashboards_render_logs.py --build
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: opensearch-dashboards-objects
|
|
||||||
namespace: logging
|
|
||||||
data:
|
|
||||||
objects.ndjson: |
|
|
||||||
{payload}
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
DASHBOARD_VERSION = "7.10.0"
|
|
||||||
GRID_COLUMNS = 48
|
|
||||||
H_CHART = 10
|
|
||||||
H_ERRORS = 8
|
|
||||||
H_TABLE = 16
|
|
||||||
H_SEARCH = 18
|
|
||||||
TABLE_SIZE = 15
|
|
||||||
TABLE_PER_PAGE = 15
|
|
||||||
|
|
||||||
ERROR_TERMS = ("*error*", "*exception*", "*fail*")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class AppSpec:
|
|
||||||
slug: str
|
|
||||||
title: str
|
|
||||||
query: str
|
|
||||||
index_id: str = "kube-logs"
|
|
||||||
kind: str = "kube"
|
|
||||||
|
|
||||||
|
|
||||||
def error_query(base: str | None = None) -> str:
|
|
||||||
parts = [f'(log : "{term}" or message : "{term}")' for term in ERROR_TERMS]
|
|
||||||
expr = " or ".join(parts)
|
|
||||||
if base:
|
|
||||||
return f"({base}) and ({expr})"
|
|
||||||
return f"({expr})"
|
|
||||||
|
|
||||||
|
|
||||||
def json_line(obj: dict) -> str:
|
|
||||||
return json.dumps(obj, separators=(",", ":"))
|
|
||||||
|
|
||||||
|
|
||||||
def search_source(query: str) -> dict:
|
|
||||||
return {
|
|
||||||
"query": {"language": "kuery", "query": query},
|
|
||||||
"filter": [],
|
|
||||||
"indexRefName": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def index_pattern(object_id: str, title: str, time_field: str = "@timestamp") -> dict:
|
|
||||||
return {
|
|
||||||
"type": "index-pattern",
|
|
||||||
"id": object_id,
|
|
||||||
"attributes": {"title": title, "timeFieldName": time_field},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def histogram_vis(object_id: str, title: str, query: str, index_id: str) -> dict:
|
|
||||||
vis_state = {
|
|
||||||
"title": title,
|
|
||||||
"type": "histogram",
|
|
||||||
"aggs": [
|
|
||||||
{"id": "1", "enabled": True, "type": "count", "schema": "metric"},
|
|
||||||
{
|
|
||||||
"id": "2",
|
|
||||||
"enabled": True,
|
|
||||||
"type": "date_histogram",
|
|
||||||
"schema": "segment",
|
|
||||||
"params": {"field": "@timestamp", "interval": "auto", "min_doc_count": 1},
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"params": {"addTooltip": True, "addLegend": False, "scale": "linear", "interpolate": "linear"},
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
"type": "visualization",
|
|
||||||
"id": object_id,
|
|
||||||
"attributes": {
|
|
||||||
"title": title,
|
|
||||||
"visState": json.dumps(vis_state, separators=(",", ":")),
|
|
||||||
"uiStateJSON": "{}",
|
|
||||||
"description": "",
|
|
||||||
"version": 1,
|
|
||||||
"kibanaSavedObjectMeta": {
|
|
||||||
"searchSourceJSON": json.dumps(search_source(query), separators=(",", ":"))
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"references": [
|
|
||||||
{
|
|
||||||
"name": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
|
||||||
"type": "index-pattern",
|
|
||||||
"id": index_id,
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def table_vis(object_id: str, title: str, field: str, query: str, index_id: str) -> dict:
    """Build a ``visualization`` saved-object dict of type ``table``.

    The table counts documents per term of ``field`` for ``query``, ordered
    by that count in descending order, and references the index pattern
    ``index_id``. Bucket count and page size come from ``TABLE_SIZE`` and
    ``TABLE_PER_PAGE``.
    """
    compact = (",", ":")

    count_metric = {"id": "1", "enabled": True, "type": "count", "schema": "metric"}
    terms_bucket = {
        "id": "2",
        "enabled": True,
        "type": "terms",
        "schema": "bucket",
        "params": {"field": field, "size": TABLE_SIZE, "order": "desc", "orderBy": "1"},
    }
    table_params = {
        "perPage": TABLE_PER_PAGE,
        "showPartialRows": False,
        "showMetricsAtAllLevels": False,
        "sort": {"columnIndex": 1, "direction": "desc"},
    }
    state = {
        "title": title,
        "type": "table",
        "aggs": [count_metric, terms_bucket],
        "params": table_params,
    }

    # Both the visualization state and the search source are stored as JSON strings.
    attributes = {
        "title": title,
        "visState": json.dumps(state, separators=compact),
        "uiStateJSON": "{}",
        "description": "",
        "version": 1,
        "kibanaSavedObjectMeta": {
            "searchSourceJSON": json.dumps(search_source(query), separators=compact)
        },
    }
    index_reference = {
        "name": "kibanaSavedObjectMeta.searchSourceJSON.index",
        "type": "index-pattern",
        "id": index_id,
    }
    return {
        "type": "visualization",
        "id": object_id,
        "attributes": attributes,
        "references": [index_reference],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def search_object(object_id: str, title: str, columns: list[str], query: str, index_id: str) -> dict:
    """Build a ``search`` saved-object dict.

    The search shows ``columns`` for documents matching ``query``, sorted by
    ``@timestamp`` descending, and references the index pattern ``index_id``.
    """
    source_json = json.dumps(search_source(query), separators=(",", ":"))
    attributes = {
        "title": title,
        "description": "",
        "columns": columns,
        "sort": [["@timestamp", "desc"]],
        "kibanaSavedObjectMeta": {"searchSourceJSON": source_json},
    }
    index_reference = {
        "name": "kibanaSavedObjectMeta.searchSourceJSON.index",
        "type": "index-pattern",
        "id": index_id,
    }
    return {
        "type": "search",
        "id": object_id,
        "attributes": attributes,
        "references": [index_reference],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def grid(x: int, y: int, w: int, h: int, i: int) -> dict:
    """Return a grid-placement dict; the index ``i`` is stored as a string."""
    return dict(x=x, y=y, w=w, h=h, i=str(i))
|
|
||||||
|
|
||||||
|
|
||||||
def panel(panel_id: str, panel_type: str, grid_data: dict, index: int) -> dict:
    """Return one dashboard panel entry pointing at saved object ``panel_id``.

    ``index`` is stored as a string under ``panelIndex``; the panel version
    comes from ``DASHBOARD_VERSION`` and the embeddable config is left empty.
    """
    return dict(
        panelIndex=str(index),
        gridData=grid_data,
        id=panel_id,
        type=panel_type,
        version=DASHBOARD_VERSION,
        embeddableConfig={},
    )
|
|
||||||
|
|
||||||
|
|
||||||
def full_width_panels(specs: list[tuple[str, str, int]]) -> list[dict]:
    """Stack panels vertically, each spanning ``GRID_COLUMNS`` columns.

    Args:
        specs: ``(panel_id, panel_type, height)`` tuples in display order.

    Returns:
        Panel dicts numbered from 1, each placed directly below the previous one.
    """
    stacked = []
    offset = 0  # y coordinate at which the next panel starts
    position = 0
    for panel_id, panel_type, height in specs:
        position += 1
        cell = grid(0, offset, GRID_COLUMNS, height, position)
        stacked.append(panel(panel_id, panel_type, cell, position))
        offset += height
    return stacked
|
|
||||||
|
|
||||||
|
|
||||||
def dashboard_object(object_id: str, title: str, panels: list[dict]) -> dict:
    """Build a ``dashboard`` saved-object dict containing ``panels``.

    Panels and options are embedded as compact JSON strings; the dashboard's
    own search source is an empty kuery query with no filters.
    """
    compact = (",", ":")
    options = {"useMargins": True, "hidePanelTitles": False}
    empty_search = {"query": {"language": "kuery", "query": ""}, "filter": []}

    attributes = {
        "title": title,
        "description": "",
        "hits": 0,
        "panelsJSON": json.dumps(panels, separators=compact),
        "optionsJSON": json.dumps(options, separators=compact),
        "version": 1,
        "timeRestore": False,
        # Serialized with json.dumps' default separators, unlike the two fields above.
        "kibanaSavedObjectMeta": {"searchSourceJSON": json.dumps(empty_search)},
    }
    return {"type": "dashboard", "id": object_id, "attributes": attributes}
|
|
||||||
|
|
||||||
|
|
||||||
def app_dashboard_objects(app: AppSpec) -> list[dict]:
    """Build every saved object that makes up one application's log dashboard.

    The result holds, in order: a volume histogram, an error histogram, one
    or two "top" tables, a recent-logs search, a recent-errors search and
    finally the dashboard that stacks them. ``journald`` apps get a single
    table by systemd unit; every other kind gets tables by pod and by
    container.
    """
    prefix = f"logs-{app.slug}"

    # Only the search columns and the set of tables depend on the app kind.
    if app.kind == "journald":
        columns = ["@timestamp", "_HOSTNAME", "_SYSTEMD_UNIT", "MESSAGE"]
        tables = [("top-units", "Top units", "_SYSTEMD_UNIT.keyword")]
    else:
        columns = ["@timestamp", "kubernetes.pod_name", "kubernetes.container_name", "log", "message"]
        tables = [
            ("top-pods", "Top pods", "kubernetes.pod_name.keyword"),
            ("top-containers", "Top containers", "kubernetes.container_name.keyword"),
        ]

    objects = [
        histogram_vis(f"{prefix}-volume", f"{app.title} logs", app.query, app.index_id),
        histogram_vis(f"{prefix}-errors", f"{app.title} errors", error_query(app.query), app.index_id),
    ]
    layout = [
        (f"{prefix}-volume", "visualization", H_CHART),
        (f"{prefix}-errors", "visualization", H_ERRORS),
    ]

    for suffix, label, field in tables:
        table_id = f"{prefix}-{suffix}"
        objects.append(table_vis(table_id, label, field, app.query, app.index_id))
        layout.append((table_id, "visualization", H_TABLE))

    objects.append(search_object(f"{prefix}-recent", "Recent logs", columns, app.query, app.index_id))
    objects.append(
        search_object(
            f"{prefix}-recent-errors",
            "Recent errors",
            columns,
            error_query(app.query),
            app.index_id,
        )
    )
    layout.append((f"{prefix}-recent", "search", H_SEARCH))
    layout.append((f"{prefix}-recent-errors", "search", H_SEARCH))

    objects.append(dashboard_object(prefix, f"{app.title} Logs", full_width_panels(layout)))
    return objects
|
|
||||||
|
|
||||||
|
|
||||||
def overview_objects() -> list[dict]:
    """Build the saved objects for the cluster-wide "Atlas Logs Overview" dashboard.

    Every object references the ``kube-logs`` index pattern. The dashboard
    itself is the last element of the returned list.
    """
    index_id = "kube-logs"
    namespace_field = "kubernetes.namespace_name.keyword"

    objects = [
        histogram_vis("logs-overview-volume", "Logs per minute", "*", index_id),
        histogram_vis("logs-overview-errors", "Errors per minute", error_query(), index_id),
        table_vis("logs-overview-top-ns", "Top namespaces", namespace_field, "*", index_id),
        table_vis(
            "logs-overview-top-error-ns",
            "Top error namespaces",
            namespace_field,
            error_query(),
            index_id,
        ),
        table_vis("logs-overview-top-pods", "Top pods", "kubernetes.pod_name.keyword", "*", index_id),
        table_vis("logs-overview-top-nodes", "Top nodes", "kubernetes.node_name.keyword", "*", index_id),
        search_object(
            "logs-overview-recent-errors",
            "Recent errors",
            ["@timestamp", "kubernetes.namespace_name", "kubernetes.pod_name", "log", "message"],
            error_query(),
            index_id,
        ),
    ]

    layout = [
        ("logs-overview-volume", "visualization", H_CHART),
        ("logs-overview-errors", "visualization", H_ERRORS),
        ("logs-overview-top-ns", "visualization", H_TABLE),
        ("logs-overview-top-error-ns", "visualization", H_TABLE),
        ("logs-overview-top-pods", "visualization", H_TABLE),
        ("logs-overview-top-nodes", "visualization", H_TABLE),
        ("logs-overview-recent-errors", "search", H_SEARCH),
    ]
    objects.append(dashboard_object("logs-overview", "Atlas Logs Overview", full_width_panels(layout)))
    return objects
|
|
||||||
|
|
||||||
|
|
||||||
def build_objects() -> list[dict]:
    """Assemble the full list of saved objects.

    Order: the two index patterns, the overview dashboard objects, then the
    dashboard objects of every application in the order listed below.
    """
    # (slug, kuery filter) for the Kubernetes apps; the slug doubles as the title.
    kube_apps = [
        ("bstein-dev-home", 'kubernetes.namespace_name: "bstein-dev-home"'),
        ("pegasus", 'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "pegasus"'),
        ("jellyfin", 'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "jellyfin"'),
        ("vaultwarden", 'kubernetes.namespace_name: "vaultwarden"'),
        ("mailu", 'kubernetes.namespace_name: "mailu-mailserver"'),
        ("nextcloud", 'kubernetes.namespace_name: "nextcloud"'),
        ("gitea", 'kubernetes.namespace_name: "gitea"'),
        ("jenkins", 'kubernetes.namespace_name: "jenkins"'),
        ("harbor", 'kubernetes.namespace_name: "harbor"'),
        ("vault", 'kubernetes.namespace_name: "vault"'),
        ("keycloak", 'kubernetes.namespace_name: "sso"'),
        ("flux-system", 'kubernetes.namespace_name: "flux-system"'),
        ("comms", 'kubernetes.namespace_name: "comms"'),
        ("element-web", 'kubernetes.namespace_name: "comms" and kubernetes.container_name: "element-web"'),
        ("element-call", 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "element-call"'),
        ("matrix-synapse", 'kubernetes.namespace_name: "comms" and kubernetes.container_name: "synapse"'),
        ("livekit", 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "livekit"'),
        ("coturn", 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "coturn"'),
    ]

    objects = [
        index_pattern("kube-logs", "kube-*"),
        index_pattern("journald-logs", "journald-*"),
    ]
    objects += overview_objects()

    apps = [AppSpec(slug, slug, query) for slug, query in kube_apps]
    # The one journald-backed app reads from the journald index pattern instead.
    apps.append(AppSpec("lesavka", "lesavka", '_HOSTNAME: "titan-jh"', index_id="journald-logs", kind="journald"))

    for app in apps:
        objects += app_dashboard_objects(app)
    return objects
|
|
||||||
|
|
||||||
|
|
||||||
def write_ndjson(objects: list[dict], path: Path) -> None:
    """Write ``objects`` to ``path`` as newline-delimited JSON.

    Parent directories are created when missing. Each object occupies one
    line and the file ends with a trailing newline.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    lines = [json_line(obj) for obj in objects]
    path.write_text("\n".join(lines) + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
def render_configmap(ndjson_path: Path, output_path: Path) -> None:
    """Render the NDJSON file into ``CONFIG_TEMPLATE`` and write it to ``output_path``.

    Every NDJSON line is prefixed with the indent string before being
    substituted as ``payload``; ``relative_path`` is ``output_path`` relative
    to ``ROOT``.
    """
    indented = [" " + line for line in ndjson_path.read_text().splitlines()]
    rendered = CONFIG_TEMPLATE.format(
        relative_path=output_path.relative_to(ROOT),
        payload="\n".join(indented),
    )
    output_path.write_text(rendered)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Command-line entry point.

    With ``--build`` the saved-object NDJSON is regenerated first. The
    ConfigMap is then rendered from the NDJSON file; the script exits with an
    error message when that file does not exist.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--build", action="store_true", help="Regenerate saved object NDJSON and ConfigMap")
    options = cli.parse_args()

    if options.build:
        write_ndjson(build_objects(), NDJSON_PATH)

    if not NDJSON_PATH.exists():
        raise SystemExit(f"Missing NDJSON file: {NDJSON_PATH}. Run with --build first.")

    render_configmap(NDJSON_PATH, CONFIG_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
|
||||||
@ -1,554 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Render Atlas knowledge artifacts from Flux + kustomize manifests.
|
|
||||||
|
|
||||||
Outputs (committed to git for stable diffs + RAG):
|
|
||||||
- knowledge/catalog/*.yaml
|
|
||||||
- knowledge/diagrams/*.mmd
|
|
||||||
|
|
||||||
This is intentionally conservative:
|
|
||||||
- never includes Secret objects
|
|
||||||
- never includes secret values
|
|
||||||
- keeps output deterministic (sorted)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Iterable
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
# Repository root: the directory one level above the directory holding this script.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Kinds treated as cluster-scoped; _is_namespaced() reports every other kind
# as namespaced.
CLUSTER_SCOPED_KINDS = {
    "Namespace",
    "Node",
    "CustomResourceDefinition",
    "ClusterRole",
    "ClusterRoleBinding",
    "StorageClass",
    "PersistentVolume",
    "MutatingWebhookConfiguration",
    "ValidatingWebhookConfiguration",
    "APIService",
}

# Kinds that extract_catalog() inspects; documents of any other kind are skipped.
INCLUDED_KINDS = {
    "Namespace",
    "Deployment",
    "StatefulSet",
    "DaemonSet",
    "Service",
    "Ingress",
    "IngressRoute",  # traefik
    "HelmRelease",  # only to harvest ingress hostnames from values
}
|
|
||||||
|
|
||||||
|
|
||||||
def _run(cmd: list[str], *, cwd: Path) -> str:
|
|
||||||
res = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, check=False)
|
|
||||||
if res.returncode != 0:
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Command failed ({res.returncode}): {' '.join(cmd)}\n{res.stderr.strip()}"
|
|
||||||
)
|
|
||||||
return res.stdout
|
|
||||||
|
|
||||||
|
|
||||||
def kustomize_build(path: Path) -> str:
    """Render the kustomization at ``path`` and return the resulting YAML text.

    Tries ``kubectl kustomize`` first. When that fails, falls back to the
    standalone ``kustomize build`` with load restrictions disabled; if the
    failure mentions a load restriction, ``kubectl kustomize`` is retried
    with restrictions disabled before that final fallback.
    """
    target = str(path.relative_to(REPO_ROOT))
    unrestricted = "--load-restrictor=LoadRestrictionsNone"
    try:
        return _run(["kubectl", "kustomize", target], cwd=REPO_ROOT)
    except Exception as first_failure:
        if "is not in or below" in str(first_failure):
            # Repo uses configMapGenerators that reference ../../scripts/*.py.
            # Kustomize load restriction must be disabled for a full render.
            try:
                return _run(["kubectl", "kustomize", unrestricted, target], cwd=REPO_ROOT)
            except Exception:
                # Best effort: fall through to the standalone kustomize binary.
                pass
        return _run(["kustomize", "build", unrestricted, target], cwd=REPO_ROOT)
|
|
||||||
|
|
||||||
|
|
||||||
def _iter_docs(raw_yaml: str) -> Iterable[dict[str, Any]]:
    """Yield the mapping documents contained in a multi-document YAML string.

    Non-mapping documents are skipped. A ``kind: List`` document with a list
    under ``items`` is flattened into its mapping items; any other document
    is yielded only when it has a truthy ``kind``.
    """
    for document in yaml.safe_load_all(raw_yaml):
        if not isinstance(document, dict):
            continue
        kind = document.get("kind")
        is_list_wrapper = kind == "List" and isinstance(document.get("items"), list)
        if is_list_wrapper:
            yield from (entry for entry in document["items"] if isinstance(entry, dict))
        elif kind:
            yield document
|
|
||||||
|
|
||||||
|
|
||||||
def _meta(doc: dict[str, Any]) -> tuple[str, str | None]:
|
|
||||||
md = doc.get("metadata") or {}
|
|
||||||
name = md.get("name") or ""
|
|
||||||
namespace = md.get("namespace")
|
|
||||||
return name, namespace
|
|
||||||
|
|
||||||
|
|
||||||
def _is_namespaced(doc: dict[str, Any]) -> bool:
    """Return True unless the document's kind is listed in ``CLUSTER_SCOPED_KINDS``."""
    cluster_scoped = (doc.get("kind") or "") in CLUSTER_SCOPED_KINDS
    return not cluster_scoped
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class FluxKustomization:
    """Immutable record of one Flux Kustomization found in the repository."""

    # metadata.name of the Kustomization resource.
    name: str
    # spec.path as stored by find_flux_kustomizations().
    path: str
    # spec.targetNamespace, or None when the resource does not set one.
    target_namespace: str | None
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_flux_path(path: str) -> str:
    """Make a Flux ``spec.path`` relative by dropping leading ``./`` and ``/``.

    Unlike ``str.lstrip("./")``, which strips any run of ``.`` and ``/``
    characters, this keeps ``../`` segments and dot-prefixed directory names
    (for example ``.github/x``) intact.
    """
    cleaned = path.strip()
    while True:
        if cleaned.startswith("./"):
            cleaned = cleaned[2:]
        elif cleaned.startswith("/"):
            cleaned = cleaned[1:]
        else:
            break
    # A bare "." means the repository root itself.
    return "" if cleaned == "." else cleaned


def find_flux_kustomizations() -> list[FluxKustomization]:
    """Find Flux Kustomization CRs under clusters/atlas/flux-system.

    Every ``*.yaml`` file below that directory is scanned. A document counts
    when its kind is ``Kustomization``, its apiVersion starts with
    ``kustomize.toolkit.fluxcd.io/`` and its ``spec.path`` is a non-blank
    string.

    Returns:
        The matching Kustomizations sorted by name, with ``path`` made
        relative to the repository root.
    """
    root = REPO_ROOT / "clusters" / "atlas" / "flux-system"
    items: list[FluxKustomization] = []
    for file in sorted(root.rglob("*.yaml")):
        raw = file.read_text()
        for doc in _iter_docs(raw):
            if doc.get("kind") != "Kustomization":
                continue
            # Plain kustomize files share the kind; only Flux CRs are wanted.
            api = str(doc.get("apiVersion") or "")
            if not api.startswith("kustomize.toolkit.fluxcd.io/"):
                continue
            name, _ = _meta(doc)
            spec = doc.get("spec") or {}
            path = spec.get("path")
            if not isinstance(path, str) or not path.strip():
                continue
            items.append(
                FluxKustomization(
                    name=name,
                    path=_normalize_flux_path(path),
                    target_namespace=spec.get("targetNamespace"),
                )
            )
    return sorted(items, key=lambda k: k.name)
|
|
||||||
|
|
||||||
|
|
||||||
def _safe_string_scan_for_hosts(value: Any) -> set[str]:
|
|
||||||
"""Best-effort host scan from HelmRelease values without chart rendering."""
|
|
||||||
hosts: set[str] = set()
|
|
||||||
if isinstance(value, str):
|
|
||||||
for m in re.finditer(r"(?i)([a-z0-9-]+(?:\.[a-z0-9-]+)+)", value):
|
|
||||||
host = m.group(1).lower()
|
|
||||||
if host.endswith("bstein.dev"):
|
|
||||||
hosts.add(host)
|
|
||||||
return hosts
|
|
||||||
if isinstance(value, list):
|
|
||||||
for item in value:
|
|
||||||
hosts |= _safe_string_scan_for_hosts(item)
|
|
||||||
return hosts
|
|
||||||
if isinstance(value, dict):
|
|
||||||
for item in value.values():
|
|
||||||
hosts |= _safe_string_scan_for_hosts(item)
|
|
||||||
return hosts
|
|
||||||
return hosts
|
|
||||||
|
|
||||||
|
|
||||||
def _service_ports(svc: dict[str, Any]) -> list[dict[str, Any]]:
|
|
||||||
spec = svc.get("spec") or {}
|
|
||||||
out: list[dict[str, Any]] = []
|
|
||||||
for p in spec.get("ports") or []:
|
|
||||||
if not isinstance(p, dict):
|
|
||||||
continue
|
|
||||||
out.append(
|
|
||||||
{
|
|
||||||
"name": p.get("name"),
|
|
||||||
"port": p.get("port"),
|
|
||||||
"targetPort": p.get("targetPort"),
|
|
||||||
"protocol": p.get("protocol", "TCP"),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def _workload_labels(doc: dict[str, Any]) -> dict[str, str]:
|
|
||||||
tpl = (doc.get("spec") or {}).get("template") or {}
|
|
||||||
md = tpl.get("metadata") or {}
|
|
||||||
labels = md.get("labels") or {}
|
|
||||||
return {str(k): str(v) for k, v in labels.items()} if isinstance(labels, dict) else {}
|
|
||||||
|
|
||||||
|
|
||||||
def _service_selector(doc: dict[str, Any]) -> dict[str, str]:
|
|
||||||
spec = doc.get("spec") or {}
|
|
||||||
sel = spec.get("selector") or {}
|
|
||||||
return {str(k): str(v) for k, v in sel.items()} if isinstance(sel, dict) else {}
|
|
||||||
|
|
||||||
|
|
||||||
def _selector_matches(selector: dict[str, str], labels: dict[str, str]) -> bool:
|
|
||||||
if not selector:
|
|
||||||
return False
|
|
||||||
return all(labels.get(k) == v for k, v in selector.items())
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_node_id(text: str) -> str:
|
|
||||||
return re.sub(r"[^a-zA-Z0-9_]", "_", text)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_catalog(
    rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]],
) -> tuple[dict[str, Any], dict[str, Any], str]:
    """Build knowledge catalog + mermaid diagram from rendered docs.

    Args:
        rendered: ``(kustomization, documents)`` pairs, one per rendered
            Flux Kustomization. The documents are not modified.

    Returns:
        ``(catalog, summary, diagram)``: the catalog mapping (sources,
        workloads, services, HTTP endpoints, HelmRelease host hints), a
        mapping of counts, and the Mermaid flowchart source text.
    """
    # Index workloads and services for mapping.
    workloads: dict[tuple[str, str], dict[str, Any]] = {}
    services: dict[tuple[str, str], dict[str, Any]] = {}
    ingresses: list[dict[str, Any]] = []
    ingressroutes: list[dict[str, Any]] = []
    helmrelease_hosts: dict[str, list[str]] = {}

    for src, docs in rendered:
        for doc in docs:
            kind = doc.get("kind")
            # Secrets are rejected explicitly and ahead of the allow-list, so
            # the guarantee does not depend on the contents of INCLUDED_KINDS.
            if kind == "Secret" or kind not in INCLUDED_KINDS:
                continue

            name, namespace = _meta(doc)
            if _is_namespaced(doc) and not namespace and src.target_namespace:
                namespace = src.target_namespace
                # Copy the metadata mapping too: a shallow copy of the
                # document alone would still write into the caller's data.
                # `or {}` also covers an explicit `metadata: null`.
                doc = {**doc, "metadata": {**(doc.get("metadata") or {}), "namespace": namespace}}

            if kind in ("Deployment", "StatefulSet", "DaemonSet"):
                # `or {}` at every level tolerates explicit nulls in the manifest.
                pod_spec = ((doc.get("spec") or {}).get("template") or {}).get("spec") or {}
                workloads[(namespace or "", name)] = {
                    "kind": kind,
                    "namespace": namespace or "",
                    "name": name,
                    "labels": _workload_labels(doc),
                    "serviceAccountName": pod_spec.get("serviceAccountName"),
                    "nodeSelector": pod_spec.get("nodeSelector", {}),
                    "images": sorted(
                        {
                            c.get("image")
                            for c in (pod_spec.get("containers") or [])
                            if isinstance(c, dict) and c.get("image")
                        }
                    ),
                }
            elif kind == "Service":
                services[(namespace or "", name)] = {
                    "namespace": namespace or "",
                    "name": name,
                    "type": (doc.get("spec") or {}).get("type", "ClusterIP"),
                    "selector": _service_selector(doc),
                    "ports": _service_ports(doc),
                }
            elif kind == "Ingress":
                ingresses.append({"source": src.name, "doc": doc})
            elif kind == "IngressRoute":
                ingressroutes.append({"source": src.name, "doc": doc})
            elif kind == "HelmRelease":
                spec = doc.get("spec") or {}
                vals = spec.get("values") or {}
                hosts = sorted(_safe_string_scan_for_hosts(vals))
                if hosts:
                    helmrelease_hosts[f"{src.name}:{namespace or ''}/{name}"] = hosts

    # Map services to workloads.
    service_to_workloads: dict[tuple[str, str], list[dict[str, str]]] = {}
    for (ns, svc_name), svc in services.items():
        selector = svc.get("selector") or {}
        matches: list[dict[str, str]] = []
        for (w_ns, w_name), w in workloads.items():
            # A Service is only matched against workloads of its own namespace.
            if w_ns != ns:
                continue
            if _selector_matches(selector, w.get("labels") or {}):
                matches.append({"kind": w["kind"], "name": w_name})
        service_to_workloads[(ns, svc_name)] = sorted(matches, key=lambda m: (m["kind"], m["name"]))

    # Extract HTTP endpoints.
    endpoints: list[dict[str, Any]] = []

    def add_endpoint(
        *,
        host: str,
        path: str,
        namespace: str,
        service: str,
        port: Any,
        source: str,
        kind: str,
        obj_name: str,
    ):
        # Record one host/path -> service route plus the workloads behind it.
        wk = service_to_workloads.get((namespace, service), [])
        endpoints.append(
            {
                "host": host,
                "path": path,
                "backend": {
                    "namespace": namespace,
                    "service": service,
                    "port": port,
                    "workloads": wk,
                },
                "via": {"kind": kind, "name": obj_name, "source": source},
            }
        )

    for item in ingresses:
        doc = item["doc"]
        source = item["source"]
        name, namespace = _meta(doc)
        namespace = namespace or ""
        spec = doc.get("spec") or {}
        for rule in spec.get("rules") or []:
            if not isinstance(rule, dict):
                continue
            host = (rule.get("host") or "").strip()
            http = rule.get("http") or {}
            for p in http.get("paths") or []:
                if not isinstance(p, dict):
                    continue
                backend = (p.get("backend") or {}).get("service") or {}
                svc_name = backend.get("name")
                # Prefer the numeric port, falling back to the named port.
                svc_port = (backend.get("port") or {}).get("number") or (backend.get("port") or {}).get("name")
                if not host or not svc_name:
                    continue
                add_endpoint(
                    host=host,
                    path=p.get("path") or "/",
                    namespace=namespace,
                    service=svc_name,
                    port=svc_port,
                    source=source,
                    kind="Ingress",
                    obj_name=name,
                )

    host_re = re.compile(r"Host\(`([^`]+)`\)")
    pathprefix_re = re.compile(r"PathPrefix\(`([^`]+)`\)")
    for item in ingressroutes:
        doc = item["doc"]
        source = item["source"]
        name, namespace = _meta(doc)
        namespace = namespace or ""
        spec = doc.get("spec") or {}
        for route in spec.get("routes") or []:
            if not isinstance(route, dict):
                continue
            match = route.get("match") or ""
            hosts = host_re.findall(match)
            # Routes without a PathPrefix matcher are recorded under "/".
            pathprefixes = pathprefix_re.findall(match) or ["/"]
            for svc in route.get("services") or []:
                if not isinstance(svc, dict):
                    continue
                svc_name = svc.get("name")
                svc_port = svc.get("port")
                if not svc_name:
                    continue
                for host in hosts:
                    for pp in pathprefixes:
                        add_endpoint(
                            host=host,
                            path=pp,
                            namespace=namespace,
                            service=svc_name,
                            port=svc_port,
                            source=source,
                            kind="IngressRoute",
                            obj_name=name,
                        )

    endpoints = sorted(
        endpoints,
        key=lambda e: (
            e["host"],
            e["path"],
            e["backend"]["namespace"],
            e["backend"]["service"],
        ),
    )

    catalog = {
        "cluster": "atlas",
        "sources": [
            {"name": k.name, "path": k.path, "targetNamespace": k.target_namespace}
            for k, _ in rendered
        ],
        "workloads": sorted(
            list(workloads.values()),
            key=lambda w: (w["namespace"], w["kind"], w["name"]),
        ),
        "services": sorted(
            list(services.values()),
            key=lambda s: (s["namespace"], s["name"]),
        ),
        "http_endpoints": endpoints,
        "helmrelease_host_hints": {k: v for k, v in sorted(helmrelease_hosts.items())},
    }

    # Mermaid diagram: host -> service -> workload (grouped by namespace).
    ns_nodes: dict[str, list[str]] = {}
    lines: list[str] = ["flowchart LR"]
    edges: set[tuple[str, str]] = set()

    def ensure_ns_node(ns: str, node_id: str):
        # Register node_id under its namespace once, keeping insertion order.
        ns_nodes.setdefault(ns, [])
        if node_id not in ns_nodes[ns]:
            ns_nodes[ns].append(node_id)

    host_nodes: dict[str, str] = {}

    for ep in endpoints:
        host = ep["host"]
        host_id = host_nodes.get(host)
        if not host_id:
            host_id = f"host_{_sanitize_node_id(host)}"
            host_nodes[host] = host_id
            lines.append(f' {host_id}["{host}"]')

        ns = ep["backend"]["namespace"]
        svc = ep["backend"]["service"]
        svc_id = f"svc_{_sanitize_node_id(ns)}_{_sanitize_node_id(svc)}"
        if svc_id not in ns_nodes.get(ns, []):
            lines.append(f' {svc_id}["{ns}/{svc} (Service)"]')
            ensure_ns_node(ns, svc_id)

        if (host_id, svc_id) not in edges:
            edges.add((host_id, svc_id))
            lines.append(f" {host_id} --> {svc_id}")

        for w in ep["backend"]["workloads"]:
            w_id = f"wl_{_sanitize_node_id(ns)}_{_sanitize_node_id(w['name'])}"
            if w_id not in ns_nodes.get(ns, []):
                lines.append(f' {w_id}["{ns}/{w["name"]} ({w["kind"]})"]')
                ensure_ns_node(ns, w_id)
            if (svc_id, w_id) not in edges:
                edges.add((svc_id, w_id))
                lines.append(f" {svc_id} --> {w_id}")

    # Wrap namespace subgraphs at the end for stability (sorted namespaces).
    if ns_nodes:
        lines.append("")
        for ns in sorted(ns_nodes.keys()):
            lines.append(f" subgraph { _sanitize_node_id(ns) }[{ns}]")
            for node_id in ns_nodes[ns]:
                lines.append(f" {node_id}")
            lines.append(" end")

    diagram = "\n".join(lines).rstrip() + "\n"

    summary = {
        "counts": {
            "workloads": len(workloads),
            "services": len(services),
            "http_endpoints": len(endpoints),
            "helmrelease_host_hints": sum(len(v) for v in helmrelease_hosts.values()),
        }
    }

    return catalog, summary, diagram
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
    """Render the Atlas knowledge artifacts from the repository's Flux manifests.

    Without ``--write`` only a JSON summary of counts is printed. With
    ``--write`` the catalog (YAML and JSON), the summary, the Mermaid diagram
    and a JSON rendering of the runbooks are written below the ``--out``
    directory.

    Returns:
        ``0`` on success, ``2`` when no Flux Kustomizations were found.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default="knowledge", help="Output base directory (default: knowledge/)")
    ap.add_argument(
        "--write",
        action="store_true",
        help="Write generated files (otherwise just print a summary).",
    )
    args = ap.parse_args()

    out_dir = REPO_ROOT / args.out
    flux = find_flux_kustomizations()
    if not flux:
        print("No Flux Kustomizations found under clusters/atlas/flux-system.", file=sys.stderr)
        return 2

    rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]] = []
    for k in flux:
        path = REPO_ROOT / k.path
        # Kustomizations whose path is absent from the checkout are skipped.
        if not path.exists():
            continue
        raw = kustomize_build(path)
        # Secrets are dropped right after rendering.
        docs = [d for d in _iter_docs(raw) if d.get("kind") != "Secret"]
        rendered.append((k, docs))

    rendered = sorted(rendered, key=lambda item: item[0].name)
    catalog, summary, diagram = extract_catalog(rendered)

    if not args.write:
        print(json.dumps(summary, indent=2, sort_keys=True))
        return 0

    (out_dir / "catalog").mkdir(parents=True, exist_ok=True)
    (out_dir / "diagrams").mkdir(parents=True, exist_ok=True)

    catalog_path = out_dir / "catalog" / "atlas.yaml"
    catalog_json_path = out_dir / "catalog" / "atlas.json"
    summary_path = out_dir / "catalog" / "atlas-summary.json"
    diagram_path = out_dir / "diagrams" / "atlas-http.mmd"
    runbooks_json_path = out_dir / "catalog" / "runbooks.json"

    catalog_path.write_text(
        "# Generated by scripts/knowledge_render_atlas.py (do not edit by hand)\n"
        + yaml.safe_dump(catalog, sort_keys=False),
        encoding="utf-8",
    )
    catalog_json_path.write_text(json.dumps(catalog, indent=2, sort_keys=False) + "\n", encoding="utf-8")
    summary_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    diagram_path.write_text(diagram, encoding="utf-8")

    # Render runbooks into JSON for lightweight, dependency-free consumption in-cluster.
    runbooks_dir = out_dir / "runbooks"
    runbooks: list[dict[str, Any]] = []
    if runbooks_dir.exists():
        for md_file in sorted(runbooks_dir.glob("*.md")):
            raw = md_file.read_text(encoding="utf-8")
            fm: dict[str, Any] = {}
            body = raw
            if raw.startswith("---\n"):
                try:
                    _, rest = raw.split("---\n", 1)
                    fm_raw, body = rest.split("\n---\n", 1)
                    loaded = yaml.safe_load(fm_raw) or {}
                    # Front matter that parses to a list or scalar has no
                    # .get(); treat it like unparsable front matter.
                    if not isinstance(loaded, dict):
                        raise ValueError("front matter is not a mapping")
                    fm = loaded
                except Exception:
                    # Unusable front matter: keep the whole file as the body.
                    fm = {}
                    body = raw
            runbooks.append(
                {
                    "path": str(md_file.relative_to(out_dir)),
                    "title": fm.get("title") or md_file.stem,
                    "tags": fm.get("tags") or [],
                    "entrypoints": fm.get("entrypoints") or [],
                    "source_paths": fm.get("source_paths") or [],
                    "body": body.strip(),
                }
            )
    runbooks_json_path.write_text(json.dumps(runbooks, indent=2, sort_keys=False) + "\n", encoding="utf-8")

    def _shown(target):
        # --out may point outside the repository; report the full path then
        # instead of failing after the files have already been written.
        try:
            return target.relative_to(REPO_ROOT)
        except ValueError:
            return target

    print(f"Wrote {_shown(catalog_path)}")
    print(f"Wrote {_shown(catalog_json_path)}")
    print(f"Wrote {_shown(summary_path)}")
    print(f"Wrote {_shown(diagram_path)}")
    print(f"Wrote {_shown(runbooks_json_path)}")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
||||||
@ -1,313 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Generate OpenSearch Observability seed objects and render them into ConfigMaps.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
scripts/logging_render_observability.py --build # rebuild JSON + ConfigMap
|
|
||||||
scripts/logging_render_observability.py # re-render ConfigMap from JSON
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import textwrap
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
ROOT = Path(__file__).resolve().parents[1]
|
|
||||||
OBS_DIR = ROOT / "services" / "logging" / "observability"
|
|
||||||
APPS_PATH = OBS_DIR / "applications.json"
|
|
||||||
QUERIES_PATH = OBS_DIR / "saved_queries.json"
|
|
||||||
VIS_PATH = OBS_DIR / "saved_visualizations.json"
|
|
||||||
CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-observability-objects.yaml"
|
|
||||||
|
|
||||||
CONFIG_TEMPLATE = textwrap.dedent(
|
|
||||||
"""# {relative_path}
|
|
||||||
# Generated by scripts/logging_render_observability.py --build
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: opensearch-observability-objects
|
|
||||||
namespace: logging
|
|
||||||
data:
|
|
||||||
applications.json: |
|
|
||||||
{applications}
|
|
||||||
saved_queries.json: |
|
|
||||||
{queries}
|
|
||||||
saved_visualizations.json: |
|
|
||||||
{visualizations}
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
DEFAULT_RANGE = {"start": "now-24h", "end": "now", "text": ""}
|
|
||||||
DEFAULT_TIMESTAMP = {"name": "@timestamp", "type": "timestamp"}
|
|
||||||
DEFAULT_FIELDS = {"text": "", "tokens": []}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class AppSpec:
    """Immutable description of one application that gets seeded observability objects."""

    # Application name; reused for the application object and the per-app saved query names.
    name: str
    # Query string that selects this application's log records.
    base_query: str
    # Log source family; "journald" selects the journald error filter, any other value the kube one.
    kind: str = "kube"
    # Free-form text copied into the generated application object.
    description: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class QuerySpec:
    """Immutable description of one saved query."""

    # Display name of the saved query.
    name: str
    # Query text stored in the saved query object.
    query: str
    # Optional free-form description.
    description: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class VisualizationSpec:
    """Immutable description of one saved visualization."""

    # Display name of the visualization.
    name: str
    # Query text that feeds the visualization.
    query: str
    # Chart type written to the object's "type" field (this file uses "line" and "bar").
    vis_type: str
    # Optional free-form description.
    description: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
def source_query(index: str, where: str | None = None) -> str:
    """Build a ``source = <index>`` query, optionally piped through a ``where`` clause.

    Args:
        index: Index name or pattern to read from.
        where: Filter expression; omitted from the query when empty or ``None``.

    Returns:
        The assembled query string.
    """
    base = f"source = {index}"
    return f"{base} | where {where}" if where else base
|
|
||||||
|
|
||||||
|
|
||||||
def error_filter(fields: list[str]) -> str:
    """Return an ``or``-joined expression matching error-like text in each given field.

    Args:
        fields: Field names to test.

    Returns:
        One ``match(<field>, 'error|exception|fail')`` clause per field joined by
        ``" or "``; an empty string when ``fields`` is empty.
    """
    clause = "match({}, 'error|exception|fail')"
    return " or ".join(map(clause.format, fields))
|
|
||||||
|
|
||||||
|
|
||||||
def saved_query(spec: QuerySpec) -> dict:
    """Convert a ``QuerySpec`` into the saved-query dictionary that gets serialized.

    The date range, timestamp and field selections are the module-level defaults;
    they are referenced, not copied.
    """
    payload = {
        "name": spec.name,
        "description": spec.description,
        "query": spec.query,
    }
    payload.update(
        selected_date_range=DEFAULT_RANGE,
        selected_timestamp=DEFAULT_TIMESTAMP,
        selected_fields=DEFAULT_FIELDS,
    )
    return payload
|
|
||||||
|
|
||||||
|
|
||||||
def saved_visualization(spec: VisualizationSpec) -> dict:
    """Convert a ``VisualizationSpec`` into the saved-visualization dictionary.

    Same layout as a saved query plus a ``"type"`` entry holding the chart type.
    The module-level default selections are referenced, not copied.
    """
    payload = {
        "name": spec.name,
        "description": spec.description,
        "query": spec.query,
        "type": spec.vis_type,
    }
    payload.update(
        selected_date_range=DEFAULT_RANGE,
        selected_timestamp=DEFAULT_TIMESTAMP,
        selected_fields=DEFAULT_FIELDS,
    )
    return payload
|
|
||||||
|
|
||||||
|
|
||||||
def build_objects() -> tuple[list[dict], list[dict], list[dict]]:
    """Assemble every seed object: applications, saved queries and saved visualizations.

    Returns:
        ``(applications, queries, visualizations)``, each a list of plain
        dictionaries ready for JSON serialization.
    """
    kube_error = error_filter(["log", "message"])
    journald_error = error_filter(["MESSAGE"])

    # (name, index pattern, where clause, kind) -- order is preserved in the output.
    app_table = [
        ("bstein-dev-home", "kube-*", "kubernetes.namespace_name = 'bstein-dev-home'", "kube"),
        ("pegasus", "kube-*", "kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", "kube"),
        ("jellyfin", "kube-*", "kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", "kube"),
        ("vaultwarden", "kube-*", "kubernetes.namespace_name = 'vaultwarden'", "kube"),
        ("mailu", "kube-*", "kubernetes.namespace_name = 'mailu-mailserver'", "kube"),
        ("nextcloud", "kube-*", "kubernetes.namespace_name = 'nextcloud'", "kube"),
        ("gitea", "kube-*", "kubernetes.namespace_name = 'gitea'", "kube"),
        ("jenkins", "kube-*", "kubernetes.namespace_name = 'jenkins'", "kube"),
        ("harbor", "kube-*", "kubernetes.namespace_name = 'harbor'", "kube"),
        ("vault", "kube-*", "kubernetes.namespace_name = 'vault'", "kube"),
        ("keycloak", "kube-*", "kubernetes.namespace_name = 'sso'", "kube"),
        ("flux-system", "kube-*", "kubernetes.namespace_name = 'flux-system'", "kube"),
        ("comms", "kube-*", "kubernetes.namespace_name = 'comms'", "kube"),
        ("element-web", "kube-*", "kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", "kube"),
        ("element-call", "kube-*", "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", "kube"),
        ("matrix-synapse", "kube-*", "kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", "kube"),
        ("livekit", "kube-*", "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", "kube"),
        ("coturn", "kube-*", "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", "kube"),
        ("lesavka", "journald-*", "_HOSTNAME = 'titan-jh'", "journald"),
    ]
    apps = [
        AppSpec(app_name, source_query(index, clause), kind=kind)
        for app_name, index, clause, kind in app_table
    ]

    applications = []
    for app in apps:
        applications.append(
            {
                "name": app.name,
                "description": app.description,
                "baseQuery": app.base_query,
                "servicesEntities": [],
                "traceGroups": [app.name],
            }
        )

    # Cluster-wide queries come first, then a logs/errors pair per application.
    query_specs = [
        QuerySpec("kube logs", source_query("kube-*")),
        QuerySpec("kube errors", f"{source_query('kube-*')} | where {kube_error}"),
        QuerySpec("journald logs", source_query("journald-*")),
        QuerySpec("journald errors", f"{source_query('journald-*')} | where {journald_error}"),
    ]
    for app in apps:
        error_clause = journald_error if app.kind == "journald" else kube_error
        query_specs.append(QuerySpec(f"{app.name} logs", app.base_query))
        query_specs.append(QuerySpec(f"{app.name} errors", f"{app.base_query} | where {error_clause}"))
    queries = [saved_query(spec) for spec in query_specs]

    # (title, query, chart type)
    vis_table = [
        (
            "[Kube] Logs per hour",
            "source = kube-* | stats count() as log_count by span(`@timestamp`, 1h)",
            "line",
        ),
        (
            "[Kube] Errors per hour",
            f"source = kube-* | where {kube_error} | stats count() as error_count by span(`@timestamp`, 1h)",
            "line",
        ),
        (
            "[Kube] Top namespaces",
            "source = kube-* | stats count() as log_count by kubernetes.namespace_name | sort - log_count",
            "bar",
        ),
        (
            "[Kube] Top error namespaces",
            f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.namespace_name | sort - error_count",
            "bar",
        ),
        (
            "[Kube] Top pods",
            "source = kube-* | stats count() as log_count by kubernetes.pod_name | sort - log_count",
            "bar",
        ),
        (
            "[Kube] Top error pods",
            f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.pod_name | sort - error_count",
            "bar",
        ),
        (
            "[Kube] Top nodes",
            "source = kube-* | stats count() as log_count by kubernetes.node_name | sort - log_count",
            "bar",
        ),
        (
            "[Journald] Top units",
            "source = journald-* | stats count() as log_count by _SYSTEMD_UNIT | sort - log_count",
            "bar",
        ),
        (
            "[Journald] Top error units",
            f"source = journald-* | where {journald_error} | stats count() as error_count by _SYSTEMD_UNIT | sort - error_count",
            "bar",
        ),
    ]
    visualizations = [
        saved_visualization(VisualizationSpec(title, query, chart))
        for title, query, chart in vis_table
    ]

    return applications, queries, visualizations
|
|
||||||
|
|
||||||
|
|
||||||
def write_json(payload: list[dict], path: Path) -> None:
    """Serialize ``payload`` as 2-space-indented JSON to ``path`` with a trailing newline.

    Missing parent directories are created first.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2)
    path.write_text(serialized + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
def render_configmap(apps_path: Path, queries_path: Path, vis_path: Path, output_path: Path) -> None:
    """Render the three JSON payload files into the ConfigMap manifest at ``output_path``.

    Args:
        apps_path: JSON file holding the application objects.
        queries_path: JSON file holding the saved queries.
        vis_path: JSON file holding the saved visualizations.
        output_path: Manifest to write; must live under ``ROOT`` because its
            repo-relative path is embedded in the header comment.

    Raises:
        ValueError: If ``output_path`` is not located under ``ROOT``.
    """
    rendered = CONFIG_TEMPLATE.format(
        relative_path=output_path.relative_to(ROOT),
        applications=indent_payload(apps_path),
        queries=indent_payload(queries_path),
        visualizations=indent_payload(vis_path),
    )
    # Pin the encoding so the manifest does not depend on the host locale.
    output_path.write_text(rendered, encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def indent_payload(path: Path) -> str:
    """Read ``path`` and return its lines re-joined with a fixed indent prefix on each.

    The file is decoded as UTF-8 explicitly so non-ASCII characters in the JSON
    payloads are read the same way regardless of the host locale. The result
    has no trailing newline.
    """
    lines = path.read_text(encoding="utf-8").splitlines()
    return "\n".join(" " + line for line in lines)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """CLI entry point: optionally rebuild the JSON payloads, then render the ConfigMap.

    Raises:
        SystemExit: If any of the three JSON payload files is missing.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--build", action="store_true", help="Regenerate JSON payloads and ConfigMap")
    options = cli.parse_args()

    payload_paths = (APPS_PATH, QUERIES_PATH, VIS_PATH)

    if options.build:
        # build_objects() returns the payloads in the same order as payload_paths.
        for payload, destination in zip(build_objects(), payload_paths):
            write_json(payload, destination)

    if not all(candidate.exists() for candidate in payload_paths):
        raise SystemExit("Missing observability JSON payloads. Run with --build first.")

    render_configmap(APPS_PATH, QUERIES_PATH, VIS_PATH, CONFIG_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@ -110,33 +110,13 @@ def random_password():
|
|||||||
alphabet = string.ascii_letters + string.digits
|
alphabet = string.ascii_letters + string.digits
|
||||||
return "".join(secrets.choice(alphabet) for _ in range(24))
|
return "".join(secrets.choice(alphabet) for _ in range(24))
|
||||||
|
|
||||||
def get_attribute_value(attributes, key):
    """Return a single value for ``key`` from an attribute mapping.

    Attribute values may be stored as a list or as a bare string. A non-empty
    list yields its first element and a string is returned as-is; anything
    else (missing key, empty list, other types, falsy ``attributes``) yields
    ``None``.
    """
    value = (attributes or {}).get(key)
    if isinstance(value, str):
        return value
    if isinstance(value, list) and value:
        return value[0]
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_mailu_email(user, attributes):
    """Pick the mailbox address to use for a user.

    Precedence:
      1. an explicit ``mailu_email`` attribute,
      2. the user's own email when it is inside ``MAILU_DOMAIN`` (case-insensitive),
      3. ``<username>@<MAILU_DOMAIN>``.
    """
    override = get_attribute_value(attributes, "mailu_email")
    if override:
        return override

    address = user.get("email") or ""
    domain_suffix = f"@{MAILU_DOMAIN.lower()}"
    in_mail_domain = "@" in address and address.lower().endswith(domain_suffix)
    if in_mail_domain:
        return address

    return f"{user['username']}@{MAILU_DOMAIN}"
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_mailu_user(cursor, email, password, display_name):
|
def ensure_mailu_user(cursor, email, password, display_name):
|
||||||
localpart, domain = email.split("@", 1)
|
localpart, domain = email.split("@", 1)
|
||||||
if domain.lower() != MAILU_DOMAIN.lower():
|
if domain.lower() != MAILU_DOMAIN.lower():
|
||||||
return
|
return
|
||||||
hashed = bcrypt_sha256.hash(password)
|
hashed = bcrypt_sha256.hash(password)
|
||||||
now = datetime.datetime.now(datetime.timezone.utc)
|
now = datetime.datetime.utcnow()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO "user" (
|
INSERT INTO "user" (
|
||||||
@ -187,29 +167,30 @@ def main():
|
|||||||
|
|
||||||
for user in users:
|
for user in users:
|
||||||
attrs = user.get("attributes", {}) or {}
|
attrs = user.get("attributes", {}) or {}
|
||||||
app_pw = get_attribute_value(attrs, "mailu_app_password")
|
app_pw_value = attrs.get("mailu_app_password")
|
||||||
mailu_email = resolve_mailu_email(user, attrs)
|
if isinstance(app_pw_value, list):
|
||||||
|
app_pw = app_pw_value[0] if app_pw_value else None
|
||||||
|
elif isinstance(app_pw_value, str):
|
||||||
|
app_pw = app_pw_value
|
||||||
|
else:
|
||||||
|
app_pw = None
|
||||||
|
|
||||||
needs_update = False
|
email = user.get("email")
|
||||||
if not get_attribute_value(attrs, "mailu_email"):
|
if not email:
|
||||||
attrs["mailu_email"] = [mailu_email]
|
email = f"{user['username']}@{MAILU_DOMAIN}"
|
||||||
needs_update = True
|
|
||||||
|
|
||||||
if not app_pw:
|
if not app_pw:
|
||||||
app_pw = random_password()
|
app_pw = random_password()
|
||||||
attrs["mailu_app_password"] = [app_pw]
|
attrs["mailu_app_password"] = app_pw
|
||||||
needs_update = True
|
|
||||||
|
|
||||||
if needs_update:
|
|
||||||
kc_update_attributes(token, user, attrs)
|
kc_update_attributes(token, user, attrs)
|
||||||
log(f"Updated Mailu attributes for {mailu_email}")
|
log(f"Set mailu_app_password for {email}")
|
||||||
|
|
||||||
display_name = " ".join(
|
display_name = " ".join(
|
||||||
part for part in [user.get("firstName"), user.get("lastName")] if part
|
part for part in [user.get("firstName"), user.get("lastName")] if part
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
ensure_mailu_user(cursor, mailu_email, app_pw, display_name)
|
ensure_mailu_user(cursor, email, app_pw, display_name)
|
||||||
log(f"Synced mailbox for {mailu_email}")
|
log(f"Synced mailbox for {email}")
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
@ -1,149 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import datetime as dt
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from prometheus_client import Gauge, Info, start_http_server
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class Window:
    """Immutable reporting window for which outbound statistics are fetched."""

    # Value of the "window" label on the exported per-window gauges.
    label: str
    # Number of days subtracted from today to get the query start date (0 = today only).
    days: int
|
|
||||||
|
|
||||||
|
|
||||||
WINDOWS = [
|
|
||||||
Window("today", 0),
|
|
||||||
Window("1d", 1),
|
|
||||||
Window("7d", 7),
|
|
||||||
Window("30d", 30),
|
|
||||||
]
|
|
||||||
|
|
||||||
API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
|
|
||||||
POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60"))
|
|
||||||
LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
|
|
||||||
LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000"))
|
|
||||||
|
|
||||||
PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip()
|
|
||||||
FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
|
|
||||||
LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip()
|
|
||||||
LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip()
|
|
||||||
try:
|
|
||||||
SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0
|
|
||||||
except ValueError:
|
|
||||||
SENDING_LIMIT = 0.0
|
|
||||||
|
|
||||||
EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
|
|
||||||
EXPORTER_INFO.info(
|
|
||||||
{
|
|
||||||
"api_base": API_BASE,
|
|
||||||
"windows": ",".join(window.label for window in WINDOWS),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
|
|
||||||
POSTMARK_LAST_SUCCESS = Gauge(
|
|
||||||
"postmark_last_success_timestamp_seconds",
|
|
||||||
"Unix timestamp of the last successful Postmark stats refresh",
|
|
||||||
)
|
|
||||||
POSTMARK_REQUEST_ERRORS = Gauge(
|
|
||||||
"postmark_request_errors_total",
|
|
||||||
"Total Postmark stats request errors since exporter start",
|
|
||||||
)
|
|
||||||
|
|
||||||
POSTMARK_OUTBOUND_SENT = Gauge(
|
|
||||||
"postmark_outbound_sent",
|
|
||||||
"Outbound emails sent within the selected window",
|
|
||||||
labelnames=("window",),
|
|
||||||
)
|
|
||||||
POSTMARK_OUTBOUND_BOUNCED = Gauge(
|
|
||||||
"postmark_outbound_bounced",
|
|
||||||
"Outbound emails bounced within the selected window",
|
|
||||||
labelnames=("window",),
|
|
||||||
)
|
|
||||||
POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
|
|
||||||
"postmark_outbound_bounce_rate",
|
|
||||||
"Outbound bounce rate percentage within the selected window",
|
|
||||||
labelnames=("window",),
|
|
||||||
)
|
|
||||||
POSTMARK_SENDING_LIMIT_GAUGE = Gauge(
|
|
||||||
"postmark_sending_limit",
|
|
||||||
"Configured Postmark sending limit for the active account",
|
|
||||||
)
|
|
||||||
POSTMARK_SENDING_LIMIT_USED = Gauge(
|
|
||||||
"postmark_sending_limit_used",
|
|
||||||
"Messages sent within the configured send limit window",
|
|
||||||
)
|
|
||||||
POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge(
|
|
||||||
"postmark_sending_limit_used_percent",
|
|
||||||
"Percent of the configured send limit used within the limit window",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_outbound_stats(token: str, window: Window) -> dict:
    """Fetch outbound statistics for one window from ``{API_BASE}/stats/outbound``.

    Args:
        token: Server token sent in the ``X-Postmark-Server-Token`` header.
        window: Reporting window; the range runs from ``today - window.days`` to today.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: On a non-success HTTP status (via ``raise_for_status``).
    """
    end_day = dt.date.today()
    start_day = end_day - dt.timedelta(days=window.days)
    reply = requests.get(
        f"{API_BASE}/stats/outbound",
        headers={
            "Accept": "application/json",
            "X-Postmark-Server-Token": token,
        },
        params={"fromdate": start_day.isoformat(), "todate": end_day.isoformat()},
        timeout=15,
    )
    reply.raise_for_status()
    return reply.json()
|
|
||||||
|
|
||||||
|
|
||||||
def update_metrics(token: str) -> None:
    """Refresh every exported gauge from the API using ``token``.

    For each configured window the sent count, bounced count and bounce rate
    (a percentage, 0.0 when nothing was sent) are published. Afterwards the
    sending-limit gauges are derived from the window named by ``LIMIT_WINDOW``.
    """
    totals = {}
    for window in WINDOWS:
        stats = fetch_outbound_stats(token, window)
        sent_count = int(stats.get("Sent", 0) or 0)
        bounce_count = int(stats.get("Bounced", 0) or 0)
        bounce_pct = bounce_count / sent_count * 100.0 if sent_count else 0.0
        totals[window.label] = sent_count
        per_window = (
            (POSTMARK_OUTBOUND_SENT, sent_count),
            (POSTMARK_OUTBOUND_BOUNCED, bounce_count),
            (POSTMARK_OUTBOUND_BOUNCE_RATE, bounce_pct),
        )
        for gauge, value in per_window:
            gauge.labels(window=window.label).set(value)

    POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT)
    used = totals.get(LIMIT_WINDOW, 0)
    POSTMARK_SENDING_LIMIT_USED.set(used)
    # A limit of 0 means "not configured": report 0% instead of dividing by zero.
    used_pct = used / SENDING_LIMIT * 100.0 if SENDING_LIMIT else 0.0
    POSTMARK_SENDING_LIMIT_USED_PERCENT.set(used_pct)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Start the metrics HTTP server and poll the API forever.

    The current token is kept for as long as refreshes succeed. Only after a
    failed refresh does the loop move to the next configured token, so the
    fallback token is used when the primary one stops working rather than on
    every other poll.

    Raises:
        SystemExit: If neither the primary nor the fallback token is configured.
    """
    if not PRIMARY_TOKEN and not FALLBACK_TOKEN:
        raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")

    start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)

    tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
    token_index = 0

    while True:
        token = tokens[token_index % len(tokens)]
        try:
            update_metrics(token)
        except Exception as exc:  # noqa: BLE001
            POSTMARK_API_UP.set(0)
            POSTMARK_REQUEST_ERRORS.inc()
            print(f"postmark_exporter: refresh failed: {exc}", flush=True)
            # Rotate only on failure so a healthy token keeps being used.
            token_index += 1
        else:
            POSTMARK_API_UP.set(1)
            POSTMARK_LAST_SUCCESS.set(time.time())
        time.sleep(POLL_INTERVAL_SECONDS)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@ -1,35 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
def indent(text: str, spaces: int) -> str:
    """Prefix each line of ``text`` with ``spaces`` space characters.

    Line endings are preserved. Lines that consist only of newline characters
    are passed through untouched so no trailing whitespace is introduced.
    """
    pad = " " * spaces
    pieces = []
    for row in text.splitlines(keepends=True):
        pieces.append(pad + row if row.strip("\n") else row)
    return "".join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Embed the exporter script into a ConfigMap manifest and write it to disk.

    The script is read from ``scripts/monitoring_postmark_exporter.py`` under
    the repository root, given a trailing newline if it lacks one, indented and
    appended below a fixed manifest header.
    """
    repo_root = Path(__file__).resolve().parents[1]
    script_path = repo_root / "scripts" / "monitoring_postmark_exporter.py"
    manifest_path = repo_root / "services" / "monitoring" / "postmark-exporter-script.yaml"

    script_text = script_path.read_text(encoding="utf-8")
    if not script_text.endswith("\n"):
        script_text += "\n"

    header = "".join(
        [
            "# services/monitoring/postmark-exporter-script.yaml\n",
            "apiVersion: v1\n",
            "kind: ConfigMap\n",
            "metadata:\n",
            " name: postmark-exporter-script\n",
            "data:\n",
            " monitoring_postmark_exporter.py: |\n",
        ]
    )
    manifest = header + indent(script_text, 4)

    manifest_path.write_text(manifest, encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
49
scripts/nextcloud-mail-sync.sh
Executable file
49
scripts/nextcloud-mail-sync.sh
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/bin/bash
# Create Nextcloud Mail accounts for Keycloak users that have an email address
# and a mailu_app_password attribute. Existing accounts are left untouched.
set -euo pipefail

KC_BASE="${KC_BASE:?}"
KC_REALM="${KC_REALM:?}"
KC_ADMIN_USER="${KC_ADMIN_USER:?}"
KC_ADMIN_PASS="${KC_ADMIN_PASS:?}"

if ! command -v jq >/dev/null 2>&1; then
  apt-get update && apt-get install -y jq curl >/dev/null
fi

account_exists() {
  # Skip if the account email is already present in the mail app.
  # The listing is captured once and searched from a variable: piping into
  # `grep -q` under `pipefail` can fail spuriously when grep exits early and
  # the producer receives SIGPIPE.
  local accounts
  accounts=$(runuser -u www-data -- php occ mail:account:list 2>/dev/null) || return 1
  grep -Fq -e " ${1}" -e "${1} " <<<"${accounts}"
}

# --data-urlencode keeps credentials containing '&', '+', '%' or '=' intact.
token=$(
  curl -s -d "grant_type=password" \
    -d "client_id=admin-cli" \
    --data-urlencode "username=${KC_ADMIN_USER}" \
    --data-urlencode "password=${KC_ADMIN_PASS}" \
    "${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token'
)

if [[ -z "${token}" || "${token}" == "null" ]]; then
  echo "Failed to obtain admin token"
  exit 1
fi

# -f makes an HTTP error abort the script instead of feeding an error document to jq.
users=$(curl -sf -H "Authorization: Bearer ${token}" \
  "${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000")

echo "${users}" | jq -c '.[]' | while read -r user; do
  username=$(echo "${user}" | jq -r '.username')
  email=$(echo "${user}" | jq -r '.email // empty')
  app_pw=$(echo "${user}" | jq -r '.attributes.mailu_app_password[0] // empty')
  [[ -z "${email}" || -z "${app_pw}" ]] && continue
  if account_exists "${email}"; then
    echo "Skipping ${email}, already exists"
    continue
  fi
  echo "Syncing ${email}"
  # Best effort: a failure for one user must not stop the remaining users.
  runuser -u www-data -- php occ mail:account:create \
    "${username}" "${username}" "${email}" \
    mail.bstein.dev 993 ssl "${email}" "${app_pw}" \
    mail.bstein.dev 587 tls "${email}" "${app_pw}" login || true
done
|
||||||
65
scripts/nextcloud-maintenance.sh
Executable file
65
scripts/nextcloud-maintenance.sh
Executable file
@ -0,0 +1,65 @@
|
|||||||
|
#!/bin/bash
# Apply theming and the default quota to Nextcloud, then replace all
# "external sites" links with the list in SITES.
set -euo pipefail

NC_URL="${NC_URL:-https://cloud.bstein.dev}"
ADMIN_USER="${ADMIN_USER:?}"
ADMIN_PASS="${ADMIN_PASS:?}"

export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y -qq curl jq >/dev/null

# Run an occ command as the web server user.
run_occ() {
  runuser -u www-data -- php occ "$@"
}

# Print a message prefixed with an ISO-8601 timestamp.
log() { echo "[$(date -Is)] $*"; }

log "Applying Atlas theming"
run_occ theming:config name "Atlas Cloud"
run_occ theming:config slogan "Unified access to Atlas services"
run_occ theming:config url "https://cloud.bstein.dev"
run_occ theming:config color "#0f172a"
run_occ theming:config disable-user-theming yes

log "Setting default quota to 200 GB"
run_occ config:app:set files default_quota --value "200 GB"

API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1"
AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true")

log "Removing existing external links"
existing=$(curl -sf "${AUTH[@]}" "${API_BASE}?format=json" | jq -r '.ocs.data[].id // empty')
for id in ${existing}; do
  # Best effort: a link that cannot be deleted must not abort the run.
  curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null || true
done

# One "Display name|URL" entry per link.
SITES=(
  "Vaultwarden|https://vault.bstein.dev"
  "Jellyfin|https://stream.bstein.dev"
  "Gitea|https://scm.bstein.dev"
  "Jenkins|https://ci.bstein.dev"
  "Harbor|https://registry.bstein.dev"
  "Vault|https://secret.bstein.dev"
  "Jitsi|https://meet.bstein.dev"
  "Grafana|https://metrics.bstein.dev"
  "Chat LLM|https://chat.ai.bstein.dev"
  "Vision|https://draw.ai.bstein.dev"
  "STT/TTS|https://talk.ai.bstein.dev"
)

log "Seeding external links"
for entry in "${SITES[@]}"; do
  IFS="|" read -r name url <<<"${entry}"
  # --data-urlencode: names and URLs contain spaces, '/' and ':' which -d
  # would place into the form body unencoded.
  curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \
    --data-urlencode "name=${name}" \
    --data-urlencode "url=${url}" \
    -d "lang=" \
    -d "type=link" \
    -d "device=" \
    -d "icon=" \
    -d "groups[]=" \
    -d "redirect=1" >/dev/null
done

log "Maintenance run completed"
|
||||||
@ -1,509 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Clean up Atlas test users and portal requests (manual-only).
|
|
||||||
|
|
||||||
Default behavior is DRY RUN. This script is intended for operators to clean up
|
|
||||||
test accounts created via the bstein-dev-home onboarding portal.
|
|
||||||
|
|
||||||
Targets (best-effort):
|
|
||||||
- Keycloak users in realm "atlas"
|
|
||||||
- Atlas portal Postgres rows (access_requests + dependent tables)
|
|
||||||
- Vaultwarden users/invites created by the portal
|
|
||||||
|
|
||||||
Safety:
|
|
||||||
- Requires an explicit username prefix (e.g. "test-")
|
|
||||||
- Dry-run unless --apply is set
|
|
||||||
- --apply requires an explicit --confirm guard
|
|
||||||
- Validates prefixes to a conservative charset
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Any, Iterable
|
|
||||||
|
|
||||||
|
|
||||||
_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class KeycloakUser:
    """Immutable subset of a Keycloak user record."""

    # Value of the user's "id" field in the Keycloak users response.
    user_id: str
    # Value of the "username" field (empty string when absent).
    username: str
    # Value of the "email" field (empty string when absent).
    email: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class PortalRequestRow:
    """Immutable record for one portal access request row."""

    # Code identifying the request. NOTE(review): presumably the access_requests key -- confirm.
    request_code: str
    # Username the request was made for.
    username: str
    # Request status as stored in the row.
    status: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class VaultwardenUser:
    """Immutable record for one Vaultwarden user."""

    # Vaultwarden's identifier for the user.
    user_id: str
    # Email address of the user.
    email: str
    # Numeric status code. NOTE(review): meaning of the values is not visible here -- confirm.
    status: int
|
|
||||||
|
|
||||||
|
|
||||||
def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str:
|
|
||||||
proc = subprocess.run(
|
|
||||||
cmd,
|
|
||||||
input=input_bytes,
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE,
|
|
||||||
check=False,
|
|
||||||
)
|
|
||||||
if proc.returncode != 0:
|
|
||||||
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
|
|
||||||
raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}")
|
|
||||||
return proc.stdout.decode("utf-8", errors="replace")
|
|
||||||
|
|
||||||
|
|
||||||
def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str:
    """Read one key of a Kubernetes Secret via ``kubectl`` and return it decoded.

    Args:
        namespace: Namespace of the secret.
        name: Name of the secret.
        key: Data key to read. Dots in the key (e.g. ``tls.crt``) are escaped
            so the JSONPath does not treat them as path separators.

    Returns:
        The base64-decoded value as UTF-8 text with surrounding whitespace stripped.

    Raises:
        RuntimeError: If kubectl fails or the key is missing/empty.
    """
    # kubectl's JSONPath needs '.' inside a field name written as '\.'.
    escaped_key = key.replace(".", "\\.")
    raw_b64 = _run(
        [
            "kubectl",
            "-n",
            namespace,
            "get",
            "secret",
            name,
            "-o",
            f"jsonpath={{.data.{escaped_key}}}",
        ]
    ).strip()
    if not raw_b64:
        raise RuntimeError(f"secret {namespace}/{name} key {key} is empty")
    return base64.b64decode(raw_b64).decode("utf-8").strip()
|
|
||||||
|
|
||||||
|
|
||||||
def _kubectl_first_pod(namespace: str) -> str:
    """Return the name of the first pod that ``kubectl get pods`` lists in ``namespace``.

    Raises:
        RuntimeError: If kubectl fails, the namespace has no pods, or the first
            item carries no usable name.
    """
    listing = json.loads(_run(["kubectl", "-n", namespace, "get", "pods", "-o", "json"]))
    pods = listing.get("items") or []
    if not (isinstance(pods, list) and pods):
        raise RuntimeError(f"no pods found in namespace {namespace}")

    first_name = pods[0].get("metadata", {}).get("name")
    if isinstance(first_name, str) and first_name:
        return first_name
    raise RuntimeError(f"unexpected pod list in namespace {namespace}")
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_prefixes(prefixes: list[str]) -> list[str]:
    """Strip and validate username prefixes, dropping blank entries.

    Args:
        prefixes: Raw prefix strings.

    Returns:
        The stripped, non-empty prefixes in their original order.

    Raises:
        SystemExit: If a prefix does not match ``_SAFE_PREFIX_RE`` or no usable
            prefix remains.
    """
    accepted: list[str] = []
    for raw in prefixes:
        candidate = raw.strip()
        if candidate == "":
            continue
        if _SAFE_PREFIX_RE.match(candidate) is None:
            raise SystemExit(
                f"invalid prefix '{candidate}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)"
            )
        accepted.append(candidate)

    if len(accepted) == 0:
        raise SystemExit("at least one --prefix is required")
    return accepted
|
|
||||||
|
|
||||||
|
|
||||||
def _starts_with_any(value: str, prefixes: Iterable[str]) -> bool:
|
|
||||||
return any(value.startswith(p) for p in prefixes)
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_token(server: str, realm: str, client_id: str, client_secret: str) -> str:
    """Obtain an access token via the OAuth2 ``client_credentials`` grant.

    Posts a form-encoded request to
    ``{server}/realms/{realm}/protocol/openid-connect/token`` with a 15 second
    timeout.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        RuntimeError: If the response has no non-empty string ``access_token``.
    """
    form = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    request = urllib.request.Request(
        f"{server}/realms/{realm}/protocol/openid-connect/token",
        data=urllib.parse.urlencode(form).encode("utf-8"),
        method="POST",
    )
    request.add_header("Content-Type", "application/x-www-form-urlencoded")

    with urllib.request.urlopen(request, timeout=15) as response:
        body = json.loads(response.read().decode("utf-8"))

    access_token = body.get("access_token")
    if isinstance(access_token, str) and access_token:
        return access_token
    raise RuntimeError("failed to obtain keycloak access token")
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_list_users(server: str, realm: str, token: str, search: str) -> list[KeycloakUser]:
    """Query the Keycloak admin users endpoint for ``search``.

    Sends ``GET {server}/admin/realms/{realm}/users`` with ``max=1000`` and the
    given ``search`` term, authenticated with a bearer token. Only a single
    request is made; no further pages are fetched.

    Returns:
        One ``KeycloakUser`` per response entry that is a dict with a non-empty
        string ``id`` and a string (possibly empty) ``username``. A missing
        ``email`` becomes ``""``.

    Raises:
        RuntimeError: If the response body is not a JSON list.
    """
    params = urllib.parse.urlencode({"max": "1000", "search": search})
    request = urllib.request.Request(f"{server}/admin/realms/{realm}/users?{params}", method="GET")
    request.add_header("Authorization", f"Bearer {token}")

    with urllib.request.urlopen(request, timeout=30) as response:
        body = json.loads(response.read().decode("utf-8"))
    if not isinstance(body, list):
        raise RuntimeError("unexpected keycloak users response")

    result: list[KeycloakUser] = []
    for entry in body:
        if not isinstance(entry, dict):
            continue
        uid = entry.get("id")
        name = entry.get("username") or ""
        mail = entry.get("email") or ""
        # Skip entries without a usable id or with a non-string username.
        if isinstance(uid, str) and uid and isinstance(name, str):
            result.append(KeycloakUser(user_id=uid, username=name, email=str(mail)))
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_delete_user(server: str, realm: str, token: str, user_id: str) -> None:
    """Delete one Keycloak user by id.

    An HTTP 404 is treated as success (nothing left to delete). Any other
    ``HTTPError`` propagates to the caller.
    """
    request = urllib.request.Request(
        f"{server}/admin/realms/{realm}/users/{user_id}",
        method="DELETE",
    )
    request.add_header("Authorization", f"Bearer {token}")
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            response.read()
    except urllib.error.HTTPError as err:
        if err.code != 404:
            raise
|
|
||||||
|
|
||||||
|
|
||||||
def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]:
    """Run ``sql`` with psql inside the first pod of the ``postgres`` namespace.

    psql is invoked with ``-At -F <TAB>`` (unaligned, tuples only,
    tab-separated). Despite the function name, the output is not JSON.

    Returns:
        One ``{"cols": [...]}`` dict per output line, where the list holds the
        tab-separated fields of that line.
    """
    pod = _kubectl_first_pod("postgres")
    kubectl_exec = ["kubectl", "-n", "postgres", "exec", "-i", pod, "--"]
    psql_cmd = ["psql", portal_db_url, "-At", "-F", "\t", "-c", sql]
    output = _run(kubectl_exec + psql_cmd)
    return [{"cols": record.split("\t")} for record in output.splitlines()]
|
|
||||||
|
|
||||||
|
|
||||||
def _portal_list_requests(portal_db_url: str, prefixes: list[str]) -> list[PortalRequestRow]:
    """List ``access_requests`` rows whose username starts with any prefix.

    Prefixes are matched literally: the LIKE wildcards ``_`` and ``%`` inside a
    prefix are escaped, so ``test_`` matches only usernames that really begin
    with ``test_`` (consistent with the ``startswith`` filter used for
    Keycloak). An empty prefix list matches nothing.

    Args:
        portal_db_url: Connection string handed to psql.
        prefixes: Username prefixes; callers are expected to have validated
            them already.

    Returns:
        Matching rows ordered by ``created_at`` descending. Output lines with
        fewer than three tab-separated fields are ignored.
    """

    def like_clause(prefix: str) -> str:
        # '!' is used as the escape character so the SQL does not depend on
        # how the server treats backslashes inside string literals.
        pattern = prefix.replace("!", "!!").replace("%", "!%").replace("_", "!_")
        escape = " ESCAPE '!'" if pattern != prefix else ""
        # Defence in depth: double single quotes even though prefixes are
        # validated upstream.
        literal = pattern.replace("'", "''")
        return f"username LIKE '{literal}%'{escape}"

    where = " OR ".join(like_clause(p) for p in prefixes) or "FALSE"
    sql = (
        "SELECT request_code, username, status "
        "FROM access_requests "
        f"WHERE {where} "
        "ORDER BY created_at DESC;"
    )

    parsed: list[PortalRequestRow] = []
    for row in _psql_json(portal_db_url, sql):
        cols = row.get("cols") or []
        if len(cols) < 3:
            continue
        parsed.append(PortalRequestRow(request_code=cols[0], username=cols[1], status=cols[2]))
    return parsed
|
|
||||||
|
|
||||||
|
|
||||||
def _portal_delete_requests(portal_db_url: str, prefixes: list[str]) -> int:
    """Delete ``access_requests`` rows whose username starts with any prefix.

    Prefixes are matched literally: the LIKE wildcards ``_`` and ``%`` inside a
    prefix are escaped so the DELETE cannot remove usernames that merely
    resemble the prefix. An empty prefix list deletes nothing.

    Args:
        portal_db_url: Connection string handed to psql.
        prefixes: Username prefixes; callers are expected to have validated
            them already.

    Returns:
        The row count parsed from psql's ``DELETE <n>`` output, or 0 when that
        line is absent.
    """

    def like_clause(prefix: str) -> str:
        # '!' is used as the escape character so the SQL does not depend on
        # how the server treats backslashes inside string literals.
        pattern = prefix.replace("!", "!!").replace("%", "!%").replace("_", "!_")
        escape = " ESCAPE '!'" if pattern != prefix else ""
        # Defence in depth: double single quotes even though prefixes are
        # validated upstream.
        literal = pattern.replace("'", "''")
        return f"username LIKE '{literal}%'{escape}"

    where = " OR ".join(like_clause(p) for p in prefixes) or "FALSE"
    sql = f"DELETE FROM access_requests WHERE {where};"

    postgres_pod = _kubectl_first_pod("postgres")
    out = _run(
        [
            "kubectl",
            "-n",
            "postgres",
            "exec",
            "-i",
            postgres_pod,
            "--",
            "psql",
            portal_db_url,
            "-c",
            sql,
        ]
    )
    # psql prints "DELETE <n>". In a raw string the regex escapes take a single
    # backslash; doubling them would look for a literal backslash instead.
    match = re.search(r"DELETE\s+(\d+)", out)
    return int(match.group(1)) if match else 0
|
|
||||||
|
|
||||||
|
|
||||||
def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str:
    """Log in at ``{base_url}/admin`` and return the session cookie.

    The admin token is posted as a form field; the first ``name=value`` pair of
    the response's ``Set-Cookie`` header is returned.

    Raises:
        RuntimeError: On HTTP 429 (rate limited) or when no cookie is returned.
        urllib.error.HTTPError: For any other HTTP error status.
    """
    body = urllib.parse.urlencode({"token": admin_token}).encode("utf-8")
    request = urllib.request.Request(f"{base_url}/admin", data=body, method="POST")
    request.add_header("Content-Type", "application/x-www-form-urlencoded")
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            header = response.headers.get("Set-Cookie") or ""
    except urllib.error.HTTPError as err:
        if err.code != 429:
            raise
        raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from err

    # Keep only the leading "name=value" part; attributes follow the first ';'.
    name_value = header.partition(";")[0].strip()
    if name_value:
        return name_value
    raise RuntimeError("vaultwarden admin cookie missing")
|
|
||||||
|
|
||||||
|
|
||||||
def _vaultwarden_list_users(base_url: str, cookie: str) -> list[VaultwardenUser]:
    """Fetch all users from ``{base_url}/admin/users`` using an admin cookie.

    Returns:
        One ``VaultwardenUser`` per response entry that is a dict with a
        non-empty string ``id`` and ``email``. A non-integer ``_status`` is
        recorded as ``-1``.

    Raises:
        RuntimeError: On HTTP 429 (rate limited) or when the body is not a
            JSON list.
        urllib.error.HTTPError: For any other HTTP error status.
    """
    request = urllib.request.Request(f"{base_url}/admin/users", method="GET")
    request.add_header("Cookie", cookie)
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            body = json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as err:
        if err.code != 429:
            raise
        raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from err

    if not isinstance(body, list):
        raise RuntimeError("unexpected vaultwarden /admin/users response")

    result: list[VaultwardenUser] = []
    for entry in body:
        if not isinstance(entry, dict):
            continue
        uid = entry.get("id")
        mail = entry.get("email")
        state = entry.get("_status")
        if not (isinstance(uid, str) and uid):
            continue
        if not (isinstance(mail, str) and mail):
            continue
        result.append(
            VaultwardenUser(
                user_id=uid,
                email=mail,
                status=state if isinstance(state, int) else -1,
            )
        )
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def _vaultwarden_delete_user(base_url: str, cookie: str, user_id: str) -> None:
    """Delete one Vaultwarden user through the admin endpoint.

    An HTTP 404 is treated as success.

    Raises:
        RuntimeError: On HTTP 429 (rate limited).
        urllib.error.HTTPError: For any other HTTP error status.
    """
    request = urllib.request.Request(f"{base_url}/admin/users/{user_id}", method="DELETE")
    request.add_header("Cookie", cookie)
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            response.read()
    except urllib.error.HTTPError as err:
        if err.code == 404:
            return
        if err.code == 429:
            raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from err
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def _port_forward(namespace: str, target: str, local_port: int, remote_port: int) -> subprocess.Popen[bytes]:
    """Start ``kubectl port-forward`` bound to 127.0.0.1 and return the process.

    The caller owns the returned process and must terminate it.
    """
    command = [
        "kubectl",
        "-n",
        namespace,
        "port-forward",
        target,
        f"{local_port}:{remote_port}",
        "--address",
        "127.0.0.1",
    ]
    # Keep stdout/stderr muted to avoid leaking internal details in output.
    return subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
    """Command-line entry point for the test-user cleanup.

    Runs up to three independent stages, each skippable with a ``--skip-*``
    flag: portal DB access requests, Keycloak users, Vaultwarden users. Every
    stage lists what matched (first 50 entries) and deletes only when
    ``--apply`` is given together with a ``--confirm`` value equal to the
    sorted, comma-joined prefix list.

    Returns:
        0 on success, 1 when the Vaultwarden stage fails with a RuntimeError
        while logging in or listing users.

    Raises:
        SystemExit: On invalid prefixes or a missing/incorrect ``--confirm``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument
    add(
        "--prefix",
        action="append",
        default=[],
        help="Username prefix to match (repeatable). Example: --prefix test-",
    )
    add("--apply", action="store_true", help="Actually delete; otherwise dry-run only.")
    add(
        "--confirm",
        default="",
        help=(
            "Required when using --apply. Must exactly equal the comma-separated "
            "sorted prefix list (e.g. 'atlas-,bob-,e2e-,test-')."
        ),
    )
    add("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.")
    add("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.")
    add("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.")
    add(
        "--protect-keycloak-username",
        action="append",
        default=[],
        help="Keycloak usernames that must never be deleted (repeatable).",
    )
    add(
        "--protect-vaultwarden-email",
        action="append",
        default=[],
        help="Vaultwarden emails that must never be deleted (repeatable).",
    )
    args = parser.parse_args()

    prefixes = sorted(set(_validate_prefixes(args.prefix)))
    apply = bool(args.apply)
    expected_confirm = ",".join(prefixes)
    # Two usernames are always protected, in addition to those from the CLI.
    protected_keycloak = {"bstein", "robotuser"} | {
        name.strip() for name in args.protect_keycloak_username if name.strip()
    }
    protected_vaultwarden = {mail.strip() for mail in args.protect_vaultwarden_email if mail.strip()}

    if apply and args.confirm != expected_confirm:
        raise SystemExit(
            f"refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')"
        )

    print("Atlas test-user cleanup")
    print("prefixes:", expected_confirm)
    print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)")
    if protected_keycloak:
        print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak)))
    if protected_vaultwarden:
        print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden)))
    print()

    # --- Stage 1: portal database -------------------------------------------
    if not args.skip_portal_db:
        portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL")
        portal_rows = _portal_list_requests(portal_db_url, prefixes)
        print(f"Portal DB: {len(portal_rows)} access_requests matched")
        for row in portal_rows[:50]:
            print(f" {row.request_code}\t{row.status}\t{row.username}")
        if len(portal_rows) > 50:
            print(f" ... and {len(portal_rows) - 50} more")
        if apply and portal_rows:
            deleted = _portal_delete_requests(portal_db_url, prefixes)
            print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).")
        print()

    # --- Stage 2: Keycloak --------------------------------------------------
    if not args.skip_keycloak:
        kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/")
        kc_realm = os.getenv("KEYCLOAK_REALM", "atlas")
        kc_client_id = os.getenv("KEYCLOAK_ADMIN_CLIENT_ID", "bstein-dev-home-admin")
        kc_client_secret = _kubectl_get_secret_value(
            "bstein-dev-home", "bstein-dev-home-keycloak-admin", "client_secret"
        )
        token = _keycloak_token(kc_server, kc_realm, kc_client_id, kc_client_secret)

        # Keyed by user id so a user returned by several searches appears once.
        by_id: dict[str, KeycloakUser] = {}
        for prefix in prefixes:
            for candidate in _keycloak_list_users(kc_server, kc_realm, token, prefix):
                # Search results are re-checked with a strict prefix match.
                if not _starts_with_any(candidate.username, prefixes):
                    continue
                if candidate.username in protected_keycloak:
                    continue
                by_id[candidate.user_id] = candidate
        kc_users = sorted(by_id.values(), key=lambda u: u.username)

        print(f"Keycloak: {len(kc_users)} users matched")
        for kc_user in kc_users[:50]:
            email = kc_user.email or "-"
            print(f" {kc_user.username}\t{email}\t{kc_user.user_id}")
        if len(kc_users) > 50:
            print(f" ... and {len(kc_users) - 50} more")
        if apply and kc_users:
            for kc_user in kc_users:
                _keycloak_delete_user(kc_server, kc_realm, token, kc_user.user_id)
            print(f"Keycloak: deleted {len(kc_users)} users.")
        print()

    # --- Stage 3: Vaultwarden -----------------------------------------------
    if not args.skip_vaultwarden:

        def retry_rate_limited(call, fallback):
            """Call ``call`` up to 7 times, backing off while it is rate limited.

            Returns ``fallback`` when every attempt was rate limited; any other
            RuntimeError is re-raised immediately.
            """
            for attempt in range(7):
                try:
                    return call()
                except RuntimeError as exc:
                    if "rate limited" not in str(exc).lower():
                        raise
                    time.sleep(min(60.0, 2.0**attempt))
            return fallback

        pf = _port_forward("vaultwarden", "svc/vaultwarden-service", 18081, 80)
        try:
            # wait briefly for the port-forward to come up
            for _ in range(30):
                try:
                    urllib.request.urlopen("http://127.0.0.1:18081/", timeout=1).read(1)
                    break
                except Exception:
                    time.sleep(0.2)

            admin_token = _kubectl_get_secret_value("vaultwarden", "vaultwarden-admin", "ADMIN_TOKEN")
            base_url = "http://127.0.0.1:18081"
            try:
                cookie = retry_rate_limited(lambda: _vaultwarden_admin_cookie(admin_token, base_url), "")
                if not cookie:
                    raise RuntimeError("vaultwarden admin login repeatedly rate limited")

                vw_users: list[VaultwardenUser] = retry_rate_limited(
                    lambda: _vaultwarden_list_users(base_url, cookie), []
                )
                if not vw_users:
                    raise RuntimeError("vaultwarden user list unavailable (possibly rate limited)")
            except RuntimeError as exc:
                print(f"Vaultwarden: ERROR: {exc}")
                print()
                return 1

            matched: list[VaultwardenUser] = []
            for vw_user in vw_users:
                # Prefixes are compared against the local part of the address.
                local = vw_user.email.split("@", 1)[0]
                if _starts_with_any(local, prefixes) and vw_user.email not in protected_vaultwarden:
                    matched.append(vw_user)
            matched.sort(key=lambda u: u.email)

            print(f"Vaultwarden: {len(matched)} users matched")
            for vw_user in matched[:50]:
                print(f" {vw_user.email}\tstatus={vw_user.status}\t{vw_user.user_id}")
            if len(matched) > 50:
                print(f" ... and {len(matched) - 50} more")
            if apply and matched:
                for vw_user in matched:
                    _vaultwarden_delete_user(base_url, cookie, vw_user.user_id)
                print(f"Vaultwarden: deleted {len(matched)} users.")
            print()
        finally:
            # Always stop the port-forward; kill it if it does not exit in time.
            pf.terminate()
            try:
                pf.wait(timeout=3)
            except Exception:
                pf.kill()

    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)
|
|
||||||
@ -1,276 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Any, Iterable
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from atlas_portal import db, settings
|
|
||||||
from atlas_portal.keycloak import admin_client
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class KeycloakUser:
    """Immutable record identifying one Keycloak account."""

    # Value of the "id" field of the Keycloak user representation.
    id: str
    # Value of the "username" field of the Keycloak user representation.
    username: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class PortalRequest:
    """Immutable view of one row selected from ``access_requests``."""

    # request_code column.
    request_code: str
    # username column.
    username: str
    # status column.
    status: str
|
|
||||||
|
|
||||||
|
|
||||||
def _dedupe_by_id(users: Iterable[KeycloakUser]) -> list[KeycloakUser]:
|
|
||||||
seen: set[str] = set()
|
|
||||||
out: list[KeycloakUser] = []
|
|
||||||
for user in users:
|
|
||||||
if user.id in seen:
|
|
||||||
continue
|
|
||||||
seen.add(user.id)
|
|
||||||
out.append(user)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def _iter_keycloak_users_for_prefix(prefix: str, max_results: int) -> list[KeycloakUser]:
    """Return Keycloak users whose username strictly starts with ``prefix``.

    Issues one search request limited to ``max_results`` entries, then filters
    the response client-side. Usernames starting with ``service-account-`` are
    always excluded.

    Returns:
        Matching users, or an empty list when the response is not a JSON list.

    Raises:
        RuntimeError: If the Keycloak admin client reports it is not ready.
    """
    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured in this environment")

    endpoint = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    # Keycloak can return false positives for search; we do a strict prefix match client-side.
    query = {"search": prefix, "max": str(max_results), "briefRepresentation": "true"}
    with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
        response = http.get(endpoint, params=query, headers=client.headers())
        response.raise_for_status()
        payload = response.json()

    if not isinstance(payload, list):
        return []

    def is_target(entry: Any) -> bool:
        """True for dict entries with string id/username and a wanted name."""
        if not isinstance(entry, dict):
            return False
        name = entry.get("username")
        if not isinstance(name, str) or not isinstance(entry.get("id"), str):
            return False
        return name.startswith(prefix) and not name.startswith("service-account-")

    return [
        KeycloakUser(id=entry.get("id"), username=entry.get("username"))
        for entry in payload
        if is_target(entry)
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def _find_keycloak_users(prefixes: list[str], max_results: int, protected: set[str]) -> list[KeycloakUser]:
    """Collect deletable Keycloak users across all ``prefixes``.

    Results of the per-prefix searches are concatenated in prefix order,
    de-duplicated by user id, and stripped of any username in ``protected``.
    """
    candidates = [
        user
        for prefix in prefixes
        for user in _iter_keycloak_users_for_prefix(prefix, max_results=max_results)
    ]
    return [user for user in _dedupe_by_id(candidates) if user.username not in protected]
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_keycloak_users(users: list[KeycloakUser]) -> None:
    """Delete every user in ``users`` through the Keycloak admin API.

    Does nothing for an empty list. The first response with an error status
    other than 404 raises and aborts the remaining deletions.

    Raises:
        RuntimeError: If the Keycloak admin client reports it is not ready.
    """
    if not users:
        return

    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured in this environment")

    users_url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
        for user in users:
            # The id is percent-encoded in full before being placed in the path.
            response = http.delete(f"{users_url}/{quote(user.id, safe='')}", headers=client.headers())
            # Deleting a non-existent user is treated as success for idempotency.
            if response.status_code != 404:
                response.raise_for_status()
|
|
||||||
|
|
||||||
|
|
||||||
def _find_portal_requests(prefixes: list[str], max_results: int) -> list[PortalRequest]:
    """List portal access requests whose username starts with any prefix.

    Prefixes are matched literally: ``%`` and ``_`` inside a prefix are escaped
    so they are not interpreted as LIKE wildcards. This keeps the portal match
    consistent with the strict ``startswith`` filter applied to Keycloak users.

    Args:
        prefixes: Username prefixes to look for.
        max_results: Row limit applied to each per-prefix query.

    Returns:
        Matching requests, newest first within each prefix. Returns an empty
        list when the database is not configured. Overlapping prefixes may
        yield the same row more than once.
    """
    if not db.configured():
        return []

    rows: list[dict[str, Any]] = []
    with db.connect() as conn:
        for prefix in prefixes:
            # '!' is declared as the escape character in the query below;
            # escape it first so existing '!' characters stay literal.
            like = prefix.replace("!", "!!").replace("%", "!%").replace("_", "!_") + "%"
            cursor = conn.execute(
                """
                SELECT request_code, username, status
                FROM access_requests
                WHERE username LIKE %s ESCAPE '!'
                ORDER BY created_at DESC
                LIMIT %s
                """,
                (like, max_results),
            )
            batch = cursor.fetchall()
            if isinstance(batch, list):
                rows.extend(r for r in batch if isinstance(r, dict))

    out: list[PortalRequest] = []
    for row in rows:
        request_code = row.get("request_code")
        username = row.get("username")
        status = row.get("status")
        # Rows with a non-string column are skipped rather than coerced.
        if not isinstance(request_code, str) or not isinstance(username, str) or not isinstance(status, str):
            continue
        out.append(PortalRequest(request_code=request_code, username=username, status=status))
    return out
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_portal_requests(prefixes: list[str]) -> int:
    """Delete portal access requests whose username starts with any prefix.

    Prefixes are matched literally: ``%`` and ``_`` inside a prefix are escaped
    so a prefix such as ``test_`` cannot delete usernames that merely resemble
    it.

    Returns:
        The summed ``rowcount`` of the per-prefix DELETE statements, or 0 when
        the database is not configured.
    """
    if not db.configured():
        return 0

    deleted = 0
    with db.connect() as conn:
        for prefix in prefixes:
            # '!' is declared as the escape character in the statement below;
            # escape it first so existing '!' characters stay literal.
            like = prefix.replace("!", "!!").replace("%", "!%").replace("_", "!_") + "%"
            cursor = conn.execute(
                "DELETE FROM access_requests WHERE username LIKE %s ESCAPE '!'",
                (like,),
            )
            # A falsy rowcount (None or 0) counts as zero deleted rows.
            deleted += cursor.rowcount or 0
    return deleted
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_portal_requests(rows: list[PortalRequest]) -> dict[str, int]:
|
|
||||||
counts: dict[str, int] = defaultdict(int)
|
|
||||||
for row in rows:
|
|
||||||
counts[row.status] += 1
|
|
||||||
return dict(counts)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
prog="test_user_cleanup",
|
|
||||||
description=(
|
|
||||||
"Manual-only cleanup for test users/requests. "
|
|
||||||
"This script is intended to be run inside the bstein-dev-home backend container."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--prefix",
|
|
||||||
action="append",
|
|
||||||
required=True,
|
|
||||||
help="Username prefix to target (repeatable). Example: --prefix test-",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--max",
|
|
||||||
type=int,
|
|
||||||
default=500,
|
|
||||||
help="Maximum users/requests to enumerate per prefix (default: 500).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--apply",
|
|
||||||
action="store_true",
|
|
||||||
help="Apply deletions (default is dry-run). Requires --confirm.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--confirm",
|
|
||||||
default="",
|
|
||||||
help="Required when using --apply. Must exactly equal the comma-separated prefix list.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--skip-keycloak",
|
|
||||||
action="store_true",
|
|
||||||
help="Skip deleting Keycloak users.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--skip-portal",
|
|
||||||
action="store_true",
|
|
||||||
help="Skip deleting portal (DB) access requests.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--protect",
|
|
||||||
action="append",
|
|
||||||
default=[],
|
|
||||||
help="Extra usernames to never delete (repeatable).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--verbose",
|
|
||||||
action="store_true",
|
|
||||||
help="List matched usernames/request codes.",
|
|
||||||
)
|
|
||||||
return parser.parse_args(argv)
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
    """Run the cleanup: enumerate matches, report them, and optionally delete.

    Without ``--apply`` only the report is printed. With ``--apply`` the
    ``--confirm`` value must equal the sorted, comma-joined prefix list; portal
    requests are then deleted before Keycloak users.

    Returns:
        0 on success, 2 for a usage error (no usable prefix or wrong
        ``--confirm``).
    """
    args = _parse_args(argv)

    prefixes = sorted({raw.strip() for raw in args.prefix if raw.strip()})
    if not prefixes:
        print("error: no valid --prefix values provided", file=sys.stderr)
        return 2

    expected_confirm = ",".join(prefixes)
    # Two usernames are always protected, in addition to those from --protect.
    protected = {"bstein", "robotuser"} | {name.strip() for name in args.protect if name.strip()}

    if args.apply and args.confirm != expected_confirm:
        print(
            f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')",
            file=sys.stderr,
        )
        return 2

    use_keycloak = not args.skip_keycloak
    use_portal = not args.skip_portal

    # Enumerate first so the report below reflects what a deletion would target.
    keycloak_users: list[KeycloakUser] = (
        _find_keycloak_users(prefixes, max_results=args.max, protected=protected) if use_keycloak else []
    )
    portal_requests: list[PortalRequest] = (
        _find_portal_requests(prefixes, max_results=args.max) if use_portal else []
    )

    print(f"prefixes: {expected_confirm}")
    print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}")
    if protected:
        print(f"protected usernames: {', '.join(sorted(protected))}")

    if use_keycloak:
        print(f"keycloak users matched: {len(keycloak_users)}")
        if args.verbose and keycloak_users:
            for user in sorted(keycloak_users, key=lambda u: u.username):
                print(f" - {user.username}")

    if use_portal:
        print(f"portal requests matched: {len(portal_requests)}")
        if portal_requests:
            by_status = _summarize_portal_requests(portal_requests)
            summary_str = ", ".join(f"{k}={v}" for k, v in sorted(by_status.items()))
            print(f" statuses: {summary_str}")
        if args.verbose and portal_requests:
            # Verbose listing is capped at 50 entries.
            for req in portal_requests[:50]:
                print(f" - {req.request_code} ({req.status})")
            if len(portal_requests) > 50:
                print(f" ... and {len(portal_requests) - 50} more")

    if not args.apply:
        print("dry-run complete (no changes made)")
        return 0

    if use_portal:
        deleted = _delete_portal_requests(prefixes)
        print(f"deleted portal requests: {deleted}")

    if use_keycloak:
        _delete_keycloak_users(keycloak_users)
        print(f"deleted keycloak users: {len(keycloak_users)}")

    print("done")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: pass the CLI arguments (without the program name) to
# main() and exit with its return code.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
|
||||||
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
#!/usr/bin/env bash
set -euo pipefail

# Manual-only helper to run `scripts/test_user_cleanup.py` inside the portal backend container.
#
# The Python script is streamed to `python -` over stdin of `kubectl exec`, and
# all arguments given to this wrapper are forwarded to it unchanged.
#
# Environment overrides:
#   PORTAL_NAMESPACE            namespace of the backend (default: bstein-dev-home)
#   PORTAL_BACKEND_EXEC_TARGET  kubectl exec target (default: deploy/bstein-dev-home-backend)
#
# Usage (dry-run):
#   scripts/test_user_cleanup.sh --prefix test-
#
# Usage (apply):
#   scripts/test_user_cleanup.sh --prefix test- --apply --confirm test-

NS="${PORTAL_NAMESPACE:-bstein-dev-home}"
TARGET="${PORTAL_BACKEND_EXEC_TARGET:-deploy/bstein-dev-home-backend}"

# Directory containing this wrapper; the Python script lives next to it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Redirect the file instead of piping it through `cat`: if the script is
# missing, the shell fails here and `kubectl exec` is never started.
kubectl -n "${NS}" exec -i "${TARGET}" -- python - "$@" < "${SCRIPT_DIR}/test_user_cleanup.py"
|
|
||||||
|
|
||||||
@ -1,318 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Clean up Vaultwarden test users and invites (manual-only).
|
|
||||||
|
|
||||||
This script deletes Vaultwarden rows directly from the Postgres database. It is
|
|
||||||
intended only for removing test fallout (e.g. e2e-*, test-*) and is deliberately
|
|
||||||
conservative:
|
|
||||||
|
|
||||||
- Requires one or more explicit email prefixes (repeatable).
|
|
||||||
- Dry-run by default; --apply requires an exact --confirm guard.
|
|
||||||
- Refuses to delete any user with dependent data in Vaultwarden tables.
|
|
||||||
- Supports a protected email allowlist to prevent catastrophic mistakes.
|
|
||||||
|
|
||||||
Example (dry-run):
|
|
||||||
scripts/test_vaultwarden_user_cleanup.py --prefix e2e-
|
|
||||||
|
|
||||||
Example (apply):
|
|
||||||
scripts/test_vaultwarden_user_cleanup.py --prefix e2e- --apply --confirm e2e-
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Iterable, Sequence
|
|
||||||
|
|
||||||
|
|
||||||
_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$")
|
|
||||||
_UUID_RE = re.compile(r"^[0-9a-fA-F-]{32,36}$")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class VaultwardenUser:
    """Immutable record for one candidate row of the Vaultwarden ``users`` table."""

    # uuid column of the user row.
    uuid: str
    # email column of the user row.
    email: str
    # Number of rows in other Vaultwarden tables that reference this user.
    dependent_rows: int
|
|
||||||
|
|
||||||
|
|
||||||
def _run(cmd: Sequence[str], *, input_bytes: bytes | None = None) -> str:
|
|
||||||
proc = subprocess.run(
|
|
||||||
list(cmd),
|
|
||||||
input=input_bytes,
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE,
|
|
||||||
check=False,
|
|
||||||
)
|
|
||||||
if proc.returncode != 0:
|
|
||||||
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
|
|
||||||
raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}")
|
|
||||||
return proc.stdout.decode("utf-8", errors="replace")
|
|
||||||
|
|
||||||
|
|
||||||
def _kubectl_first_pod(namespace: str) -> str:
    """Return the name of the first pod ``kubectl get pods`` lists in ``namespace``.

    Raises:
        RuntimeError: If the pod list is missing or empty, or the first entry
            has no usable ``metadata.name``.
    """
    listing = json.loads(_run(["kubectl", "-n", namespace, "get", "pods", "-o", "json"]))
    pods = listing.get("items") or []
    if not (isinstance(pods, list) and pods):
        raise RuntimeError(f"no pods found in namespace {namespace}")

    # Only the first listed pod is considered; its phase is not inspected.
    pod_name = pods[0].get("metadata", {}).get("name")
    if isinstance(pod_name, str) and pod_name:
        return pod_name
    raise RuntimeError(f"unexpected pod list in namespace {namespace}")
|
|
||||||
|
|
||||||
|
|
||||||
def _psql(sql: str) -> str:
    """Run ``sql`` against the ``vaultwarden`` database and return raw output.

    psql is executed as user ``postgres`` inside the first pod of the
    ``postgres`` namespace, with ``-At -F <TAB>`` (unaligned, tuples only,
    tab-separated fields).
    """
    pod = _kubectl_first_pod("postgres")
    kubectl_exec = ["kubectl", "-n", "postgres", "exec", "-i", pod, "--"]
    psql_cmd = ["psql", "-U", "postgres", "-d", "vaultwarden", "-At", "-F", "\t", "-c", sql]
    return _run(kubectl_exec + psql_cmd)
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_prefixes(prefixes: Iterable[str]) -> list[str]:
    """Strip, validate and normalise the user-supplied email prefixes.

    Blank entries are dropped. Every remaining entry must match
    ``_SAFE_PREFIX_RE`` and end with ``-``.

    Returns:
        The accepted prefixes, de-duplicated and sorted.

    Raises:
        SystemExit: If a prefix is rejected, or no prefix is left.
    """
    accepted: set[str] = set()
    for raw in prefixes:
        candidate = raw.strip()
        if candidate == "":
            continue
        if _SAFE_PREFIX_RE.match(candidate) is None:
            raise SystemExit(
                f"invalid prefix '{candidate}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)"
            )
        # A trailing '-' is required as an extra guard against broad prefixes.
        if not candidate.endswith("-"):
            raise SystemExit(f"refusing prefix '{candidate}': must end with '-' for safety")
        accepted.add(candidate)

    if not accepted:
        raise SystemExit("at least one --prefix is required")
    return sorted(accepted)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_rows(tsv: str) -> list[list[str]]:
|
|
||||||
rows: list[list[str]] = []
|
|
||||||
for line in tsv.splitlines():
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
rows.append(line.split("\t"))
|
|
||||||
return rows
|
|
||||||
|
|
||||||
|
|
||||||
def _sql_or_email_prefixes(prefixes: list[str]) -> str:
|
|
||||||
# prefixes validated to safe charset; safe to interpolate.
|
|
||||||
clauses = [f"email LIKE '{p}%'" for p in prefixes]
|
|
||||||
return " OR ".join(clauses) if clauses else "FALSE"
|
|
||||||
|
|
||||||
|
|
||||||
def _sql_quote(value: str) -> str:
|
|
||||||
return "'" + value.replace("'", "''") + "'"
|
|
||||||
|
|
||||||
|
|
||||||
def _sql_text_array(values: Iterable[str]) -> str:
    """Render ``values`` as an ``ARRAY[...]::text[]`` SQL expression.

    Each element is quoted with ``_sql_quote``. An empty iterable renders as
    ``ARRAY[]::text[]``.
    """
    quoted = [_sql_quote(item) for item in values]
    return "ARRAY[" + ",".join(quoted) + "]::text[]"
|
|
||||||
|
|
||||||
|
|
||||||
def _list_users(prefixes: list[str], protected: set[str]) -> list[VaultwardenUser]:
|
|
||||||
clause = _sql_or_email_prefixes(prefixes)
|
|
||||||
sql = f"""
|
|
||||||
WITH candidates AS (
|
|
||||||
SELECT uuid, email
|
|
||||||
FROM users
|
|
||||||
WHERE enabled
|
|
||||||
AND ({clause})
|
|
||||||
AND email <> ALL({_sql_text_array(sorted(protected))})
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
candidates.uuid,
|
|
||||||
candidates.email,
|
|
||||||
(
|
|
||||||
(SELECT COUNT(*) FROM auth_requests WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM ciphers WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM devices WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM emergency_access WHERE grantor_uuid = candidates.uuid OR grantee_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM favorites WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM folders WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM sends WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM twofactor WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM twofactor_incomplete WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM users_collections WHERE user_uuid = candidates.uuid) +
|
|
||||||
(SELECT COUNT(*) FROM users_organizations WHERE user_uuid = candidates.uuid)
|
|
||||||
) AS dependent_rows
|
|
||||||
FROM candidates
|
|
||||||
ORDER BY candidates.email;
|
|
||||||
"""
|
|
||||||
out = _psql(sql)
|
|
||||||
users: list[VaultwardenUser] = []
|
|
||||||
for row in _parse_rows(out):
|
|
||||||
if len(row) < 3:
|
|
||||||
continue
|
|
||||||
uuid, email, dep_raw = row[0].strip(), row[1].strip(), row[2].strip()
|
|
||||||
if not uuid or not email:
|
|
||||||
continue
|
|
||||||
if not _UUID_RE.match(uuid):
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
dep = int(dep_raw)
|
|
||||||
except ValueError:
|
|
||||||
dep = 0
|
|
||||||
users.append(VaultwardenUser(uuid=uuid, email=email, dependent_rows=dep))
|
|
||||||
return users
|
|
||||||
|
|
||||||
|
|
||||||
def _list_invitations(prefixes: list[str], protected: set[str]) -> list[str]:
|
|
||||||
clause = _sql_or_email_prefixes(prefixes)
|
|
||||||
protected_clause = ""
|
|
||||||
if protected:
|
|
||||||
protected_clause = f"AND email <> ALL({_sql_text_array(sorted(protected))})"
|
|
||||||
sql = f"SELECT email FROM invitations WHERE ({clause}) {protected_clause} ORDER BY email;"
|
|
||||||
out = _psql(sql)
|
|
||||||
invites: list[str] = []
|
|
||||||
for row in _parse_rows(out):
|
|
||||||
if not row:
|
|
||||||
continue
|
|
||||||
email = row[0].strip()
|
|
||||||
if email:
|
|
||||||
invites.append(email)
|
|
||||||
return invites
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_invitations(emails: list[str]) -> int:
|
|
||||||
if not emails:
|
|
||||||
return 0
|
|
||||||
email_list = ",".join(_sql_quote(e) for e in emails)
|
|
||||||
sql = f"DELETE FROM invitations WHERE email IN ({email_list});"
|
|
||||||
out = _psql(sql)
|
|
||||||
match = re.search(r"DELETE\s+(\d+)", out)
|
|
||||||
return int(match.group(1)) if match else 0
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_users(uuids: list[str]) -> int:
|
|
||||||
if not uuids:
|
|
||||||
return 0
|
|
||||||
uuid_list = ",".join(_sql_quote(u) for u in uuids)
|
|
||||||
sql = f"DELETE FROM users WHERE uuid IN ({uuid_list});"
|
|
||||||
out = _psql(sql)
|
|
||||||
match = re.search(r"DELETE\s+(\d+)", out)
|
|
||||||
return int(match.group(1)) if match else 0
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
prog="test_vaultwarden_user_cleanup",
|
|
||||||
description="Manual-only cleanup for Vaultwarden test users/invites (DB-level).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--prefix",
|
|
||||||
action="append",
|
|
||||||
required=True,
|
|
||||||
help="Email prefix to target (repeatable). Example: --prefix e2e-",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--apply",
|
|
||||||
action="store_true",
|
|
||||||
help="Apply deletions (default is dry-run). Requires --confirm.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--confirm",
|
|
||||||
default="",
|
|
||||||
help="Required when using --apply. Must exactly equal the comma-separated prefix list.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--protect-email",
|
|
||||||
action="append",
|
|
||||||
default=[],
|
|
||||||
help="Vaultwarden emails that must never be deleted (repeatable).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--verbose",
|
|
||||||
action="store_true",
|
|
||||||
help="List matched emails (and invitation emails).",
|
|
||||||
)
|
|
||||||
return parser.parse_args(argv)
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
|
|
||||||
args = _parse_args(argv)
|
|
||||||
prefixes = _validate_prefixes(args.prefix)
|
|
||||||
expected_confirm = ",".join(prefixes)
|
|
||||||
|
|
||||||
protected = {e.strip() for e in args.protect_email if e.strip()}
|
|
||||||
protected |= {
|
|
||||||
"brad@bstein.dev",
|
|
||||||
"edstein87@outlook.com",
|
|
||||||
"indifox8@gmail.com",
|
|
||||||
"mgs.stein@gmail.com",
|
|
||||||
"patriot87@gmail.com",
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.apply and args.confirm != expected_confirm:
|
|
||||||
print(
|
|
||||||
f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
return 2
|
|
||||||
|
|
||||||
users = _list_users(prefixes, protected=protected)
|
|
||||||
invites = _list_invitations(prefixes, protected=protected)
|
|
||||||
|
|
||||||
print(f"prefixes: {expected_confirm}")
|
|
||||||
print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}")
|
|
||||||
if protected:
|
|
||||||
print(f"protected emails: {', '.join(sorted(protected))}")
|
|
||||||
print(f"vaultwarden users matched: {len(users)}")
|
|
||||||
print(f"vaultwarden invitations matched: {len(invites)}")
|
|
||||||
|
|
||||||
if args.verbose:
|
|
||||||
for user in users[: min(100, len(users))]:
|
|
||||||
print(f" user: {user.email} (deps={user.dependent_rows})")
|
|
||||||
if len(users) > 100:
|
|
||||||
print(f" ... and {len(users) - 100} more users")
|
|
||||||
for email in invites[: min(100, len(invites))]:
|
|
||||||
print(f" invite: {email}")
|
|
||||||
if len(invites) > 100:
|
|
||||||
print(f" ... and {len(invites) - 100} more invitations")
|
|
||||||
|
|
||||||
unsafe = [u for u in users if u.dependent_rows > 0]
|
|
||||||
if unsafe:
|
|
||||||
print("refusing to delete users with dependent data:", file=sys.stderr)
|
|
||||||
for user in unsafe[: min(50, len(unsafe))]:
|
|
||||||
print(f" - {user.email} deps={user.dependent_rows}", file=sys.stderr)
|
|
||||||
if len(unsafe) > 50:
|
|
||||||
print(f" ... and {len(unsafe) - 50} more", file=sys.stderr)
|
|
||||||
return 2
|
|
||||||
|
|
||||||
if not args.apply:
|
|
||||||
print("dry-run complete (no changes made)")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
deleted_invites = _delete_invitations(invites)
|
|
||||||
deleted_users = _delete_users([u.uuid for u in users])
|
|
||||||
print(f"deleted vaultwarden invitations: {deleted_invites}")
|
|
||||||
print(f"deleted vaultwarden users: {deleted_users}")
|
|
||||||
print("done")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
raise SystemExit(main(sys.argv[1:]))
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# Manual-only helper to clean Vaultwarden test users and invites from Postgres.
|
|
||||||
#
|
|
||||||
# Usage (dry-run):
|
|
||||||
# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e-
|
|
||||||
#
|
|
||||||
# Usage (apply):
|
|
||||||
# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e- --apply --confirm e2e-
|
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
|
|
||||||
|
|
||||||
python3 "${SCRIPT_DIR}/test_vaultwarden_user_cleanup.py" "$@"
|
|
||||||
|
|
||||||
@ -20,13 +20,7 @@ def load_sync_module(monkeypatch):
|
|||||||
}
|
}
|
||||||
for k, v in env.items():
|
for k, v in env.items():
|
||||||
monkeypatch.setenv(k, v)
|
monkeypatch.setenv(k, v)
|
||||||
module_path = (
|
module_path = pathlib.Path(__file__).resolve().parents[1] / "mailu_sync.py"
|
||||||
pathlib.Path(__file__).resolve().parents[2]
|
|
||||||
/ "services"
|
|
||||||
/ "mailu"
|
|
||||||
/ "scripts"
|
|
||||||
/ "mailu_sync.py"
|
|
||||||
)
|
|
||||||
spec = importlib.util.spec_from_file_location("mailu_sync_testmod", module_path)
|
spec = importlib.util.spec_from_file_location("mailu_sync_testmod", module_path)
|
||||||
module = importlib.util.module_from_spec(spec)
|
module = importlib.util.module_from_spec(spec)
|
||||||
assert spec.loader is not None
|
assert spec.loader is not None
|
||||||
@ -108,8 +102,7 @@ def test_kc_get_users_paginates(monkeypatch):
|
|||||||
sync.SESSION = _PagedSession()
|
sync.SESSION = _PagedSession()
|
||||||
users = sync.kc_get_users("tok")
|
users = sync.kc_get_users("tok")
|
||||||
assert [u["id"] for u in users] == ["u1", "u2"]
|
assert [u["id"] for u in users] == ["u1", "u2"]
|
||||||
# Pagination stops when results < page size.
|
assert sync.SESSION.calls == 2
|
||||||
assert sync.SESSION.calls == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
||||||
@ -126,7 +119,6 @@ def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
|||||||
|
|
||||||
def test_ensure_mailu_user_upserts(monkeypatch):
|
def test_ensure_mailu_user_upserts(monkeypatch):
|
||||||
sync = load_sync_module(monkeypatch)
|
sync = load_sync_module(monkeypatch)
|
||||||
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
|
|
||||||
captured = {}
|
captured = {}
|
||||||
|
|
||||||
class _Cursor:
|
class _Cursor:
|
||||||
@ -142,7 +134,6 @@ def test_ensure_mailu_user_upserts(monkeypatch):
|
|||||||
|
|
||||||
def test_main_generates_password_and_upserts(monkeypatch):
|
def test_main_generates_password_and_upserts(monkeypatch):
|
||||||
sync = load_sync_module(monkeypatch)
|
sync = load_sync_module(monkeypatch)
|
||||||
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
|
|
||||||
users = [
|
users = [
|
||||||
{"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}},
|
{"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}},
|
||||||
{"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}},
|
{"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}},
|
||||||
@ -185,6 +176,6 @@ def test_main_generates_password_and_upserts(monkeypatch):
|
|||||||
|
|
||||||
sync.main()
|
sync.main()
|
||||||
|
|
||||||
# Always backfill mailu_email, even if Keycloak recovery email is external.
|
# Should attempt two inserts (third user skipped due to domain mismatch)
|
||||||
assert len(updated) == 3
|
assert len(updated) == 1 # only one missing attr was backfilled
|
||||||
assert conns and len(conns[0]._cursor.executions) == 3
|
assert conns and len(conns[0]._cursor.executions) == 2
|
||||||
|
|||||||
@ -1,105 +0,0 @@
|
|||||||
# services/ai-llm/deployment.yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: ollama
|
|
||||||
namespace: ai
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
revisionHistoryLimit: 2
|
|
||||||
strategy:
|
|
||||||
type: RollingUpdate
|
|
||||||
rollingUpdate:
|
|
||||||
maxSurge: 0
|
|
||||||
maxUnavailable: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: ollama
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: ollama
|
|
||||||
annotations:
|
|
||||||
ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
|
|
||||||
ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
|
|
||||||
spec:
|
|
||||||
affinity:
|
|
||||||
nodeAffinity:
|
|
||||||
requiredDuringSchedulingIgnoredDuringExecution:
|
|
||||||
nodeSelectorTerms:
|
|
||||||
- matchExpressions:
|
|
||||||
- key: kubernetes.io/hostname
|
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- titan-20
|
|
||||||
- titan-21
|
|
||||||
- titan-22
|
|
||||||
- titan-24
|
|
||||||
runtimeClassName: nvidia
|
|
||||||
volumes:
|
|
||||||
- name: models
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: ollama-models
|
|
||||||
initContainers:
|
|
||||||
- name: warm-model
|
|
||||||
image: ollama/ollama:latest
|
|
||||||
env:
|
|
||||||
- name: OLLAMA_HOST
|
|
||||||
value: 0.0.0.0
|
|
||||||
- name: NVIDIA_VISIBLE_DEVICES
|
|
||||||
value: all
|
|
||||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
|
||||||
value: compute,utility
|
|
||||||
- name: OLLAMA_MODELS
|
|
||||||
value: /root/.ollama
|
|
||||||
- name: OLLAMA_MODEL
|
|
||||||
value: qwen2.5-coder:7b-instruct-q4_0
|
|
||||||
command:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- |
|
|
||||||
set -e
|
|
||||||
ollama serve >/tmp/ollama.log 2>&1 &
|
|
||||||
sleep 6
|
|
||||||
ollama pull "${OLLAMA_MODEL}"
|
|
||||||
pkill ollama || true
|
|
||||||
volumeMounts:
|
|
||||||
- name: models
|
|
||||||
mountPath: /root/.ollama
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 250m
|
|
||||||
memory: 1Gi
|
|
||||||
nvidia.com/gpu.shared: 1
|
|
||||||
limits:
|
|
||||||
nvidia.com/gpu.shared: 1
|
|
||||||
containers:
|
|
||||||
- name: ollama
|
|
||||||
image: ollama/ollama:latest
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 11434
|
|
||||||
env:
|
|
||||||
- name: OLLAMA_HOST
|
|
||||||
value: 0.0.0.0
|
|
||||||
- name: OLLAMA_KEEP_ALIVE
|
|
||||||
value: 6h
|
|
||||||
- name: OLLAMA_MODELS
|
|
||||||
value: /root/.ollama
|
|
||||||
- name: NVIDIA_VISIBLE_DEVICES
|
|
||||||
value: all
|
|
||||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
|
||||||
value: compute,utility
|
|
||||||
volumeMounts:
|
|
||||||
- name: models
|
|
||||||
mountPath: /root/.ollama
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: "2"
|
|
||||||
memory: 8Gi
|
|
||||||
nvidia.com/gpu.shared: 1
|
|
||||||
limits:
|
|
||||||
cpu: "4"
|
|
||||||
memory: 12Gi
|
|
||||||
nvidia.com/gpu.shared: 1
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
# services/ai-llm/kustomization.yaml
|
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
namespace: ai
|
|
||||||
resources:
|
|
||||||
- namespace.yaml
|
|
||||||
- pvc.yaml
|
|
||||||
- deployment.yaml
|
|
||||||
- service.yaml
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
# services/ai-llm/namespace.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: ai
|
|
||||||
@ -1,13 +0,0 @@
|
|||||||
# services/ai-llm/pvc.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: ollama-models
|
|
||||||
namespace: ai
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 30Gi
|
|
||||||
storageClassName: astreae
|
|
||||||
@ -1,14 +0,0 @@
|
|||||||
# services/ai-llm/service.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: ollama
|
|
||||||
namespace: ai
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
selector:
|
|
||||||
app: ollama
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 11434
|
|
||||||
targetPort: 11434
|
|
||||||
@ -5,7 +5,7 @@ metadata:
|
|||||||
name: bstein-dev-home-backend
|
name: bstein-dev-home-backend
|
||||||
namespace: bstein-dev-home
|
namespace: bstein-dev-home
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 2
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -15,8 +15,6 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: bstein-dev-home-backend
|
app: bstein-dev-home-backend
|
||||||
spec:
|
spec:
|
||||||
automountServiceAccountToken: true
|
|
||||||
serviceAccountName: bstein-dev-home
|
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/arch: arm64
|
kubernetes.io/arch: arm64
|
||||||
node-role.kubernetes.io/worker: "true"
|
node-role.kubernetes.io/worker: "true"
|
||||||
@ -24,73 +22,8 @@ spec:
|
|||||||
- name: harbor-bstein-robot
|
- name: harbor-bstein-robot
|
||||||
containers:
|
containers:
|
||||||
- name: backend
|
- name: backend
|
||||||
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command: ["gunicorn"]
|
|
||||||
args:
|
|
||||||
- -b
|
|
||||||
- 0.0.0.0:8080
|
|
||||||
- --workers
|
|
||||||
- "2"
|
|
||||||
- --timeout
|
|
||||||
- "180"
|
|
||||||
- app:app
|
|
||||||
env:
|
|
||||||
- name: AI_CHAT_API
|
|
||||||
value: http://ollama.ai.svc.cluster.local:11434
|
|
||||||
- name: AI_CHAT_MODEL
|
|
||||||
value: qwen2.5-coder:7b-instruct-q4_0
|
|
||||||
- name: AI_CHAT_TIMEOUT_SEC
|
|
||||||
value: "60"
|
|
||||||
- name: AI_NODE_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: spec.nodeName
|
|
||||||
- name: AI_NODE_GPU_MAP
|
|
||||||
value: |
|
|
||||||
{"titan-20": "Jetson Xavier (edge GPU)", "titan-21": "Jetson Xavier (edge GPU)", "titan-22": "RTX 3050 8GB (local GPU)", "titan-24": "RTX 3080 8GB (local GPU)"}
|
|
||||||
- name: KEYCLOAK_ENABLED
|
|
||||||
value: "true"
|
|
||||||
- name: KEYCLOAK_URL
|
|
||||||
value: https://sso.bstein.dev
|
|
||||||
- name: KEYCLOAK_REALM
|
|
||||||
value: atlas
|
|
||||||
- name: KEYCLOAK_CLIENT_ID
|
|
||||||
value: bstein-dev-home
|
|
||||||
- name: KEYCLOAK_ISSUER
|
|
||||||
value: https://sso.bstein.dev/realms/atlas
|
|
||||||
- name: KEYCLOAK_JWKS_URL
|
|
||||||
value: http://keycloak.sso.svc.cluster.local/realms/atlas/protocol/openid-connect/certs
|
|
||||||
- name: KEYCLOAK_ADMIN_URL
|
|
||||||
value: http://keycloak.sso.svc.cluster.local
|
|
||||||
- name: KEYCLOAK_ADMIN_REALM
|
|
||||||
value: atlas
|
|
||||||
- name: KEYCLOAK_ADMIN_CLIENT_ID
|
|
||||||
value: bstein-dev-home-admin
|
|
||||||
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: bstein-dev-home-keycloak-admin
|
|
||||||
key: client_secret
|
|
||||||
- name: ACCOUNT_ALLOWED_GROUPS
|
|
||||||
value: ""
|
|
||||||
- name: PORTAL_DATABASE_URL
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: atlas-portal-db
|
|
||||||
key: PORTAL_DATABASE_URL
|
|
||||||
- name: HTTP_CHECK_TIMEOUT_SEC
|
|
||||||
value: "2"
|
|
||||||
- name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT
|
|
||||||
value: "30"
|
|
||||||
- name: ACCESS_REQUEST_SUBMIT_RATE_WINDOW_SEC
|
|
||||||
value: "3600"
|
|
||||||
- name: ACCESS_REQUEST_STATUS_RATE_LIMIT
|
|
||||||
value: "120"
|
|
||||||
- name: ACCESS_REQUEST_STATUS_RATE_WINDOW_SEC
|
|
||||||
value: "60"
|
|
||||||
- name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST
|
|
||||||
value: robotuser@bstein.dev
|
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
containerPort: 8080
|
containerPort: 8080
|
||||||
@ -100,18 +33,16 @@ spec:
|
|||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 2
|
initialDelaySeconds: 2
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
timeoutSeconds: 3
|
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /api/healthz
|
path: /api/healthz
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
timeoutSeconds: 3
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 50m
|
||||||
memory: 128Mi
|
memory: 64Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 300m
|
||||||
memory: 512Mi
|
memory: 256Mi
|
||||||
|
|||||||
@ -1,69 +0,0 @@
|
|||||||
# services/bstein-dev-home/chat-ai-gateway-deployment.yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: chat-ai-gateway
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
revisionHistoryLimit: 2
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: chat-ai-gateway
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: chat-ai-gateway
|
|
||||||
spec:
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/arch: arm64
|
|
||||||
node-role.kubernetes.io/worker: "true"
|
|
||||||
containers:
|
|
||||||
- name: gateway
|
|
||||||
image: python:3.11-slim
|
|
||||||
command: ["/bin/sh","-c"]
|
|
||||||
args:
|
|
||||||
- python /app/gateway.py
|
|
||||||
env:
|
|
||||||
- name: UPSTREAM_URL
|
|
||||||
value: http://bstein-dev-home-backend/api/chat
|
|
||||||
- name: CHAT_KEY_MATRIX
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: chat-ai-keys-runtime
|
|
||||||
key: matrix
|
|
||||||
- name: CHAT_KEY_HOMEPAGE
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: chat-ai-keys-runtime
|
|
||||||
key: homepage
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 8080
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /healthz
|
|
||||||
port: http
|
|
||||||
initialDelaySeconds: 2
|
|
||||||
periodSeconds: 5
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /healthz
|
|
||||||
port: http
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 10
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 20m
|
|
||||||
memory: 64Mi
|
|
||||||
limits:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 256Mi
|
|
||||||
volumeMounts:
|
|
||||||
- name: code
|
|
||||||
mountPath: /app/gateway.py
|
|
||||||
subPath: gateway.py
|
|
||||||
volumes:
|
|
||||||
- name: code
|
|
||||||
configMap:
|
|
||||||
name: chat-ai-gateway
|
|
||||||
@ -1,13 +0,0 @@
|
|||||||
# services/bstein-dev-home/chat-ai-gateway-service.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: chat-ai-gateway
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: chat-ai-gateway
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 80
|
|
||||||
targetPort: 8080
|
|
||||||
@ -5,7 +5,7 @@ metadata:
|
|||||||
name: bstein-dev-home-frontend
|
name: bstein-dev-home-frontend
|
||||||
namespace: bstein-dev-home
|
namespace: bstein-dev-home
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 2
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -22,7 +22,7 @@ spec:
|
|||||||
- name: harbor-bstein-robot
|
- name: harbor-bstein-robot
|
||||||
containers:
|
containers:
|
||||||
- name: frontend
|
- name: frontend
|
||||||
image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
image: registry.bstein.dev/bstein/bstein-dev-home-frontend:latest
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@ -11,7 +11,7 @@ metadata:
|
|||||||
cert-manager.io/cluster-issuer: letsencrypt
|
cert-manager.io/cluster-issuer: letsencrypt
|
||||||
spec:
|
spec:
|
||||||
tls:
|
tls:
|
||||||
- hosts: [ "bstein.dev", "chat.ai.bstein.dev" ]
|
- hosts: [ "bstein.dev" ]
|
||||||
secretName: bstein-dev-home-tls
|
secretName: bstein-dev-home-tls
|
||||||
rules:
|
rules:
|
||||||
- host: bstein.dev
|
- host: bstein.dev
|
||||||
@ -29,12 +29,3 @@ spec:
|
|||||||
service:
|
service:
|
||||||
name: bstein-dev-home-frontend
|
name: bstein-dev-home-frontend
|
||||||
port: { number: 80 }
|
port: { number: 80 }
|
||||||
- host: chat.ai.bstein.dev
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: chat-ai-gateway
|
|
||||||
port: { number: 80 }
|
|
||||||
|
|||||||
@ -5,38 +5,13 @@ namespace: bstein-dev-home
|
|||||||
resources:
|
resources:
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
- image.yaml
|
- image.yaml
|
||||||
- rbac.yaml
|
|
||||||
- portal-e2e-client-secret-sync-rbac.yaml
|
|
||||||
- chat-ai-gateway-deployment.yaml
|
|
||||||
- chat-ai-gateway-service.yaml
|
|
||||||
- frontend-deployment.yaml
|
- frontend-deployment.yaml
|
||||||
- frontend-service.yaml
|
- frontend-service.yaml
|
||||||
- backend-deployment.yaml
|
- backend-deployment.yaml
|
||||||
- backend-service.yaml
|
- backend-service.yaml
|
||||||
- vaultwarden-cred-sync-cronjob.yaml
|
|
||||||
- portal-onboarding-e2e-test-job.yaml
|
|
||||||
- ingress.yaml
|
- ingress.yaml
|
||||||
images:
|
images:
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
||||||
newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
||||||
newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
||||||
configMapGenerator:
|
|
||||||
- name: chat-ai-gateway
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
files:
|
|
||||||
- gateway.py=scripts/gateway.py
|
|
||||||
options:
|
|
||||||
disableNameSuffixHash: true
|
|
||||||
- name: vaultwarden-cred-sync-script
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
files:
|
|
||||||
- vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
|
|
||||||
options:
|
|
||||||
disableNameSuffixHash: true
|
|
||||||
- name: portal-onboarding-e2e-tests
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
files:
|
|
||||||
- test_portal_onboarding_flow.py=scripts/test_portal_onboarding_flow.py
|
|
||||||
options:
|
|
||||||
disableNameSuffixHash: true
|
|
||||||
|
|||||||
@ -1,24 +0,0 @@
|
|||||||
# services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: portal-e2e-client-secret-sync-target
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["secrets"]
|
|
||||||
verbs: ["get", "create", "patch", "update"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: portal-e2e-client-secret-sync-target
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: portal-e2e-client-secret-sync
|
|
||||||
namespace: sso
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: portal-e2e-client-secret-sync-target
|
|
||||||
@ -1,66 +0,0 @@
|
|||||||
# services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml
|
|
||||||
apiVersion: batch/v1
|
|
||||||
kind: Job
|
|
||||||
metadata:
|
|
||||||
name: portal-onboarding-e2e-test-11
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
spec:
|
|
||||||
backoffLimit: 0
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
restartPolicy: Never
|
|
||||||
containers:
|
|
||||||
- name: test
|
|
||||||
image: python:3.11-slim
|
|
||||||
env:
|
|
||||||
- name: PORTAL_BASE_URL
|
|
||||||
value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local
|
|
||||||
- name: KEYCLOAK_ADMIN_URL
|
|
||||||
value: https://sso.bstein.dev
|
|
||||||
- name: KEYCLOAK_REALM
|
|
||||||
value: atlas
|
|
||||||
- name: KEYCLOAK_ADMIN_CLIENT_ID
|
|
||||||
value: bstein-dev-home-admin
|
|
||||||
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: bstein-dev-home-keycloak-admin
|
|
||||||
key: client_secret
|
|
||||||
- name: PORTAL_E2E_CLIENT_ID
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: portal-e2e-client
|
|
||||||
key: client_id
|
|
||||||
- name: PORTAL_E2E_CLIENT_SECRET
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: portal-e2e-client
|
|
||||||
key: client_secret
|
|
||||||
- name: PORTAL_TARGET_CLIENT_ID
|
|
||||||
value: bstein-dev-home
|
|
||||||
- name: E2E_PORTAL_ADMIN_USERNAME
|
|
||||||
value: bstein
|
|
||||||
- name: E2E_USERNAME_PREFIX
|
|
||||||
value: e2e-portal
|
|
||||||
- name: E2E_CONTACT_EMAIL
|
|
||||||
value: robotuser@bstein.dev
|
|
||||||
- name: E2E_IMAP_KEYCLOAK_USERNAME
|
|
||||||
value: robotuser
|
|
||||||
- name: E2E_DEADLINE_SECONDS
|
|
||||||
value: "600"
|
|
||||||
- name: E2E_POLL_SECONDS
|
|
||||||
value: "10"
|
|
||||||
command: ["/bin/sh", "-c"]
|
|
||||||
args:
|
|
||||||
- |
|
|
||||||
set -euo pipefail
|
|
||||||
python /scripts/test_portal_onboarding_flow.py
|
|
||||||
volumeMounts:
|
|
||||||
- name: tests
|
|
||||||
mountPath: /scripts
|
|
||||||
readOnly: true
|
|
||||||
volumes:
|
|
||||||
- name: tests
|
|
||||||
configMap:
|
|
||||||
name: portal-onboarding-e2e-tests
|
|
||||||
defaultMode: 0555
|
|
||||||
@ -1,108 +0,0 @@
|
|||||||
# services/bstein-dev-home/rbac.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-ai-reader
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["pods"]
|
|
||||||
verbs: ["get", "list", "watch"]
|
|
||||||
resourceNames: []
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-ai-reader
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: bstein-dev-home-ai-reader
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: bstein-dev-home
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["secrets"]
|
|
||||||
verbs: ["get"]
|
|
||||||
resourceNames: ["vaultwarden-admin"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: bstein-dev-home
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-token-reader
|
|
||||||
namespace: vaultwarden
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["secrets"]
|
|
||||||
verbs: ["get"]
|
|
||||||
resourceNames: ["vaultwarden-admin"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-token-reader
|
|
||||||
namespace: vaultwarden
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: bstein-dev-home-vaultwarden-admin-token-reader
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: bstein-dev-home
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-nextcloud-mail-sync
|
|
||||||
namespace: nextcloud
|
|
||||||
rules:
|
|
||||||
- apiGroups: ["batch"]
|
|
||||||
resources: ["cronjobs"]
|
|
||||||
verbs: ["get"]
|
|
||||||
resourceNames: ["nextcloud-mail-sync"]
|
|
||||||
- apiGroups: ["batch"]
|
|
||||||
resources: ["jobs"]
|
|
||||||
verbs: ["create", "get", "list", "watch"]
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["pods"]
|
|
||||||
verbs: ["get", "list"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: bstein-dev-home-nextcloud-mail-sync
|
|
||||||
namespace: nextcloud
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: bstein-dev-home-nextcloud-mail-sync
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: bstein-dev-home
|
|
||||||
namespace: bstein-dev-home
|
|
||||||
@ -1,70 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
||||||
from urllib import request, error
|
|
||||||
|
|
||||||
# Endpoint that authorized chat requests are relayed to.
UPSTREAM = os.environ.get("UPSTREAM_URL", "http://bstein-dev-home-backend/api/chat")

# One API key per caller; an unset variable yields "" which is never accepted.
KEY_MATRIX = os.environ.get("CHAT_KEY_MATRIX", "")
KEY_HOMEPAGE = os.environ.get("CHAT_KEY_HOMEPAGE", "")

# Set of accepted x-api-key values, with empty keys filtered out.
ALLOWED = set(filter(None, (KEY_MATRIX, KEY_HOMEPAGE)))
|
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
    """API-key gate in front of the chat backend.

    GET ``/`` and ``/healthz`` answer a health probe. POST ``/`` requires an
    ``x-api-key`` header whose value is in ``ALLOWED``; the request body is then
    relayed to ``UPSTREAM`` and the upstream reply is returned to the caller.
    """

    # Upstream response headers that are not copied to the client. The reply is
    # fully buffered and re-framed with our own Content-Length, so framing and
    # hop-by-hop headers of the upstream connection (e.g. Transfer-Encoding:
    # chunked) would contradict the bytes we actually send. Server and Date are
    # skipped because send_response() already emits them.
    _SKIPPED_UPSTREAM_HEADERS = frozenset(
        {
            "content-length",
            "connection",
            "server",
            "date",
            "keep-alive",
            "proxy-authenticate",
            "proxy-authorization",
            "te",
            "trailer",
            "transfer-encoding",
            "upgrade",
        }
    )

    def _send_json(self, code: int, payload: dict):
        """Write `payload` as a JSON response with status `code`."""
        body = json.dumps(payload).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):  # noqa: N802
        """Serve the health probe; every other path is a 404."""
        if self.path in ("/healthz", "/"):
            return self._send_json(200, {"ok": True})
        return self._send_json(404, {"error": "not_found"})

    def do_POST(self):  # noqa: N802
        """Authenticate the caller and relay the request body to UPSTREAM.

        Responds 404 for paths other than ``/``, 401 for a missing or unknown
        API key, 400 for an unusable Content-Length, the upstream status and
        body when upstream answers (including HTTP error statuses), and 502
        for any other failure while talking to upstream.
        """
        if self.path != "/":
            return self._send_json(404, {"error": "not_found"})

        key = self.headers.get("x-api-key", "")
        if not key or key not in ALLOWED:
            return self._send_json(401, {"error": "unauthorized"})

        # A non-numeric length used to raise before any response was written,
        # and a negative one made rfile.read() wait for EOF; reject both.
        try:
            length = int(self.headers.get("content-length", "0") or "0")
        except ValueError:
            length = -1
        if length < 0:
            return self._send_json(400, {"error": "invalid_content_length"})
        raw = self.rfile.read(length) if length else b"{}"

        try:
            upstream_req = request.Request(
                UPSTREAM,
                data=raw,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with request.urlopen(upstream_req, timeout=90) as resp:
                data = resp.read()
                self.send_response(resp.status)
                for k, v in resp.headers.items():
                    if k.lower() in self._SKIPPED_UPSTREAM_HEADERS:
                        continue
                    self.send_header(k, v)
                self.send_header("Content-Length", str(len(data)))
                self.end_headers()
                self.wfile.write(data)
        except error.HTTPError as e:
            # Upstream answered with an error status: pass its code and body through.
            data = e.read() if hasattr(e, "read") else b""
            self.send_response(e.code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)
        except Exception:
            return self._send_json(502, {"error": "bad_gateway"})
|
|
||||||
|
|
||||||
def main():
    """Listen on all interfaces at $PORT (default 8080) and serve requests forever."""
    listen_port = int(os.environ.get("PORT", "8080"))
    HTTPServer(("0.0.0.0", listen_port), Handler).serve_forever()


if __name__ == "__main__":
    main()
|
|
||||||
@ -1,428 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import email
|
|
||||||
import http.client
|
|
||||||
import imaplib
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import ssl
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
|
|
||||||
|
|
||||||
def _env(name: str, default: str | None = None) -> str:
|
|
||||||
value = os.environ.get(name, default)
|
|
||||||
if value is None or value == "":
|
|
||||||
raise SystemExit(f"missing required env var: {name}")
|
|
||||||
return value
|
|
||||||
|
|
||||||
|
|
||||||
def _post_json(url: str, payload: dict, timeout_s: int = 30) -> dict:
|
|
||||||
body = json.dumps(payload).encode()
|
|
||||||
req = urllib.request.Request(
|
|
||||||
url,
|
|
||||||
data=body,
|
|
||||||
headers={"Content-Type": "application/json"},
|
|
||||||
method="POST",
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
|
||||||
raw = resp.read().decode()
|
|
||||||
return json.loads(raw) if raw else {}
|
|
||||||
except urllib.error.HTTPError as exc:
|
|
||||||
raw = exc.read().decode(errors="replace")
|
|
||||||
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
|
||||||
|
|
||||||
|
|
||||||
def _post_form(url: str, data: dict[str, str], timeout_s: int = 30) -> dict:
|
|
||||||
body = urllib.parse.urlencode(data).encode()
|
|
||||||
req = urllib.request.Request(
|
|
||||||
url,
|
|
||||||
data=body,
|
|
||||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
|
||||||
method="POST",
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
|
||||||
raw = resp.read().decode()
|
|
||||||
return json.loads(raw) if raw else {}
|
|
||||||
except urllib.error.HTTPError as exc:
|
|
||||||
raw = exc.read().decode(errors="replace")
|
|
||||||
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
|
||||||
|
|
||||||
|
|
||||||
def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int = 30) -> object:
|
|
||||||
req = urllib.request.Request(url, headers=headers or {}, method="GET")
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
|
||||||
raw = resp.read().decode()
|
|
||||||
return json.loads(raw) if raw else None
|
|
||||||
except urllib.error.HTTPError as exc:
|
|
||||||
raw = exc.read().decode(errors="replace")
|
|
||||||
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
|
||||||
|
|
||||||
|
|
||||||
def _request_json(
|
|
||||||
method: str,
|
|
||||||
url: str,
|
|
||||||
token: str,
|
|
||||||
payload: dict | None = None,
|
|
||||||
timeout_s: int = 30,
|
|
||||||
) -> dict:
|
|
||||||
data = None
|
|
||||||
headers = {"Authorization": f"Bearer {token}"}
|
|
||||||
if payload is not None:
|
|
||||||
data = json.dumps(payload).encode()
|
|
||||||
headers["Content-Type"] = "application/json"
|
|
||||||
req = urllib.request.Request(url, data=data, headers=headers, method=method)
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
|
||||||
raw = resp.read().decode()
|
|
||||||
return json.loads(raw) if raw else {}
|
|
||||||
except urllib.error.HTTPError as exc:
|
|
||||||
raw = exc.read().decode(errors="replace")
|
|
||||||
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_client_token(keycloak_base: str, realm: str, client_id: str, client_secret: str) -> str:
    """Fetch an access token for a confidential client via the client_credentials grant.

    Exits through SystemExit when the token endpoint's reply has no non-empty
    string ``access_token``.
    """
    endpoint = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token"
    form = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    access_token = _post_form(endpoint, form, timeout_s=20).get("access_token")
    if isinstance(access_token, str) and access_token:
        return access_token
    raise SystemExit("keycloak token response missing access_token")
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_token_exchange(
    *,
    keycloak_base: str,
    realm: str,
    client_id: str,
    client_secret: str,
    subject_token: str,
    requested_subject: str,
    audience: str,
) -> str:
    """Exchange `subject_token` for an access token issued for `requested_subject`.

    Uses the token-exchange grant on the realm's token endpoint, authenticating
    as `client_id` and requesting `audience`. Exits through SystemExit when the
    reply has no non-empty string ``access_token``.
    """
    endpoint = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token"
    form = {
        "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
        "client_id": client_id,
        "client_secret": client_secret,
        "subject_token": subject_token,
        "requested_subject": requested_subject,
        "audience": audience,
    }
    access_token = _post_form(endpoint, form, timeout_s=20).get("access_token")
    if isinstance(access_token, str) and access_token:
        return access_token
    raise SystemExit("keycloak token exchange response missing access_token")
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_find_user(keycloak_base: str, realm: str, token: str, username: str) -> dict | None:
    """Look up one user by exact username through the Keycloak admin API.

    Returns the first entry of the result list when it is a dict; returns
    ``None`` when the reply is not a non-empty list or its first entry is not
    a dict.
    """
    query = urllib.parse.urlencode({"username": username, "exact": "true", "max": "1"})
    lookup_url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users?{query}"
    matches = _get_json(lookup_url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20)
    if isinstance(matches, list) and matches and isinstance(matches[0], dict):
        return matches[0]
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_get_user(keycloak_base: str, realm: str, token: str, user_id: str) -> dict:
    """Fetch the user representation for `user_id` from the Keycloak admin API.

    `user_id` is percent-encoded into the path. Exits through SystemExit when
    the reply is not a JSON object.
    """
    encoded_id = urllib.parse.quote(user_id, safe="")
    user_url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users/{encoded_id}"
    representation = _get_json(user_url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20)
    if isinstance(representation, dict):
        return representation
    raise SystemExit("unexpected keycloak user payload")
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_attr(attributes: object, key: str) -> str:
|
|
||||||
if not isinstance(attributes, dict):
|
|
||||||
return ""
|
|
||||||
value = attributes.get(key)
|
|
||||||
if isinstance(value, list) and value and isinstance(value[0], str):
|
|
||||||
return value[0]
|
|
||||||
if isinstance(value, str):
|
|
||||||
return value
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _imap_wait_for_verify_token(
|
|
||||||
*,
|
|
||||||
host: str,
|
|
||||||
port: int,
|
|
||||||
username: str,
|
|
||||||
password: str,
|
|
||||||
request_code: str,
|
|
||||||
deadline_sec: int,
|
|
||||||
) -> str:
|
|
||||||
ssl_context = ssl._create_unverified_context()
|
|
||||||
deadline_at = time.monotonic() + deadline_sec
|
|
||||||
|
|
||||||
with imaplib.IMAP4_SSL(host, port, ssl_context=ssl_context) as client:
|
|
||||||
client.login(username, password)
|
|
||||||
client.select("INBOX")
|
|
||||||
|
|
||||||
while time.monotonic() < deadline_at:
|
|
||||||
status, data = client.search(None, "TEXT", request_code)
|
|
||||||
if status == "OK" and data and data[0]:
|
|
||||||
ids = data[0].split()
|
|
||||||
msg_id = ids[-1]
|
|
||||||
fetch_status, msg_data = client.fetch(msg_id, "(RFC822)")
|
|
||||||
if fetch_status != "OK" or not msg_data:
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
|
|
||||||
raw = msg_data[0][1] if isinstance(msg_data[0], tuple) and len(msg_data[0]) > 1 else None
|
|
||||||
if not isinstance(raw, (bytes, bytearray)):
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
|
|
||||||
message = email.message_from_bytes(raw)
|
|
||||||
body = None
|
|
||||||
if message.is_multipart():
|
|
||||||
for part in message.walk():
|
|
||||||
if part.get_content_type() == "text/plain":
|
|
||||||
payload = part.get_payload(decode=True)
|
|
||||||
if isinstance(payload, (bytes, bytearray)):
|
|
||||||
body = payload.decode(errors="replace")
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
payload = message.get_payload(decode=True)
|
|
||||||
if isinstance(payload, (bytes, bytearray)):
|
|
||||||
body = payload.decode(errors="replace")
|
|
||||||
|
|
||||||
if not body:
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = None
|
|
||||||
for line in body.splitlines():
|
|
||||||
candidate = line.strip()
|
|
||||||
if "verify=" in candidate and candidate.startswith("http"):
|
|
||||||
url = candidate
|
|
||||||
break
|
|
||||||
if not url:
|
|
||||||
match = re.search(r"https?://\\S+verify=\\S+", body)
|
|
||||||
url = match.group(0) if match else None
|
|
||||||
if not url:
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
|
|
||||||
parsed = urllib.parse.urlparse(url)
|
|
||||||
query = urllib.parse.parse_qs(parsed.query)
|
|
||||||
token = query.get("verify", [""])[0]
|
|
||||||
if isinstance(token, str) and token:
|
|
||||||
return token
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
raise SystemExit("verification email not found before deadline")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
    """Run the portal onboarding flow end to end and check the resulting Keycloak user.

    Steps: submit an access request, read the verification token from the
    mailbox over IMAP, verify the request, approve it as the portal admin using
    a Keycloak token exchange, poll the request status until provisioning
    finishes, then inspect the created Keycloak user. Returns 0 on success;
    every failure leaves through SystemExit with a descriptive message.
    """
    # Transport-level failures that are retried below.
    transient_errors = (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError)

    # --- configuration -----------------------------------------------------
    portal_base = _env("PORTAL_BASE_URL").rstrip("/")

    keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/")
    realm = _env("KEYCLOAK_REALM", "atlas")
    admin_client_id = _env("KEYCLOAK_ADMIN_CLIENT_ID")
    admin_client_secret = _env("KEYCLOAK_ADMIN_CLIENT_SECRET")
    e2e_client_id = _env("PORTAL_E2E_CLIENT_ID")
    e2e_client_secret = _env("PORTAL_E2E_CLIENT_SECRET")
    target_client_id = os.environ.get("PORTAL_TARGET_CLIENT_ID", "bstein-dev-home").strip() or "bstein-dev-home"
    admin_username = os.environ.get("E2E_PORTAL_ADMIN_USERNAME", "bstein").strip() or "bstein"

    contact_email = os.environ.get("E2E_CONTACT_EMAIL", "robotuser@bstein.dev").strip()
    if not contact_email:
        raise SystemExit("E2E_CONTACT_EMAIL must not be empty")

    imap_host = os.environ.get("E2E_IMAP_HOST", "mailu-front.mailu-mailserver.svc.cluster.local").strip()
    imap_port = int(os.environ.get("E2E_IMAP_PORT", "993"))
    mailbox_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip()
    imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90"))

    # --- mailbox credentials are stored as Keycloak user attributes ---------
    try:
        admin_token = _keycloak_client_token(keycloak_base, realm, admin_client_id, admin_client_secret)
    except SystemExit as exc:
        raise SystemExit(f"failed to fetch keycloak token for admin client {admin_client_id!r}: {exc}")

    mailbox_user = _keycloak_find_user(keycloak_base, realm, admin_token, mailbox_username)
    if not mailbox_user:
        raise SystemExit(f"unable to locate Keycloak mailbox user {mailbox_username!r}")
    mailbox_user_id = mailbox_user.get("id")
    if not isinstance(mailbox_user_id, str) or not mailbox_user_id:
        raise SystemExit("mailbox user missing id")

    mailbox_attrs = _keycloak_get_user(keycloak_base, realm, admin_token, mailbox_user_id).get("attributes")
    # Log in with the stored mailbox address, or the contact address if none is stored.
    mailu_email = _extract_attr(mailbox_attrs, "mailu_email") or contact_email
    mailu_password = _extract_attr(mailbox_attrs, "mailu_app_password")
    if not mailu_password:
        raise SystemExit(f"Keycloak user {mailbox_username!r} missing mailu_app_password attribute")

    # --- submit the access request -----------------------------------------
    username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user")
    # The epoch-seconds suffix gives each run its own username.
    username = f"{username_prefix}-{int(time.time())}"

    submit_url = f"{portal_base}/api/access/request"
    submit_payload = {"username": username, "email": contact_email, "note": "portal onboarding e2e"}
    submit = None
    for attempt in range(1, 6):
        try:
            submit = _post_json(submit_url, submit_payload, timeout_s=20)
        except transient_errors as exc:
            if attempt == 5:
                raise SystemExit(f"portal submit failed after {attempt} attempts: {exc}")
            time.sleep(2)
        else:
            break
    if not isinstance(submit, dict):
        raise SystemExit("portal submit did not return json")

    request_code = submit.get("request_code")
    if not isinstance(request_code, str) or not request_code:
        raise SystemExit(f"request submit did not return request_code: {submit}")

    # --- verify the request using the emailed token -------------------------
    verify_token = _imap_wait_for_verify_token(
        host=imap_host,
        port=imap_port,
        username=mailu_email,
        password=mailu_password,
        request_code=request_code,
        deadline_sec=imap_wait_sec,
    )
    verify_resp = _post_json(
        f"{portal_base}/api/access/request/verify",
        {"request_code": request_code, "token": verify_token},
        timeout_s=30,
    )
    if not isinstance(verify_resp, dict) or verify_resp.get("ok") is not True:
        raise SystemExit(f"unexpected verify response: {verify_resp}")

    # --- approve as the portal admin via token exchange ----------------------
    portal_admin = _keycloak_find_user(keycloak_base, realm, admin_token, admin_username)
    if not portal_admin:
        raise SystemExit(f"unable to locate portal admin user {admin_username!r} via Keycloak admin API")
    portal_admin_id = portal_admin.get("id")
    if not isinstance(portal_admin_id, str) or not portal_admin_id:
        raise SystemExit("portal admin user missing id")

    try:
        e2e_subject_token = _keycloak_client_token(keycloak_base, realm, e2e_client_id, e2e_client_secret)
    except SystemExit as exc:
        raise SystemExit(f"failed to fetch keycloak token for E2E client {e2e_client_id!r}: {exc}")
    try:
        portal_bearer = _keycloak_token_exchange(
            keycloak_base=keycloak_base,
            realm=realm,
            client_id=e2e_client_id,
            client_secret=e2e_client_secret,
            subject_token=e2e_subject_token,
            requested_subject=portal_admin_id,
            audience=target_client_id,
        )
    except SystemExit as exc:
        raise SystemExit(f"failed to exchange token for portal approval as {admin_username!r}: {exc}")

    approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve"
    approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180"))
    approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3"))
    approve_resp = None
    approve_error = None
    for attempt in range(1, approve_attempts + 1):
        try:
            approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=approve_timeout_s)
        except transient_errors as exc:
            approve_error = str(exc)
            if attempt < approve_attempts:
                time.sleep(3)
        else:
            approve_error = None
            break

    if approve_resp is None:
        # No response is not fatal here: the status poll below decides the outcome.
        print(
            "WARNING: portal approval request did not return a response; "
            f"continuing to poll status (last_error={approve_error})"
        )
    elif not isinstance(approve_resp, dict) or approve_resp.get("ok") is not True:
        raise SystemExit(f"unexpected approval response: {approve_resp}")

    # --- wait for provisioning ----------------------------------------------
    status_url = f"{portal_base}/api/access/request/status"
    deadline_s = int(os.environ.get("E2E_DEADLINE_SECONDS", "600"))
    interval_s = int(os.environ.get("E2E_POLL_SECONDS", "10"))
    deadline_at = time.monotonic() + deadline_s

    last_status = None
    last_error = None
    while True:
        try:
            status_payload = _post_json(status_url, {"request_code": request_code}, timeout_s=60)
        except transient_errors as exc:
            last_error = str(exc)
            if time.monotonic() >= deadline_at:
                raise SystemExit(f"timed out waiting for provisioning to finish (last error={last_error})")
            time.sleep(interval_s)
            continue
        last_error = None

        status = status_payload.get("status")
        if isinstance(status, str):
            last_status = status

        if status in ("awaiting_onboarding", "ready"):
            break
        if status in ("denied", "unknown"):
            raise SystemExit(f"request transitioned to unexpected terminal status: {status_payload}")
        if time.monotonic() >= deadline_at:
            suffix = f" (last error={last_error})" if last_error else ""
            raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status}){suffix}")
        time.sleep(interval_s)

    # --- inspect the provisioned Keycloak user ------------------------------
    # Refresh admin token (it may expire during the provisioning wait).
    admin_token = _keycloak_client_token(keycloak_base, realm, admin_client_id, admin_client_secret)

    created_user = _keycloak_find_user(keycloak_base, realm, admin_token, username)
    if not created_user:
        raise SystemExit("expected Keycloak user was not created")
    created_user_id = created_user.get("id")
    if not isinstance(created_user_id, str) or not created_user_id:
        raise SystemExit("created user missing id")

    full = _keycloak_get_user(keycloak_base, realm, admin_token, created_user_id)
    raw_actions = full.get("requiredActions") or []
    required: set[str] = (
        {action for action in raw_actions if isinstance(action, str)} if isinstance(raw_actions, list) else set()
    )

    unexpected = sorted(required & {"UPDATE_PASSWORD", "VERIFY_EMAIL", "CONFIGURE_TOTP"})
    if unexpected:
        raise SystemExit(
            "Keycloak user should not require actions at first login "
            f"(Vaultwarden-first onboarding): unexpected requiredActions={unexpected} full={sorted(required)}"
        )

    email_verified = full.get("emailVerified")
    if email_verified is not True:
        raise SystemExit(f"Keycloak user should have emailVerified=true: emailVerified={email_verified!r}")

    kc_email = full.get("email")
    if isinstance(kc_email, str) and contact_email and kc_email != contact_email:
        raise SystemExit(f"Keycloak user email mismatch: expected {contact_email!r} got {kc_email!r}")

    print(f"PASS: onboarding provisioning completed for {request_code} ({username})")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|
||||||
@ -1,193 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from typing import Any, Iterable
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from atlas_portal import settings
|
|
||||||
from atlas_portal.keycloak import admin_client
|
|
||||||
from atlas_portal.vaultwarden import invite_user
|
|
||||||
|
|
||||||
|
|
||||||
# Names of the Keycloak user attributes this script reads and writes.
# Email address used for the Vaultwarden invite; written only when not already set.
VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
# Status string of the last invite attempt; "invited" / "already_present" suppress re-invites.
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
# UTC timestamp (YYYY-MM-DDTHH:MM:SSZ) of the last recorded sync.
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
|
|
||||||
|
|
||||||
|
|
||||||
def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
    """Yield every user of the configured realm, fetched page by page.

    Pages of `page_size` users are requested from the Keycloak admin API until
    an empty, non-list, or short page is returned. Non-dict entries are dropped.

    Raises:
        RuntimeError: the Keycloak admin client is not configured.
    """
    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured")

    users_url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    offset = 0
    while True:
        # Headers are requested again for every page rather than cached.
        auth_headers = client.headers()
        # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
        # brief representation which may omit these.
        query = {"first": str(offset), "max": str(page_size), "briefRepresentation": "false"}
        with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
            response = http.get(users_url, params=query, headers=auth_headers)
            response.raise_for_status()
            page = response.json()

        if not (isinstance(page, list) and page):
            return

        for entry in page:
            if not isinstance(entry, dict):
                continue
            yield entry

        # A short page means this was the last one.
        if len(page) < page_size:
            return
        offset += page_size
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_attr(attrs: Any, key: str) -> str:
|
|
||||||
if not isinstance(attrs, dict):
|
|
||||||
return ""
|
|
||||||
raw = attrs.get(key)
|
|
||||||
if isinstance(raw, list):
|
|
||||||
for item in raw:
|
|
||||||
if isinstance(item, str) and item.strip():
|
|
||||||
return item.strip()
|
|
||||||
return ""
|
|
||||||
if isinstance(raw, str) and raw.strip():
|
|
||||||
return raw.strip()
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
    """Choose the address a Vaultwarden invite should be sent to, or "" if none.

    Order of preference: the stored vaultwarden_email attribute, the
    mailu_email attribute, then the account email when it belongs to the
    configured Mailu domain. Users without a username get "".
    """
    raw_username = user.get("username")
    if not (isinstance(raw_username, str) and raw_username.strip()):
        return ""

    attributes = user.get("attributes")
    for attr_name in (VAULTWARDEN_EMAIL_ATTR, "mailu_email"):
        stored = _extract_attr(attributes, attr_name)
        if stored:
            return stored

    raw_email = user.get("email")
    address = raw_email.strip() if isinstance(raw_email, str) else ""
    if address and address.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
        return address

    # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
    # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
    return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
    """Store `value` under attribute `key` unless `user` already has a value for it.

    Blank values are ignored. `user` is the user payload whose ``attributes``
    are inspected; the write itself is addressed by `username`.
    """
    cleaned = (value or "").strip()
    if not cleaned or _extract_attr(user.get("attributes"), key):
        return
    admin_client().set_user_attribute(username, key, cleaned)
|
|
||||||
|
|
||||||
|
|
||||||
def _set_user_attribute(username: str, key: str, value: str) -> None:
    """Write attribute `key` for `username`; blank values are silently ignored."""
    cleaned = (value or "").strip()
    if cleaned:
        admin_client().set_user_attribute(username, key, cleaned)
|
|
||||||
|
|
||||||
|
|
||||||
def _utc_timestamp() -> str:
    """Return the current UTC time formatted as YYYY-MM-DDTHH:MM:SSZ."""
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


def _record_sync_result(username: str, status: str) -> None:
    """Best-effort write of the invite status and sync time to the user's attributes.

    A failure is reported on stderr but never raised: one unwritable attribute
    must not abort the whole run.
    """
    try:
        _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, status)
        _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, _utc_timestamp())
    except Exception as exc:
        print(f"warn {username}: failed to record vaultwarden status: {exc}", file=sys.stderr)


def main() -> int:
    """Invite every eligible Keycloak user to Vaultwarden and record the outcome.

    Users that are disabled, lack a username, are service accounts, have no
    usable email address, or already carry an "invited"/"already_present"
    status are skipped. Attribute reads and writes around the invite are
    best-effort: failures are logged to stderr and the run continues.

    Returns:
        0 when no invite failed, 2 otherwise.
    """
    processed = 0
    created = 0
    skipped = 0
    failures = 0

    for user in _iter_keycloak_users():
        username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
        username = username.strip()
        if not username:
            skipped += 1
            continue

        enabled = user.get("enabled")
        if enabled is False:
            skipped += 1
            continue

        if user.get("serviceAccountClientId") or username.startswith("service-account-"):
            skipped += 1
            continue

        # Fetch the full user payload so we can reliably read attributes (and skip re-invites).
        user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or ""
        user_id = user_id.strip()
        full_user = user
        if user_id:
            try:
                full_user = admin_client().get_user(user_id)
            except Exception as exc:
                # Fall back to the list payload, but say so: its attributes may
                # be incomplete, which can lead to a repeated invite.
                print(f"warn {username}: failed to fetch full user, using list payload: {exc}", file=sys.stderr)
                full_user = user

        current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
        current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
        email = _vaultwarden_email_for_user(full_user)
        if not email:
            print(f"skip {username}: missing email", file=sys.stderr)
            skipped += 1
            continue

        try:
            _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
        except Exception as exc:
            print(f"warn {username}: failed to set {VAULTWARDEN_EMAIL_ATTR}: {exc}", file=sys.stderr)

        # If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
        # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
        if current_status in {"invited", "already_present"}:
            if not current_synced_at:
                # Backfill the timestamp for users recorded before it existed.
                try:
                    _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, _utc_timestamp())
                except Exception as exc:
                    print(f"warn {username}: failed to set {VAULTWARDEN_SYNCED_AT_ATTR}: {exc}", file=sys.stderr)
            skipped += 1
            continue

        processed += 1
        result = invite_user(email)
        if result.ok:
            created += 1
            print(f"ok {username}: {result.status}")
        else:
            failures += 1
            print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
        # The status is recorded for failures too, exactly as for successes.
        _record_sync_result(username, result.status)

    print(
        f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
        file=sys.stderr,
    )
    return 0 if failures == 0 else 2


if __name__ == "__main__":
    raise SystemExit(main())
|
|
||||||
@ -1,59 +0,0 @@
|
|||||||
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
|
|
||||||
# Runs /scripts/vaultwarden_cred_sync.py every 15 minutes using the backend image.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vaultwarden-cred-sync
  namespace: bstein-dev-home
spec:
  schedule: "*/15 * * * *"
  # A new run is skipped while the previous Job is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      # No in-Job retries; a failed run is left for the next schedule tick.
      backoffLimit: 0
      template:
        spec:
          serviceAccountName: bstein-dev-home
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          imagePullSecrets:
            - name: harbor-bstein-robot
          containers:
            - name: sync
              image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
              imagePullPolicy: Always
              command:
                - python
                - /scripts/vaultwarden_cred_sync.py
              env:
                - name: PYTHONPATH
                  value: /app
                - name: KEYCLOAK_ENABLED
                  value: "true"
                - name: KEYCLOAK_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_URL
                  value: http://keycloak.sso.svc.cluster.local
                - name: KEYCLOAK_ADMIN_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_CLIENT_ID
                  value: bstein-dev-home-admin
                # The client secret is read from a Secret, not stored inline.
                - name: KEYCLOAK_ADMIN_CLIENT_SECRET
                  valueFrom:
                    secretKeyRef:
                      name: bstein-dev-home-keycloak-admin
                      key: client_secret
                - name: HTTP_CHECK_TIMEOUT_SEC
                  value: "20"
              volumeMounts:
                - name: vaultwarden-cred-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            # The script is delivered by a ConfigMap and mounted read-only.
            - name: vaultwarden-cred-sync-script
              configMap:
                name: vaultwarden-cred-sync-script
                # Octal file mode: read + execute for everyone.
                defaultMode: 0555
|
|
||||||
31
services/ci-demo/deployment.yaml
Normal file
31
services/ci-demo/deployment.yaml
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# services/ci-demo/deployment.yaml
|
||||||
|
# Single-replica Deployment of the ci-demo container, serving HTTP on port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ci-demo
  namespace: ci-demo
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: ci-demo
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app.kubernetes.io/name: ci-demo
    spec:
      nodeSelector:
        hardware: rpi4
      containers:
        - name: ci-demo
          image: registry.bstein.dev/infra/ci-demo:latest
          ports:
            - name: http
              containerPort: 8080
          readinessProbe:
            httpGet:
              path: /
              # Refers to the named container port declared above.
              port: http
            initialDelaySeconds: 2
            periodSeconds: 5
|
||||||
|
|
||||||
24
services/ci-demo/image.yaml
Normal file
24
services/ci-demo/image.yaml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# services/ci-demo/image.yaml
# Flux image objects for ci-demo, both named "ci-demo" in flux-system:
#   1. ImageRepository - scans registry.bstein.dev/infra/ci-demo every minute.
#   2. ImagePolicy     - picks a tag from that scan.
kind: ImageRepository
apiVersion: image.toolkit.fluxcd.io/v1
metadata:
  namespace: flux-system
  name: ci-demo
spec:
  interval: 1m0s
  image: registry.bstein.dev/infra/ci-demo
---
kind: ImagePolicy
apiVersion: image.toolkit.fluxcd.io/v1
metadata:
  namespace: flux-system
  name: ci-demo
spec:
  imageRepositoryRef:
    name: ci-demo
  # Only tags of the form v0.0.0-<digits> are considered; the part after the
  # leading "v" is captured as "version" and used for the comparison.
  filterTags:
    extract: '$version'
    pattern: '^v(?P<version>0\.0\.0-\d+)$'
  # The "-0" lower bound is a pre-release version, matching the extracted
  # 0.0.0-<digits> values.
  policy:
    semver:
      range: '>=0.0.0-0'
|
||||||
11
services/ci-demo/kustomization.yaml
Normal file
11
services/ci-demo/kustomization.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# services/ci-demo/kustomization.yaml
# Kustomize entry point for the ci-demo service: bundles the namespace, the
# Flux image objects, the Deployment and the Service, and pins the image tag.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - image.yaml
  - deployment.yaml
  - service.yaml
images:
  - name: registry.bstein.dev/infra/ci-demo
    # newTag must hold ONLY the tag. The ":tag" suffix on the marker tells
    # Flux image automation to write just the tag here; without it Flux writes
    # the full "<image>:<tag>" reference, which kustomize would then append to
    # the image name again and produce an invalid image reference.
    newTag: v0.0.0-3 # {"$imagepolicy": "flux-system:ci-demo:tag"}
|
||||||
6
services/ci-demo/namespace.yaml
Normal file
6
services/ci-demo/namespace.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# services/ci-demo/namespace.yaml
# Namespace that holds the ci-demo workload.
kind: Namespace
apiVersion: v1
metadata:
  name: ci-demo
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user