From 7e46ffc075601924e01626a75c5aeb243ff2888d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 17:46:52 -0300 Subject: [PATCH 01/30] keycloak: add raw manifests backed by shared postgres --- services/keycloak/README.md | 27 +++++++++ services/keycloak/deployment.yaml | 89 ++++++++++++++++++++++++++++ services/keycloak/ingress.yaml | 24 ++++++++ services/keycloak/kustomization.yaml | 10 ++++ services/keycloak/namespace.yaml | 5 ++ services/keycloak/pvc.yaml | 12 ++++ services/keycloak/service.yaml | 15 +++++ 7 files changed, 182 insertions(+) create mode 100644 services/keycloak/README.md create mode 100644 services/keycloak/deployment.yaml create mode 100644 services/keycloak/ingress.yaml create mode 100644 services/keycloak/kustomization.yaml create mode 100644 services/keycloak/namespace.yaml create mode 100644 services/keycloak/pvc.yaml create mode 100644 services/keycloak/service.yaml diff --git a/services/keycloak/README.md b/services/keycloak/README.md new file mode 100644 index 00000000..bf7c21b0 --- /dev/null +++ b/services/keycloak/README.md @@ -0,0 +1,27 @@ +# services/keycloak + +Keycloak is deployed via raw manifests and backed by the shared Postgres (`postgres-service.postgres.svc.cluster.local:5432`). Create these secrets before applying: + +```bash +# DB creds (per-service DB/user in shared Postgres) +kubectl -n sso create secret generic keycloak-db \ + --from-literal=username=keycloak \ + --from-literal=password='' \ + --from-literal=database=keycloak + +# Admin console creds (maps to KC admin user) +kubectl -n sso create secret generic keycloak-admin \ + --from-literal=username=brad@bstein.dev \ + --from-literal=password='' +``` + +Apply: + +```bash +kubectl apply -k services/keycloak +``` + +Notes +- Service: `keycloak.sso.svc:80` (Ingress `sso.bstein.dev`, TLS via cert-manager). +- Uses Postgres schema `public`; DB/user should be provisioned in the shared Postgres instance. +- Health endpoints on :9000 are wired for probes. diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml new file mode 100644 index 00000000..fb70b400 --- /dev/null +++ b/services/keycloak/deployment.yaml @@ -0,0 +1,89 @@ +# services/keycloak/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: keycloak + namespace: sso + labels: + app: keycloak +spec: + replicas: 1 + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + containers: + - name: keycloak + image: quay.io/keycloak/keycloak:26.0.7 + imagePullPolicy: IfNotPresent + args: + - start + - --optimized + env: + - name: KC_DB + value: postgres + - name: KC_DB_URL_HOST + value: postgres-service.postgres.svc.cluster.local + - name: KC_DB_URL_DATABASE + valueFrom: + secretKeyRef: + name: keycloak-db + key: database + - name: KC_DB_USERNAME + valueFrom: + secretKeyRef: + name: keycloak-db + key: username + - name: KC_DB_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-db + key: password + - name: KC_DB_SCHEMA + value: public + - name: KC_HOSTNAME + value: sso.bstein.dev + - name: KC_PROXY + value: edge + - name: KC_HTTP_ENABLED + value: "true" + - name: KEYCLOAK_ADMIN + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + ports: + - containerPort: 8080 + name: http + - containerPort: 9000 + name: metrics + readinessProbe: + httpGet: + path: /health/ready + port: 9000 + initialDelaySeconds: 15 + periodSeconds: 10 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /health/live + port: 9000 + initialDelaySeconds: 60 + periodSeconds: 15 + failureThreshold: 6 + volumeMounts: + - name: data + mountPath: /opt/keycloak/data + volumes: + - name: data + persistentVolumeClaim: + claimName: keycloak-data diff --git a/services/keycloak/ingress.yaml b/services/keycloak/ingress.yaml new file mode 100644 index 00000000..39f6cb02 --- /dev/null +++ b/services/keycloak/ingress.yaml @@ -0,0 +1,24 @@ +# services/keycloak/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: keycloak + namespace: sso + annotations: + cert-manager.io/cluster-issuer: letsencrypt +spec: + ingressClassName: traefik + rules: + - host: sso.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: keycloak + port: + number: 80 + tls: + - hosts: [sso.bstein.dev] + secretName: keycloak-tls diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml new file mode 100644 index 00000000..a65715c9 --- /dev/null +++ b/services/keycloak/kustomization.yaml @@ -0,0 +1,10 @@ +# services/keycloak/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sso +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/keycloak/namespace.yaml b/services/keycloak/namespace.yaml new file mode 100644 index 00000000..b4c731d7 --- /dev/null +++ b/services/keycloak/namespace.yaml @@ -0,0 +1,5 @@ +# services/keycloak/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: sso diff --git a/services/keycloak/pvc.yaml b/services/keycloak/pvc.yaml new file mode 100644 index 00000000..b57ec614 --- /dev/null +++ b/services/keycloak/pvc.yaml @@ -0,0 +1,12 @@ +# services/keycloak/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: keycloak-data + namespace: sso +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + storageClassName: astreae diff --git a/services/keycloak/service.yaml b/services/keycloak/service.yaml new file mode 100644 index 00000000..5d93ef63 --- /dev/null +++ b/services/keycloak/service.yaml @@ -0,0 +1,15 @@ +# services/keycloak/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: keycloak + namespace: sso + labels: + app: keycloak +spec: + selector: + app: keycloak + ports: + - name: http + port: 80 + targetPort: http From 46b6d471eb1b4317b2b9648da8cb49f52ea055a1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 18:00:49 -0300 Subject: [PATCH 02/30] flux: track feature/sso --- clusters/atlas/flux-system/gotk-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 46f65d33..4076ef64 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -8,7 +8,7 @@ metadata: spec: interval: 1m0s ref: - branch: feature/atlas-monitoring + branch: feature/sso secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git From 7210c0784d25dc7c368a793d37d637c3bd608738 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 18:10:20 -0300 Subject: [PATCH 03/30] flux: add keycloak kustomization --- .../applications/keycloak/kustomization.yaml | 15 +++++++++++++++ .../flux-system/applications/kustomization.yaml | 1 + 2 files changed, 16 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/keycloak/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml b/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml new file mode 100644 index 00000000..4634b5c9 --- /dev/null +++ b/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml @@ -0,0 +1,15 @@ +# clusters/atlas/flux-system/applications/keycloak/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: keycloak + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/keycloak + targetNamespace: sso + timeout: 2m diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 7d2f8eeb..6b5124e9 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -13,3 +13,4 @@ resources: - jellyfin/kustomization.yaml - xmr-miner/kustomization.yaml - sui-metrics/kustomization.yaml + - keycloak/kustomization.yaml From 2906e3e5d9976ecab213379e9dbd88078f3686a2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 20:28:35 -0300 Subject: [PATCH 04/30] monitoring: show GPU share over dashboard range --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-gpu.json | 2 +- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-gpu.yaml | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 93de0068..f577eab3 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -232,7 +232,7 @@ NAMESPACE_GPU_ALLOC = ( ' or kube_pod_container_resource_limits{namespace!="",resource="nvidia.com/gpu"})) by (namespace)' ) NAMESPACE_GPU_USAGE_SHARE = ( - 'sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))' + 'sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))' ) NAMESPACE_GPU_USAGE_INSTANT = 'sum(DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}) by (namespace)' NAMESPACE_GPU_RAW = ( diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index e67b3d29..9071b0a2 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 9eda81d3..beb676e1 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -975,7 +975,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 3af87176..b5c2c184 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 928098e8..ef17ebf7 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -984,7 +984,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } From 1e8de601989516572412864560d50d72e7d5532a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 3 Dec 2025 12:28:45 -0300 Subject: [PATCH 05/30] notes: capture GPU share change and flux branch --- AGENTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AGENTS.md b/AGENTS.md index a8d49c8a..611ed066 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,6 +46,7 @@ Repository Guidelines - Atlas Overview is the Grafana home (1h range, 1m refresh), Overview folder UID `overview`, internal folder `atlas-internal` (oceanus-internal stub). - Panels standardized via generator; hottest row compressed, worker/control rows taller, root disk row taller and top12 bar gauge with labels. GPU share pie uses 1h avg_over_time to persist idle activity. - Internal dashboards are provisioned without Viewer role; if anonymous still sees them, restart Grafana and tighten auth if needed. +- GPU share panel updated (feature/sso) to use `max_over_time(…[$__range])`, so longer ranges (e.g., 12h) keep recent activity visible. Flux tracking `feature/sso`. ## Upcoming priorities (SSO/storage/mail) - Establish SSO (Keycloak or similar) and federate Grafana, Gitea, Zot, Nextcloud, Pegasus/Jellyfin; keep Vaultwarden separate until safe. From b9202b6829de6de134ae81db93e83474ff7010c4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:43:17 -0300 Subject: [PATCH 06/30] chore: drop AGENTS.md from repo --- AGENTS.md | 69 ------------------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 611ed066..00000000 --- a/AGENTS.md +++ /dev/null @@ -1,69 +0,0 @@ - - -Repository Guidelines - -## Project Structure & Module Organization -- `infrastructure/`: cluster-scoped building blocks (core, flux-system, traefik, longhorn). Add new platform features by mirroring this layout. -- `services/`: workload manifests per app (`services/gitea/`, etc.) with `kustomization.yaml` plus one file per kind; keep diffs small and focused. -- `dockerfiles/` hosts bespoke images, while `scripts/` stores operational Fish/Bash helpers—extend these directories instead of relying on ad-hoc commands. - -## Build, Test, and Development Commands -- `kustomize build services/` (or `kubectl kustomize ...`) renders manifests exactly as Flux will. -- `kubectl apply --server-side --dry-run=client -k services/` checks schema compatibility without touching the cluster. -- `flux reconcile kustomization --namespace flux-system --with-source` pulls the latest Git state after merges or hotfixes. -- `fish scripts/flux_hammer.fish --help` explains the recovery tool; read it before running against production workloads. - -## Coding Style & Naming Conventions -- YAML uses two-space indents; retain the leading path comment (e.g. `# services/gitea/deployment.yaml`) to speed code review. -- Keep resource names lowercase kebab-case, align labels/selectors, and mirror namespaces with directory names. -- List resources in `kustomization.yaml` from namespace/config, through storage, then workloads and networking for predictable diffs. -- Scripts start with `#!/usr/bin/env fish` or bash, stay executable, and follow snake_case names such as `flux_hammer.fish`. - -## Testing Guidelines -- Run `kustomize build` and the dry-run apply for every service you touch; capture failures before opening a PR. -- `flux diff kustomization --path services/` previews reconciliations—link notable output when behavior shifts. -- Docker edits: `docker build -f dockerfiles/Dockerfile.monerod .` (swap the file you changed) to verify image builds. - -## Commit & Pull Request Guidelines -- Keep commit subjects short, present-tense, and optionally scoped (`gpu(titan-24): add RuntimeClass`); squash fixups before review. -- Describe linked issues, affected services, and required operator steps (e.g. `flux reconcile kustomization services-gitea`) in the PR body. -- Focus each PR on one kustomization or service and update `infrastructure/flux-system` when Flux must track new folders. -- Record the validation you ran (dry-runs, diffs, builds) and add screenshots only when ingress or UI behavior changes. - -## Security & Configuration Tips -- Never commit credentials; use Vault workflows (`services/vault/`) or SOPS-encrypted manifests wired through `infrastructure/flux-system`. -- Node selectors and tolerations gate workloads to hardware like `hardware: rpi4`; confirm labels before scaling or renaming nodes. -- Pin external images by digest or rely on Flux image automation to follow approved tags and avoid drift. - -## Dashboard roadmap / context (2025-12-02) -- Atlas dashboards are generated via `scripts/dashboards_render_atlas.py --build`, which writes JSON under `services/monitoring/dashboards/` and ConfigMaps under `services/monitoring/`. Keep the Grafana manifests in sync by regenerating after edits. -- Atlas Overview panels are paired with internal dashboards (pods, nodes, storage, network, GPU). A new `atlas-gpu` internal dashboard holds the detailed GPU metrics that feed the overview share pie. -- Old Grafana folders (`Atlas Storage`, `Atlas SRE`, `Atlas Public`, `Atlas Nodes`) should be removed in Grafana UI when convenient; only `Atlas Overview` and `Atlas Internal` should remain provisioned. -- Future work: add a separate generator (e.g., `dashboards_render_oceanus.py`) for SUI/oceanus validation dashboards, mirroring the atlas pattern of internal dashboards feeding a public overview. - -## Monitoring state (2025-12-03) -- dcgm-exporter DaemonSet pulls `registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04` with nvidia runtime/imagePullSecret; titan-24 exports metrics, titan-22 remains NotReady. -- Atlas Overview is the Grafana home (1h range, 1m refresh), Overview folder UID `overview`, internal folder `atlas-internal` (oceanus-internal stub). -- Panels standardized via generator; hottest row compressed, worker/control rows taller, root disk row taller and top12 bar gauge with labels. GPU share pie uses 1h avg_over_time to persist idle activity. -- Internal dashboards are provisioned without Viewer role; if anonymous still sees them, restart Grafana and tighten auth if needed. -- GPU share panel updated (feature/sso) to use `max_over_time(…[$__range])`, so longer ranges (e.g., 12h) keep recent activity visible. Flux tracking `feature/sso`. - -## Upcoming priorities (SSO/storage/mail) -- Establish SSO (Keycloak or similar) and federate Grafana, Gitea, Zot, Nextcloud, Pegasus/Jellyfin; keep Vaultwarden separate until safe. -- Add Nextcloud (limit to rpi5 workers) with office suite; integrate with SSO; plan storage class and ingress. -- Plan mail: mostly self-hosted, relay through trusted provider for outbound; integrate with services (Nextcloud, Vaultwarden, etc.) for notifications and account flows. - -## SSO plan sketch (2025-12-03) -- IdP: use Keycloak (preferred) in a new `sso` namespace, Bitnami or codecentric chart with Postgres backing store (single PVC), ingress `sso.bstein.dev`, admin user bound to brad@bstein.dev; stick with local DB initially (no external IdP). -- Auth flow goals: Grafana (OIDC), Gitea (OAuth2/Keycloak), Zot (via Traefik forward-auth/oauth2-proxy), Jellyfin/Pegasus via Jellyfin OAuth/OpenID plugin (map existing usernames; run migration to pre-create users in Keycloak with same usernames/emails and temporary passwords), Pegasus keeps using Jellyfin tokens. -- Steps to implement: - 1) Add service folder `services/keycloak/` (namespace, PVC, HelmRelease, ingress, secret for admin creds). Verify with kustomize + Flux reconcile. - 2) Seed realm `atlas` with users (import CSV/realm). Create client for Grafana (public/implicit), Gitea (confidential), and a “jellyfin” client for the OAuth plugin; set email for brad@bstein.dev as admin. - 3) Reconfigure Grafana to OIDC (disable anonymous to internal folders, leave Overview public via folder permissions). Reconfigure Gitea to OIDC (app.ini). - 4) Add Traefik forward-auth (oauth2-proxy) in front of Zot and any other services needing headers-based auth. - 5) Deploy Jellyfin OpenID plugin; map Keycloak users to existing Jellyfin usernames; communicate password reset path. -- Migration caution: do not delete existing local creds until SSO validated; keep Pegasus working via Jellyfin tokens during transition. - -## Postgres centralization (2025-12-03) -- Prefer a shared in-cluster Postgres deployment with per-service databases to reduce resource sprawl on Pi nodes. Use it for services that can easily point at an external DB. -- Candidates to migrate to shared Postgres: Keycloak (realm DB), Gitea (git DB), Nextcloud (app DB), possibly Grafana (if persistence needed beyond current provisioner), Jitsi prosody/JVB state (if external DB supported). Keep tightly-coupled or lightweight embedded DBs as-is when migration is painful or not supported. From 65d898627980d8a382f8019b58e599977d23bd32 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:43:24 -0300 Subject: [PATCH 07/30] keycloak: remove optimized flag for first start --- services/keycloak/deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index fb70b400..22c795d0 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -22,7 +22,6 @@ spec: imagePullPolicy: IfNotPresent args: - start - - --optimized env: - name: KC_DB value: postgres From b1b39c4dcd71e6361fe2d7e6094946bff4763590 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:49:17 -0300 Subject: [PATCH 08/30] keycloak: set fsGroup for data volume --- services/keycloak/deployment.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 22c795d0..3beec0a2 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -16,6 +16,11 @@ spec: labels: app: keycloak spec: + securityContext: + runAsUser: 1000 + runAsGroup: 0 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch containers: - name: keycloak image: quay.io/keycloak/keycloak:26.0.7 From 9f5e61ebeda96106e53c7667d45858e161f0d20a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:51:47 -0300 Subject: [PATCH 09/30] keycloak: enable health/metrics management port --- services/keycloak/deployment.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 3beec0a2..ca44a32d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -55,6 +55,14 @@ spec: value: edge - name: KC_HTTP_ENABLED value: "true" + - name: KC_HTTP_MANAGEMENT_PORT + value: "9000" + - name: KC_HTTP_MANAGEMENT_BIND_ADDRESS + value: 0.0.0.0 + - name: KC_HEALTH_ENABLED + value: "true" + - name: KC_METRICS_ENABLED + value: "true" - name: KEYCLOAK_ADMIN valueFrom: secretKeyRef: From 127d09755eac4822e85c7d8ad6b5bc2c6a615c1e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:23:07 -0300 Subject: [PATCH 10/30] keycloak: honor xforwarded headers and hostname url --- services/keycloak/deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index ca44a32d..f5409fcb 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -51,8 +51,12 @@ spec: value: public - name: KC_HOSTNAME value: sso.bstein.dev + - name: KC_HOSTNAME_URL + value: https://sso.bstein.dev - name: KC_PROXY value: edge + - name: KC_PROXY_HEADERS + value: xforwarded - name: KC_HTTP_ENABLED value: "true" - name: KC_HTTP_MANAGEMENT_PORT From f2d496c6c0d6e40a4afc3749930801db13b6e7e4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:36:33 -0300 Subject: [PATCH 11/30] keycloak: prefer rpi nodes, avoid titan-24 --- services/keycloak/deployment.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index f5409fcb..2fb4bbb6 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -16,6 +16,27 @@ spec: labels: app: keycloak spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + - weight: 50 + preference: + matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: ["titan-24"] securityContext: runAsUser: 1000 runAsGroup: 0 From 2122ce3e31a8142447338fc805e2bda8ae1b93dd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:40:24 -0300 Subject: [PATCH 12/30] keycloak: require rpi nodes with titan-24 fallback --- services/keycloak/deployment.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 2fb4bbb6..406364d5 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -18,6 +18,16 @@ spec: spec: affinity: nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: ["titan-24"] preferredDuringSchedulingIgnoredDuringExecution: - weight: 90 preference: @@ -31,12 +41,6 @@ spec: - key: hardware operator: In values: ["rpi4"] - - weight: 50 - preference: - matchExpressions: - - key: kubernetes.io/hostname - operator: NotIn - values: ["titan-24"] securityContext: runAsUser: 1000 runAsGroup: 0 From de727eee07d60ca7e50a3fe058de8aa2ebd2701c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:44:23 -0300 Subject: [PATCH 13/30] keycloak: restrict to worker rpis with titan-24 fallback --- services/keycloak/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 406364d5..af7839f0 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -24,6 +24,8 @@ spec: - key: hardware operator: In values: ["rpi5","rpi4"] + - key: node-role.kubernetes.io/worker + operator: Exists - matchExpressions: - key: kubernetes.io/hostname operator: In From c7b73555c4956917c0c5adf59c3b0ac173083971 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 14:42:24 -0300 Subject: [PATCH 14/30] add oauth2-proxy for SSO forward-auth --- .../applications/kustomization.yaml | 1 + .../oauth2-proxy/kustomization.yaml | 15 ++++ services/oauth2-proxy/deployment.yaml | 71 +++++++++++++++++++ services/oauth2-proxy/ingress.yaml | 24 +++++++ services/oauth2-proxy/kustomization.yaml | 9 +++ services/oauth2-proxy/middleware.yaml | 15 ++++ services/oauth2-proxy/service.yaml | 15 ++++ 7 files changed, 150 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml create mode 100644 services/oauth2-proxy/deployment.yaml create mode 100644 services/oauth2-proxy/ingress.yaml create mode 100644 services/oauth2-proxy/kustomization.yaml create mode 100644 services/oauth2-proxy/middleware.yaml create mode 100644 services/oauth2-proxy/service.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 6b5124e9..1bc2700c 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -14,3 +14,4 @@ resources: - xmr-miner/kustomization.yaml - sui-metrics/kustomization.yaml - keycloak/kustomization.yaml + - oauth2-proxy/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml b/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml new file mode 100644 index 00000000..187572de --- /dev/null +++ b/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml @@ -0,0 +1,15 @@ +# clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: oauth2-proxy + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/oauth2-proxy + targetNamespace: sso + timeout: 2m diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml new file mode 100644 index 00000000..8754cb53 --- /dev/null +++ b/services/oauth2-proxy/deployment.yaml @@ -0,0 +1,71 @@ +# services/oauth2-proxy/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy + namespace: sso + labels: + app: oauth2-proxy +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy + template: + metadata: + labels: + app: oauth2-proxy + spec: + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --redirect-url=https://auth.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --upstream=static://200 + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 diff --git a/services/oauth2-proxy/ingress.yaml b/services/oauth2-proxy/ingress.yaml new file mode 100644 index 00000000..96d16ea1 --- /dev/null +++ b/services/oauth2-proxy/ingress.yaml @@ -0,0 +1,24 @@ +# services/oauth2-proxy/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: oauth2-proxy + namespace: sso + annotations: + cert-manager.io/cluster-issuer: letsencrypt +spec: + ingressClassName: traefik + rules: + - host: auth.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: oauth2-proxy + port: + number: 80 + tls: + - hosts: [auth.bstein.dev] + secretName: auth-tls diff --git a/services/oauth2-proxy/kustomization.yaml b/services/oauth2-proxy/kustomization.yaml new file mode 100644 index 00000000..e79ae66d --- /dev/null +++ b/services/oauth2-proxy/kustomization.yaml @@ -0,0 +1,9 @@ +# services/oauth2-proxy/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sso +resources: + - deployment.yaml + - service.yaml + - ingress.yaml + - middleware.yaml diff --git a/services/oauth2-proxy/middleware.yaml b/services/oauth2-proxy/middleware.yaml new file mode 100644 index 00000000..db5f3a40 --- /dev/null +++ b/services/oauth2-proxy/middleware.yaml @@ -0,0 +1,15 @@ +# services/oauth2-proxy/middleware.yaml +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: oauth2-proxy-forward-auth + namespace: sso +spec: + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/oauth2-proxy/service.yaml b/services/oauth2-proxy/service.yaml new file mode 100644 index 00000000..1eb5481e --- /dev/null +++ b/services/oauth2-proxy/service.yaml @@ -0,0 +1,15 @@ +# services/oauth2-proxy/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy + namespace: sso + labels: + app: oauth2-proxy +spec: + selector: + app: oauth2-proxy + ports: + - name: http + port: 80 + targetPort: 4180 From 7525289a0cfec1d862db9c3bdfb75f1b137a18e2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 02:01:21 -0300 Subject: [PATCH 15/30] auth: wire oauth2-proxy and enable grafana oidc --- .../longhorn/ui-ingress/ingress.yaml | 2 +- services/monitoring/helmrelease.yaml | 22 +++++++++++++++++-- services/oauth2-proxy/deployment.yaml | 1 + services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 6250cfaf..e9905ba6 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-basicauth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 2546dc1e..d7d75799 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -249,9 +249,27 @@ spec: service: type: ClusterIP env: - GF_AUTH_ANONYMOUS_ENABLED: "true" - GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer + GF_AUTH_ANONYMOUS_ENABLED: "false" GF_SECURITY_ALLOW_EMBEDDING: "true" + GF_AUTH_GENERIC_OAUTH_ENABLED: "true" + GF_AUTH_GENERIC_OAUTH_NAME: "Keycloak" + GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: "true" + GF_AUTH_GENERIC_OAUTH_SCOPES: "openid profile email groups" + GF_AUTH_GENERIC_OAUTH_AUTH_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth" + GF_AUTH_GENERIC_OAUTH_TOKEN_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token" + GF_AUTH_GENERIC_OAUTH_API_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo" + GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups, 'admin') && 'Admin' || 'Viewer'" + GF_AUTH_GENERIC_OAUTH_TLS_SKIP_VERIFY_INSECURE: "false" + GF_AUTH_SIGNOUT_REDIRECT_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://metrics.bstein.dev/" + envValueFrom: + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: + secretKeyRef: + name: grafana-oidc + key: client_id + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: + secretKeyRef: + name: grafana-oidc + key: client_secret grafana.ini: server: domain: metrics.bstein.dev diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 8754cb53..1457c52a 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -25,6 +25,7 @@ spec: - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email groups + - --allowed-group=admin - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 306556da..fa43bc27 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 3425535c..10a07436 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: From 571bf759a2bca14bba454621b96e356c7207fc79 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 10:25:44 -0300 Subject: [PATCH 16/30] auth: add namespace-local forward-auth middlewares --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- .../longhorn/ui-ingress/middleware.yaml | 17 +++++++++++++++++ services/vault/ingress.yaml | 2 +- services/vault/middleware.yaml | 12 +++++++++--- services/zot/ingress.yaml | 2 +- services/zot/middleware.yaml | 17 +++++++++++++++++ 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index e9905ba6..8f55b825 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index c670cef4..abc2a643 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -20,3 +20,20 @@ spec: headers: customRequestHeaders: X-Forwarded-Proto: "https" + +--- + +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: longhorn-forward-auth + namespace: longhorn-system +spec: + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index fa43bc27..6115e386 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0a419612..0f4388e0 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -2,8 +2,14 @@ apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: - name: vault-basicauth + name: vault-forward-auth namespace: vault spec: - basicAuth: - secret: vault-basic-auth + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 10a07436..75ec998d 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index 166b070d..a7a294d2 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -24,3 +24,20 @@ spec: - PUT - PATCH - DELETE + +--- + +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: zot-forward-auth + namespace: zot +spec: + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups From 1de9d94138b8c60703c72aba6dc54b1be47e9f92 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 10:42:13 -0300 Subject: [PATCH 17/30] oauth2-proxy: temporarily drop group restriction --- services/oauth2-proxy/deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 1457c52a..8754cb53 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -25,7 +25,6 @@ spec: - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email groups - - --allowed-group=admin - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true From ec1d33f1ca493ed46cedccfe0c72d1c821e617dd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:09:09 -0300 Subject: [PATCH 18/30] auth: point forward-auth to external auth host --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index abc2a643..3bf2ff5c 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0f4388e0..8a39bf96 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index a7a294d2..cc76d5f7 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization From 876ec19543d84ba0a562f607e4b8bf05173289ed Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:14:25 -0300 Subject: [PATCH 19/30] auth: add 401 redirect middleware to oauth2-proxy --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- services/oauth2-proxy/kustomization.yaml | 1 + services/oauth2-proxy/middleware-errors.yaml | 14 ++++++++++++++ services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 services/oauth2-proxy/middleware-errors.yaml diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 8f55b825..ac684718 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/oauth2-proxy/kustomization.yaml b/services/oauth2-proxy/kustomization.yaml index e79ae66d..ff4705a0 100644 --- a/services/oauth2-proxy/kustomization.yaml +++ b/services/oauth2-proxy/kustomization.yaml @@ -7,3 +7,4 @@ resources: - service.yaml - ingress.yaml - middleware.yaml + - middleware-errors.yaml diff --git a/services/oauth2-proxy/middleware-errors.yaml b/services/oauth2-proxy/middleware-errors.yaml new file mode 100644 index 00000000..ee0c7864 --- /dev/null +++ b/services/oauth2-proxy/middleware-errors.yaml @@ -0,0 +1,14 @@ +# services/oauth2-proxy/middleware-errors.yaml +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: oauth2-proxy-errors + namespace: sso +spec: + errors: + status: + - "401" + service: + name: oauth2-proxy + port: 80 + query: /oauth2/start?rd={url} diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 6115e386..1c274fbb 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 75ec998d..1d9307a0 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: From 84aa870cdaf9f0e302421d6551e6b8fbbd7aeee3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:25:29 -0300 Subject: [PATCH 20/30] auth: use internal oauth2-proxy svc for forward-auth --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index 3bf2ff5c..abc2a643 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 8a39bf96..0f4388e0 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index cc76d5f7..a7a294d2 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization From 96a8d271a970c79bf67dc8e5a8850d9ff504d4c7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 12:03:14 -0300 Subject: [PATCH 21/30] oauth2-proxy: ensure error middleware on auth ingress --- services/oauth2-proxy/ingress.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/oauth2-proxy/ingress.yaml b/services/oauth2-proxy/ingress.yaml index 96d16ea1..0f5830cb 100644 --- a/services/oauth2-proxy/ingress.yaml +++ b/services/oauth2-proxy/ingress.yaml @@ -6,6 +6,7 @@ metadata: namespace: sso annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-errors@kubernetescrd spec: ingressClassName: traefik rules: From 84e4dc06169ca167a61502b0da190a244e21efd9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 12:49:38 -0300 Subject: [PATCH 22/30] oauth2-proxy: schedule on worker rpis --- services/oauth2-proxy/deployment.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 8754cb53..7c22a937 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -16,6 +16,17 @@ spec: labels: app: oauth2-proxy spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] containers: - name: oauth2-proxy image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 From 088fed6720214aa69657f52ca8e14096c001b692 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 13:03:29 -0300 Subject: [PATCH 23/30] auth: forward-auth via external auth host (svc traffic flaky) --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index abc2a643..3bf2ff5c 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0f4388e0..8a39bf96 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index a7a294d2..cc76d5f7 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization From 7ae8bf9705e6c24d269c699978c740692bb3ffea Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 13:09:29 -0300 Subject: [PATCH 24/30] oauth2-proxy: drop groups scope to avoid invalid_scope --- services/oauth2-proxy/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 7c22a937..03d30c16 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -35,7 +35,7 @@ spec: - --provider=oidc - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email groups + - --scope=openid profile email - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true From e44def25f85256f6a13eca8e595ca76284c164d4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 13:19:45 -0300 Subject: [PATCH 25/30] auth: remove error middleware to allow redirect --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index ac684718..8f55b825 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 1c274fbb..6115e386 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 1d9307a0..75ec998d 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: From 88db462f8fac90fb43b02b181cf2ed2677d04a52 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 19:44:02 -0300 Subject: [PATCH 26/30] longhorn/vault: gate via oauth2-proxy --- .../longhorn/ui-ingress/ingress.yaml | 4 +- .../longhorn/ui-ingress/kustomization.yaml | 1 + .../ui-ingress/oauth2-proxy-longhorn.yaml | 102 ++++++++++++++++++ services/oauth2-proxy/deployment.yaml | 2 +- services/oauth2-proxy/middleware-errors.yaml | 1 + services/vault/ingress.yaml | 8 +- services/vault/kustomization.yaml | 1 + services/vault/oauth2-proxy-vault.yaml | 102 ++++++++++++++++++ 8 files changed, 213 insertions(+), 8 deletions(-) create mode 100644 infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml create mode 100644 services/vault/oauth2-proxy-vault.yaml diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 8f55b825..94daeeda 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: "" spec: ingressClassName: traefik tls: @@ -21,6 +21,6 @@ spec: pathType: Prefix backend: service: - name: longhorn-frontend + name: oauth2-proxy-longhorn port: number: 80 diff --git a/infrastructure/longhorn/ui-ingress/kustomization.yaml b/infrastructure/longhorn/ui-ingress/kustomization.yaml index 1d497dc1..a2ae5f37 100644 --- a/infrastructure/longhorn/ui-ingress/kustomization.yaml +++ b/infrastructure/longhorn/ui-ingress/kustomization.yaml @@ -4,3 +4,4 @@ kind: Kustomization resources: - middleware.yaml - ingress.yaml + - oauth2-proxy-longhorn.yaml diff --git a/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml new file mode 100644 index 00000000..b8d4f349 --- /dev/null +++ b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml @@ -0,0 +1,102 @@ +# infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy-longhorn + namespace: longhorn-system + labels: + app: oauth2-proxy-longhorn +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-longhorn + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-longhorn + namespace: longhorn-system + labels: + app: oauth2-proxy-longhorn +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-longhorn + template: + metadata: + labels: + app: oauth2-proxy-longhorn + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --redirect-url=https://longhorn.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --allowed-group=admin + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --cookie-domain=longhorn.bstein.dev + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 03d30c16..7c22a937 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -35,7 +35,7 @@ spec: - --provider=oidc - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email + - --scope=openid profile email groups - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true diff --git a/services/oauth2-proxy/middleware-errors.yaml b/services/oauth2-proxy/middleware-errors.yaml index ee0c7864..55e092a7 100644 --- a/services/oauth2-proxy/middleware-errors.yaml +++ b/services/oauth2-proxy/middleware-errors.yaml @@ -8,6 +8,7 @@ spec: errors: status: - "401" + - "403" service: name: oauth2-proxy port: 80 diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 6115e386..d61d4bc3 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,9 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd - traefik.ingress.kubernetes.io/service.serversscheme: https - traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: "" spec: ingressClassName: traefik tls: @@ -23,6 +21,6 @@ spec: pathType: Prefix backend: service: - name: vault-ui + name: oauth2-proxy-vault port: - number: 8200 + number: 80 diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c3fbc5e..4c0f07e6 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -9,3 +9,4 @@ resources: - ingress.yaml - middleware.yaml - serverstransport.yaml + - oauth2-proxy-vault.yaml diff --git a/services/vault/oauth2-proxy-vault.yaml b/services/vault/oauth2-proxy-vault.yaml new file mode 100644 index 00000000..e79a142e --- /dev/null +++ b/services/vault/oauth2-proxy-vault.yaml @@ -0,0 +1,102 @@ +# services/vault/oauth2-proxy-vault.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy-vault + labels: + app: oauth2-proxy-vault +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-vault + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-vault + labels: + app: oauth2-proxy-vault +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-vault + template: + metadata: + labels: + app: oauth2-proxy-vault + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 80 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + - arm + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + args: + - --provider=oidc + - --redirect-url=https://secret.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=https://vault-ui.vault.svc.cluster.local:8200 + - --ssl-insecure-skip-verify=true + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --allowed-group=admin + - --cookie-domain=secret.bstein.dev + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 From 3852ebc0f1b73e2c9741f51a152bcc46a2a63ecf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:04:19 -0300 Subject: [PATCH 27/30] zot,vault: remove oauth2-proxy sso --- services/vault/ingress.yaml | 8 +- services/vault/kustomization.yaml | 1 - services/vault/middleware.yaml | 12 +-- services/vault/oauth2-proxy-vault.yaml | 102 ------------------------- services/zot/ingress.yaml | 2 +- services/zot/middleware.yaml | 17 ----- 6 files changed, 9 insertions(+), 133 deletions(-) delete mode 100644 services/vault/oauth2-proxy-vault.yaml diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index d61d4bc3..306556da 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,9 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: "" + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd + traefik.ingress.kubernetes.io/service.serversscheme: https + traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: @@ -21,6 +23,6 @@ spec: pathType: Prefix backend: service: - name: oauth2-proxy-vault + name: vault-ui port: - number: 80 + number: 8200 diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c0f07e6..4c3fbc5e 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -9,4 +9,3 @@ resources: - ingress.yaml - middleware.yaml - serverstransport.yaml - - oauth2-proxy-vault.yaml diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 8a39bf96..0a419612 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -2,14 +2,8 @@ apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: - name: vault-forward-auth + name: vault-basicauth namespace: vault spec: - forwardAuth: - address: https://auth.bstein.dev/oauth2/auth - trustForwardHeader: true - authResponseHeaders: - - Authorization - - X-Auth-Request-Email - - X-Auth-Request-User - - X-Auth-Request-Groups + basicAuth: + secret: vault-basic-auth diff --git a/services/vault/oauth2-proxy-vault.yaml b/services/vault/oauth2-proxy-vault.yaml deleted file mode 100644 index e79a142e..00000000 --- a/services/vault/oauth2-proxy-vault.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# services/vault/oauth2-proxy-vault.yaml -apiVersion: v1 -kind: Service -metadata: - name: oauth2-proxy-vault - labels: - app: oauth2-proxy-vault -spec: - ports: - - name: http - port: 80 - targetPort: 4180 - selector: - app: oauth2-proxy-vault - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: oauth2-proxy-vault - labels: - app: oauth2-proxy-vault -spec: - replicas: 2 - selector: - matchLabels: - app: oauth2-proxy-vault - template: - metadata: - labels: - app: oauth2-proxy-vault - spec: - nodeSelector: - node-role.kubernetes.io/worker: "true" - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 80 - preference: - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - arm - containers: - - name: oauth2-proxy - image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 - args: - - --provider=oidc - - --redirect-url=https://secret.bstein.dev/oauth2/callback - - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email groups - - --email-domain=* - - --set-xauthrequest=true - - --pass-access-token=true - - --set-authorization-header=true - - --cookie-secure=true - - --cookie-samesite=lax - - --cookie-refresh=20m - - --cookie-expire=168h - - --insecure-oidc-allow-unverified-email=true - - --upstream=https://vault-ui.vault.svc.cluster.local:8200 - - --ssl-insecure-skip-verify=true - - --http-address=0.0.0.0:4180 - - --skip-provider-button=true - - --skip-jwt-bearer-tokens=true - - --oidc-groups-claim=groups - - --allowed-group=admin - - --cookie-domain=secret.bstein.dev - env: - - name: OAUTH2_PROXY_CLIENT_ID - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: client_id - - name: OAUTH2_PROXY_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: client_secret - - name: OAUTH2_PROXY_COOKIE_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: cookie_secret - ports: - - containerPort: 4180 - name: http - readinessProbe: - httpGet: - path: /ping - port: 4180 - initialDelaySeconds: 5 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /ping - port: 4180 - initialDelaySeconds: 20 - periodSeconds: 20 diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 75ec998d..3425535c 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index cc76d5f7..166b070d 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -24,20 +24,3 @@ spec: - PUT - PATCH - DELETE - ---- - -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: zot-forward-auth - namespace: zot -spec: - forwardAuth: - address: https://auth.bstein.dev/oauth2/auth - trustForwardHeader: true - authResponseHeaders: - - Authorization - - X-Auth-Request-Email - - X-Auth-Request-User - - X-Auth-Request-Groups From efd258fc7140d1d8724bef42d49016d653731836 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:09:05 -0300 Subject: [PATCH 28/30] vault: drop traefik basicauth --- services/vault/ingress.yaml | 1 - services/vault/kustomization.yaml | 1 - services/vault/middleware.yaml | 9 --------- 3 files changed, 11 deletions(-) delete mode 100644 services/vault/middleware.yaml diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 306556da..91d9ca4b 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c3fbc5e..1d7af87e 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -7,5 +7,4 @@ resources: - helmrelease.yaml - certificate.yaml - ingress.yaml - - middleware.yaml - serverstransport.yaml diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml deleted file mode 100644 index 0a419612..00000000 --- a/services/vault/middleware.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# services/vault/middleware.yaml -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: vault-basicauth - namespace: vault -spec: - basicAuth: - secret: vault-basic-auth From d5a526c5fa6937d5cf1498f0a07034682f6cc258 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:22:16 -0300 Subject: [PATCH 29/30] zot: revert to unauthenticated registry --- services/zot/configmap.yaml | 20 +------------------- services/zot/deployment.yaml | 7 ------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/services/zot/configmap.yaml b/services/zot/configmap.yaml index 0261fc19..d4fe5444 100644 --- a/services/zot/configmap.yaml +++ b/services/zot/configmap.yaml @@ -18,25 +18,7 @@ data: "address": "0.0.0.0", "port": "5000", "realm": "zot-registry", - "compat": ["docker2s2"], - "auth": { - "htpasswd": { "path": "/etc/zot/htpasswd" } - }, - "accessControl": { - "repositories": { - "**": { - "policies": [ - { "users": ["bstein"], "actions": ["read", "create", "update", "delete"] } - ], - "defaultPolicy": [], - "anonymousPolicy": [] - } - }, - "adminPolicy": { - "users": ["bstein"], - "actions": ["read", "create", "update", "delete"] - } - } + "compat": ["docker2s2"] }, "log": { "level": "info" }, "extensions": { diff --git a/services/zot/deployment.yaml b/services/zot/deployment.yaml index e4fdc1f5..45fca5e8 100644 --- a/services/zot/deployment.yaml +++ b/services/zot/deployment.yaml @@ -42,10 +42,6 @@ spec: mountPath: /etc/zot/config.json subPath: config.json readOnly: true - - name: htpasswd - mountPath: /etc/zot/htpasswd - subPath: htpasswd - readOnly: true - name: zot-data mountPath: /var/lib/registry readinessProbe: @@ -64,9 +60,6 @@ spec: - name: cfg configMap: name: zot-config - - name: htpasswd - secret: - secretName: zot-htpasswd - name: zot-data persistentVolumeClaim: claimName: zot-data From 55fa2cbce48ee8b54d26a157576895853258fc90 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:26:15 -0300 Subject: [PATCH 30/30] zot: restore main branch config --- services/zot/configmap.yaml | 20 +++++++++++++++++++- services/zot/deployment.yaml | 7 +++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/services/zot/configmap.yaml b/services/zot/configmap.yaml index d4fe5444..0261fc19 100644 --- a/services/zot/configmap.yaml +++ b/services/zot/configmap.yaml @@ -18,7 +18,25 @@ data: "address": "0.0.0.0", "port": "5000", "realm": "zot-registry", - "compat": ["docker2s2"] + "compat": ["docker2s2"], + "auth": { + "htpasswd": { "path": "/etc/zot/htpasswd" } + }, + "accessControl": { + "repositories": { + "**": { + "policies": [ + { "users": ["bstein"], "actions": ["read", "create", "update", "delete"] } + ], + "defaultPolicy": [], + "anonymousPolicy": [] + } + }, + "adminPolicy": { + "users": ["bstein"], + "actions": ["read", "create", "update", "delete"] + } + } }, "log": { "level": "info" }, "extensions": { diff --git a/services/zot/deployment.yaml b/services/zot/deployment.yaml index 45fca5e8..e4fdc1f5 100644 --- a/services/zot/deployment.yaml +++ b/services/zot/deployment.yaml @@ -42,6 +42,10 @@ spec: mountPath: /etc/zot/config.json subPath: config.json readOnly: true + - name: htpasswd + mountPath: /etc/zot/htpasswd + subPath: htpasswd + readOnly: true - name: zot-data mountPath: /var/lib/registry readinessProbe: @@ -60,6 +64,9 @@ spec: - name: cfg configMap: name: zot-config + - name: htpasswd + secret: + secretName: zot-htpasswd - name: zot-data persistentVolumeClaim: claimName: zot-data