From 2db550afddd0d86001fa5c51c997d368d768a9d8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 17:46:52 -0300 Subject: [PATCH 01/30] keycloak: add raw manifests backed by shared postgres --- services/keycloak/README.md | 27 +++++++++ services/keycloak/deployment.yaml | 89 ++++++++++++++++++++++++++++ services/keycloak/ingress.yaml | 24 ++++++++ services/keycloak/kustomization.yaml | 10 ++++ services/keycloak/namespace.yaml | 5 ++ services/keycloak/pvc.yaml | 12 ++++ services/keycloak/service.yaml | 15 +++++ 7 files changed, 182 insertions(+) create mode 100644 services/keycloak/README.md create mode 100644 services/keycloak/deployment.yaml create mode 100644 services/keycloak/ingress.yaml create mode 100644 services/keycloak/kustomization.yaml create mode 100644 services/keycloak/namespace.yaml create mode 100644 services/keycloak/pvc.yaml create mode 100644 services/keycloak/service.yaml diff --git a/services/keycloak/README.md b/services/keycloak/README.md new file mode 100644 index 0000000..bf7c21b --- /dev/null +++ b/services/keycloak/README.md @@ -0,0 +1,27 @@ +# services/keycloak + +Keycloak is deployed via raw manifests and backed by the shared Postgres (`postgres-service.postgres.svc.cluster.local:5432`). Create the `sso` namespace first (`kubectl apply -f services/keycloak/namespace.yaml`), then create these secrets before applying: + +```bash +# DB creds (per-service DB/user in shared Postgres) +kubectl -n sso create secret generic keycloak-db \ + --from-literal=username=keycloak \ + --from-literal=password='<db-password>' \ + --from-literal=database=keycloak + +# Admin console creds (maps to KC admin user) +kubectl -n sso create secret generic keycloak-admin \ + --from-literal=username=brad@bstein.dev \ + --from-literal=password='<admin-password>' +``` + +Apply: + +```bash +kubectl apply -k services/keycloak +``` + +Notes +- Service: `keycloak.sso.svc:80` (Ingress `sso.bstein.dev`, TLS via cert-manager). +- Uses Postgres schema `public`; DB/user should be provisioned in the shared Postgres instance. +- Health endpoints on :9000 are wired for probes. 
diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml new file mode 100644 index 0000000..fb70b40 --- /dev/null +++ b/services/keycloak/deployment.yaml @@ -0,0 +1,89 @@ +# services/keycloak/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: keycloak + namespace: sso + labels: + app: keycloak +spec: + replicas: 1 + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + containers: + - name: keycloak + image: quay.io/keycloak/keycloak:26.0.7 + imagePullPolicy: IfNotPresent + args: + - start + - --optimized + env: + - name: KC_DB + value: postgres + - name: KC_DB_URL_HOST + value: postgres-service.postgres.svc.cluster.local + - name: KC_DB_URL_DATABASE + valueFrom: + secretKeyRef: + name: keycloak-db + key: database + - name: KC_DB_USERNAME + valueFrom: + secretKeyRef: + name: keycloak-db + key: username + - name: KC_DB_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-db + key: password + - name: KC_DB_SCHEMA + value: public + - name: KC_HOSTNAME + value: sso.bstein.dev + - name: KC_PROXY + value: edge + - name: KC_HTTP_ENABLED + value: "true" + - name: KEYCLOAK_ADMIN + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + ports: + - containerPort: 8080 + name: http + - containerPort: 9000 + name: metrics + readinessProbe: + httpGet: + path: /health/ready + port: 9000 + initialDelaySeconds: 15 + periodSeconds: 10 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /health/live + port: 9000 + initialDelaySeconds: 60 + periodSeconds: 15 + failureThreshold: 6 + volumeMounts: + - name: data + mountPath: /opt/keycloak/data + volumes: + - name: data + persistentVolumeClaim: + claimName: keycloak-data diff --git a/services/keycloak/ingress.yaml b/services/keycloak/ingress.yaml new file mode 100644 index 0000000..39f6cb0 --- /dev/null +++ 
b/services/keycloak/ingress.yaml @@ -0,0 +1,24 @@ +# services/keycloak/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: keycloak + namespace: sso + annotations: + cert-manager.io/cluster-issuer: letsencrypt +spec: + ingressClassName: traefik + rules: + - host: sso.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: keycloak + port: + number: 80 + tls: + - hosts: [sso.bstein.dev] + secretName: keycloak-tls diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml new file mode 100644 index 0000000..a65715c --- /dev/null +++ b/services/keycloak/kustomization.yaml @@ -0,0 +1,10 @@ +# services/keycloak/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sso +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/keycloak/namespace.yaml b/services/keycloak/namespace.yaml new file mode 100644 index 0000000..b4c731d --- /dev/null +++ b/services/keycloak/namespace.yaml @@ -0,0 +1,5 @@ +# services/keycloak/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: sso diff --git a/services/keycloak/pvc.yaml b/services/keycloak/pvc.yaml new file mode 100644 index 0000000..b57ec61 --- /dev/null +++ b/services/keycloak/pvc.yaml @@ -0,0 +1,12 @@ +# services/keycloak/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: keycloak-data + namespace: sso +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + storageClassName: astreae diff --git a/services/keycloak/service.yaml b/services/keycloak/service.yaml new file mode 100644 index 0000000..5d93ef6 --- /dev/null +++ b/services/keycloak/service.yaml @@ -0,0 +1,15 @@ +# services/keycloak/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: keycloak + namespace: sso + labels: + app: keycloak +spec: + selector: + app: keycloak + ports: + - name: http + port: 80 + 
targetPort: http -- 2.47.2 From 61c5db5c9958d062ae21b7ba27d4825b9fda8b40 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 18:00:49 -0300 Subject: [PATCH 02/30] flux: track feature/sso --- clusters/atlas/flux-system/gotk-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 46f65d3..4076ef6 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -8,7 +8,7 @@ metadata: spec: interval: 1m0s ref: - branch: feature/atlas-monitoring + branch: feature/sso secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git -- 2.47.2 From f64e60c5a221c0b4dc932e18e859069291a33162 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 18:10:20 -0300 Subject: [PATCH 03/30] flux: add keycloak kustomization --- .../applications/keycloak/kustomization.yaml | 15 +++++++++++++++ .../flux-system/applications/kustomization.yaml | 1 + 2 files changed, 16 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/keycloak/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml b/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml new file mode 100644 index 0000000..4634b5c --- /dev/null +++ b/clusters/atlas/flux-system/applications/keycloak/kustomization.yaml @@ -0,0 +1,15 @@ +# clusters/atlas/flux-system/applications/keycloak/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: keycloak + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/keycloak + targetNamespace: sso + timeout: 2m diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 7d2f8ee..6b5124e 100644 --- 
a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -13,3 +13,4 @@ resources: - jellyfin/kustomization.yaml - xmr-miner/kustomization.yaml - sui-metrics/kustomization.yaml + - keycloak/kustomization.yaml -- 2.47.2 From 0db149605de8c1d4a1e40d72be786c9b9f9f8068 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 2 Dec 2025 20:28:35 -0300 Subject: [PATCH 04/30] monitoring: show GPU share over dashboard range --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-gpu.json | 2 +- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-gpu.yaml | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 93de006..f577eab 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -232,7 +232,7 @@ NAMESPACE_GPU_ALLOC = ( ' or kube_pod_container_resource_limits{namespace!="",resource="nvidia.com/gpu"})) by (namespace)' ) NAMESPACE_GPU_USAGE_SHARE = ( - 'sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))' + 'sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))' ) NAMESPACE_GPU_USAGE_INSTANT = 'sum(DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}) by (namespace)' NAMESPACE_GPU_RAW = ( diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index e67b3d2..9071b0a 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by 
(namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + 
((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 9eda81d..beb676e 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -975,7 +975,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + 
(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) 
(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 3af8717..b5c2c18 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by 
(namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) 
(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 928098e..ef17ebf 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -984,7 +984,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) 
(avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[1h]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( 
sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } -- 2.47.2 From 47caf08885e3d0d3f790a0142a5df4faa6e7ad9e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 3 Dec 2025 12:28:45 -0300 Subject: [PATCH 05/30] notes: capture GPU share change and flux branch --- AGENTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AGENTS.md b/AGENTS.md index a8d49c8..611ed06 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,6 +46,7 @@ Repository Guidelines - Atlas Overview is the Grafana home (1h range, 1m refresh), Overview folder UID `overview`, internal folder `atlas-internal` (oceanus-internal stub). - Panels standardized via generator; hottest row compressed, worker/control rows taller, root disk row taller and top12 bar gauge with labels. GPU share pie uses 1h avg_over_time to persist idle activity. - Internal dashboards are provisioned without Viewer role; if anonymous still sees them, restart Grafana and tighten auth if needed. +- GPU share panel updated (feature/sso) to use `max_over_time(…[$__range])`, so longer ranges (e.g., 12h) keep recent activity visible. Flux tracking `feature/sso`. ## Upcoming priorities (SSO/storage/mail) - Establish SSO (Keycloak or similar) and federate Grafana, Gitea, Zot, Nextcloud, Pegasus/Jellyfin; keep Vaultwarden separate until safe. 
-- 2.47.2 From b14a9dcb98573bf90f06c332ffaecdcaccd53fe6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:43:17 -0300 Subject: [PATCH 06/30] chore: drop AGENTS.md from repo --- AGENTS.md | 69 ------------------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 611ed06..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,69 +0,0 @@ - - -Repository Guidelines - -## Project Structure & Module Organization -- `infrastructure/`: cluster-scoped building blocks (core, flux-system, traefik, longhorn). Add new platform features by mirroring this layout. -- `services/`: workload manifests per app (`services/gitea/`, etc.) with `kustomization.yaml` plus one file per kind; keep diffs small and focused. -- `dockerfiles/` hosts bespoke images, while `scripts/` stores operational Fish/Bash helpers—extend these directories instead of relying on ad-hoc commands. - -## Build, Test, and Development Commands -- `kustomize build services/` (or `kubectl kustomize ...`) renders manifests exactly as Flux will. -- `kubectl apply --server-side --dry-run=client -k services/` checks schema compatibility without touching the cluster. -- `flux reconcile kustomization --namespace flux-system --with-source` pulls the latest Git state after merges or hotfixes. -- `fish scripts/flux_hammer.fish --help` explains the recovery tool; read it before running against production workloads. - -## Coding Style & Naming Conventions -- YAML uses two-space indents; retain the leading path comment (e.g. `# services/gitea/deployment.yaml`) to speed code review. -- Keep resource names lowercase kebab-case, align labels/selectors, and mirror namespaces with directory names. -- List resources in `kustomization.yaml` from namespace/config, through storage, then workloads and networking for predictable diffs. 
-- Scripts start with `#!/usr/bin/env fish` or bash, stay executable, and follow snake_case names such as `flux_hammer.fish`. - -## Testing Guidelines -- Run `kustomize build` and the dry-run apply for every service you touch; capture failures before opening a PR. -- `flux diff kustomization --path services/` previews reconciliations—link notable output when behavior shifts. -- Docker edits: `docker build -f dockerfiles/Dockerfile.monerod .` (swap the file you changed) to verify image builds. - -## Commit & Pull Request Guidelines -- Keep commit subjects short, present-tense, and optionally scoped (`gpu(titan-24): add RuntimeClass`); squash fixups before review. -- Describe linked issues, affected services, and required operator steps (e.g. `flux reconcile kustomization services-gitea`) in the PR body. -- Focus each PR on one kustomization or service and update `infrastructure/flux-system` when Flux must track new folders. -- Record the validation you ran (dry-runs, diffs, builds) and add screenshots only when ingress or UI behavior changes. - -## Security & Configuration Tips -- Never commit credentials; use Vault workflows (`services/vault/`) or SOPS-encrypted manifests wired through `infrastructure/flux-system`. -- Node selectors and tolerations gate workloads to hardware like `hardware: rpi4`; confirm labels before scaling or renaming nodes. -- Pin external images by digest or rely on Flux image automation to follow approved tags and avoid drift. - -## Dashboard roadmap / context (2025-12-02) -- Atlas dashboards are generated via `scripts/dashboards_render_atlas.py --build`, which writes JSON under `services/monitoring/dashboards/` and ConfigMaps under `services/monitoring/`. Keep the Grafana manifests in sync by regenerating after edits. -- Atlas Overview panels are paired with internal dashboards (pods, nodes, storage, network, GPU). A new `atlas-gpu` internal dashboard holds the detailed GPU metrics that feed the overview share pie. 
-- Old Grafana folders (`Atlas Storage`, `Atlas SRE`, `Atlas Public`, `Atlas Nodes`) should be removed in Grafana UI when convenient; only `Atlas Overview` and `Atlas Internal` should remain provisioned. -- Future work: add a separate generator (e.g., `dashboards_render_oceanus.py`) for SUI/oceanus validation dashboards, mirroring the atlas pattern of internal dashboards feeding a public overview. - -## Monitoring state (2025-12-03) -- dcgm-exporter DaemonSet pulls `registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04` with nvidia runtime/imagePullSecret; titan-24 exports metrics, titan-22 remains NotReady. -- Atlas Overview is the Grafana home (1h range, 1m refresh), Overview folder UID `overview`, internal folder `atlas-internal` (oceanus-internal stub). -- Panels standardized via generator; hottest row compressed, worker/control rows taller, root disk row taller and top12 bar gauge with labels. GPU share pie uses 1h avg_over_time to persist idle activity. -- Internal dashboards are provisioned without Viewer role; if anonymous still sees them, restart Grafana and tighten auth if needed. -- GPU share panel updated (feature/sso) to use `max_over_time(…[$__range])`, so longer ranges (e.g., 12h) keep recent activity visible. Flux tracking `feature/sso`. - -## Upcoming priorities (SSO/storage/mail) -- Establish SSO (Keycloak or similar) and federate Grafana, Gitea, Zot, Nextcloud, Pegasus/Jellyfin; keep Vaultwarden separate until safe. -- Add Nextcloud (limit to rpi5 workers) with office suite; integrate with SSO; plan storage class and ingress. -- Plan mail: mostly self-hosted, relay through trusted provider for outbound; integrate with services (Nextcloud, Vaultwarden, etc.) for notifications and account flows. 
- -## SSO plan sketch (2025-12-03) -- IdP: use Keycloak (preferred) in a new `sso` namespace, Bitnami or codecentric chart with Postgres backing store (single PVC), ingress `sso.bstein.dev`, admin user bound to brad@bstein.dev; stick with local DB initially (no external IdP). -- Auth flow goals: Grafana (OIDC), Gitea (OAuth2/Keycloak), Zot (via Traefik forward-auth/oauth2-proxy), Jellyfin/Pegasus via Jellyfin OAuth/OpenID plugin (map existing usernames; run migration to pre-create users in Keycloak with same usernames/emails and temporary passwords), Pegasus keeps using Jellyfin tokens. -- Steps to implement: - 1) Add service folder `services/keycloak/` (namespace, PVC, HelmRelease, ingress, secret for admin creds). Verify with kustomize + Flux reconcile. - 2) Seed realm `atlas` with users (import CSV/realm). Create client for Grafana (public/implicit), Gitea (confidential), and a “jellyfin” client for the OAuth plugin; set email for brad@bstein.dev as admin. - 3) Reconfigure Grafana to OIDC (disable anonymous to internal folders, leave Overview public via folder permissions). Reconfigure Gitea to OIDC (app.ini). - 4) Add Traefik forward-auth (oauth2-proxy) in front of Zot and any other services needing headers-based auth. - 5) Deploy Jellyfin OpenID plugin; map Keycloak users to existing Jellyfin usernames; communicate password reset path. -- Migration caution: do not delete existing local creds until SSO validated; keep Pegasus working via Jellyfin tokens during transition. - -## Postgres centralization (2025-12-03) -- Prefer a shared in-cluster Postgres deployment with per-service databases to reduce resource sprawl on Pi nodes. Use it for services that can easily point at an external DB. -- Candidates to migrate to shared Postgres: Keycloak (realm DB), Gitea (git DB), Nextcloud (app DB), possibly Grafana (if persistence needed beyond current provisioner), Jitsi prosody/JVB state (if external DB supported). 
Keep tightly-coupled or lightweight embedded DBs as-is when migration is painful or not supported. -- 2.47.2 From 1d346edd28d1fc6e82630a4cde89529e395077b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:43:24 -0300 Subject: [PATCH 07/30] keycloak: remove optimized flag for first start --- services/keycloak/deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index fb70b40..22c795d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -22,7 +22,6 @@ spec: imagePullPolicy: IfNotPresent args: - start - - --optimized env: - name: KC_DB value: postgres -- 2.47.2 From 1b01052eda38393c84378eca69254417ec99d1c8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:49:17 -0300 Subject: [PATCH 08/30] keycloak: set fsGroup for data volume --- services/keycloak/deployment.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 22c795d..3beec0a 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -16,6 +16,11 @@ spec: labels: app: keycloak spec: + securityContext: + runAsUser: 1000 + runAsGroup: 0 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch containers: - name: keycloak image: quay.io/keycloak/keycloak:26.0.7 -- 2.47.2 From f0a8f6d35e73445ac74998df88b4e78ed00d5447 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 00:51:47 -0300 Subject: [PATCH 09/30] keycloak: enable health/metrics management port --- services/keycloak/deployment.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 3beec0a..ca44a32 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -55,6 +55,14 @@ spec: value: edge - name: KC_HTTP_ENABLED value: "true" + - name: KC_HTTP_MANAGEMENT_PORT + value: "9000" + - 
name: KC_HTTP_MANAGEMENT_BIND_ADDRESS + value: 0.0.0.0 + - name: KC_HEALTH_ENABLED + value: "true" + - name: KC_METRICS_ENABLED + value: "true" - name: KEYCLOAK_ADMIN valueFrom: secretKeyRef: -- 2.47.2 From 141c05b08f4f23e875efbc705da0a524eb5eab93 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:23:07 -0300 Subject: [PATCH 10/30] keycloak: honor xforwarded headers and hostname url --- services/keycloak/deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index ca44a32..f5409fc 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -51,8 +51,12 @@ spec: value: public - name: KC_HOSTNAME value: sso.bstein.dev + - name: KC_HOSTNAME_URL + value: https://sso.bstein.dev - name: KC_PROXY value: edge + - name: KC_PROXY_HEADERS + value: xforwarded - name: KC_HTTP_ENABLED value: "true" - name: KC_HTTP_MANAGEMENT_PORT -- 2.47.2 From f4da27271edd476041336db59bd8975e19457b9a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:36:33 -0300 Subject: [PATCH 11/30] keycloak: prefer rpi nodes, avoid titan-24 --- services/keycloak/deployment.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index f5409fc..2fb4bbb 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -16,6 +16,27 @@ spec: labels: app: keycloak spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + - weight: 50 + preference: + matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: ["titan-24"] securityContext: runAsUser: 1000 runAsGroup: 0 -- 2.47.2 From 
88c7a1c2aaed6f30b71eaf246fce261127c5c0dd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:40:24 -0300 Subject: [PATCH 12/30] keycloak: require rpi nodes with titan-24 fallback --- services/keycloak/deployment.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 2fb4bbb..406364d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -18,6 +18,16 @@ spec: spec: affinity: nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: ["titan-24"] preferredDuringSchedulingIgnoredDuringExecution: - weight: 90 preference: @@ -31,12 +41,6 @@ spec: - key: hardware operator: In values: ["rpi4"] - - weight: 50 - preference: - matchExpressions: - - key: kubernetes.io/hostname - operator: NotIn - values: ["titan-24"] securityContext: runAsUser: 1000 runAsGroup: 0 -- 2.47.2 From 598bdfc72720ee7b84c3d6a2288aae3102c50c36 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 01:44:23 -0300 Subject: [PATCH 13/30] keycloak: restrict to worker rpis with titan-24 fallback --- services/keycloak/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 406364d..af7839f 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -24,6 +24,8 @@ spec: - key: hardware operator: In values: ["rpi5","rpi4"] + - key: node-role.kubernetes.io/worker + operator: Exists - matchExpressions: - key: kubernetes.io/hostname operator: In -- 2.47.2 From a55502fe2707c859afac26a90ecf9678de504772 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 6 Dec 2025 14:42:24 -0300 Subject: [PATCH 14/30] add oauth2-proxy for SSO forward-auth --- 
.../applications/kustomization.yaml | 1 + .../oauth2-proxy/kustomization.yaml | 15 ++++ services/oauth2-proxy/deployment.yaml | 71 +++++++++++++++++++ services/oauth2-proxy/ingress.yaml | 24 +++++++ services/oauth2-proxy/kustomization.yaml | 9 +++ services/oauth2-proxy/middleware.yaml | 15 ++++ services/oauth2-proxy/service.yaml | 15 ++++ 7 files changed, 150 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml create mode 100644 services/oauth2-proxy/deployment.yaml create mode 100644 services/oauth2-proxy/ingress.yaml create mode 100644 services/oauth2-proxy/kustomization.yaml create mode 100644 services/oauth2-proxy/middleware.yaml create mode 100644 services/oauth2-proxy/service.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 6b5124e..1bc2700 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -14,3 +14,4 @@ resources: - xmr-miner/kustomization.yaml - sui-metrics/kustomization.yaml - keycloak/kustomization.yaml + - oauth2-proxy/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml b/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml new file mode 100644 index 0000000..187572d --- /dev/null +++ b/clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml @@ -0,0 +1,15 @@ +# clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: oauth2-proxy + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/oauth2-proxy + targetNamespace: sso + timeout: 2m diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml new file mode 100644 index 
0000000..8754cb5 --- /dev/null +++ b/services/oauth2-proxy/deployment.yaml @@ -0,0 +1,71 @@ +# services/oauth2-proxy/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy + namespace: sso + labels: + app: oauth2-proxy +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy + template: + metadata: + labels: + app: oauth2-proxy + spec: + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --redirect-url=https://auth.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --upstream=static://200 + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 diff --git a/services/oauth2-proxy/ingress.yaml b/services/oauth2-proxy/ingress.yaml new file mode 100644 index 0000000..96d16ea --- /dev/null +++ b/services/oauth2-proxy/ingress.yaml @@ -0,0 +1,24 @@ +# services/oauth2-proxy/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: oauth2-proxy + namespace: sso + annotations: + 
cert-manager.io/cluster-issuer: letsencrypt +spec: + ingressClassName: traefik + rules: + - host: auth.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: oauth2-proxy + port: + number: 80 + tls: + - hosts: [auth.bstein.dev] + secretName: auth-tls diff --git a/services/oauth2-proxy/kustomization.yaml b/services/oauth2-proxy/kustomization.yaml new file mode 100644 index 0000000..e79ae66 --- /dev/null +++ b/services/oauth2-proxy/kustomization.yaml @@ -0,0 +1,9 @@ +# services/oauth2-proxy/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sso +resources: + - deployment.yaml + - service.yaml + - ingress.yaml + - middleware.yaml diff --git a/services/oauth2-proxy/middleware.yaml b/services/oauth2-proxy/middleware.yaml new file mode 100644 index 0000000..db5f3a4 --- /dev/null +++ b/services/oauth2-proxy/middleware.yaml @@ -0,0 +1,15 @@ +# services/oauth2-proxy/middleware.yaml +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: oauth2-proxy-forward-auth + namespace: sso +spec: + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/oauth2-proxy/service.yaml b/services/oauth2-proxy/service.yaml new file mode 100644 index 0000000..1eb5481 --- /dev/null +++ b/services/oauth2-proxy/service.yaml @@ -0,0 +1,15 @@ +# services/oauth2-proxy/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy + namespace: sso + labels: + app: oauth2-proxy +spec: + selector: + app: oauth2-proxy + ports: + - name: http + port: 80 + targetPort: 4180 -- 2.47.2 From 8d5e6c267cf16d976b8fcf8b024fb4123b30c179 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 02:01:21 -0300 Subject: [PATCH 15/30] auth: wire oauth2-proxy and enable grafana oidc --- 
.../longhorn/ui-ingress/ingress.yaml | 2 +- services/monitoring/helmrelease.yaml | 22 +++++++++++++++++-- services/oauth2-proxy/deployment.yaml | 1 + services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 6250cfa..e9905ba 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-basicauth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 2546dc1..d7d7579 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -249,9 +249,27 @@ spec: service: type: ClusterIP env: - GF_AUTH_ANONYMOUS_ENABLED: "true" - GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer + GF_AUTH_ANONYMOUS_ENABLED: "false" GF_SECURITY_ALLOW_EMBEDDING: "true" + GF_AUTH_GENERIC_OAUTH_ENABLED: "true" + GF_AUTH_GENERIC_OAUTH_NAME: "Keycloak" + GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: "true" + GF_AUTH_GENERIC_OAUTH_SCOPES: "openid profile email groups" + GF_AUTH_GENERIC_OAUTH_AUTH_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth" + GF_AUTH_GENERIC_OAUTH_TOKEN_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token" + GF_AUTH_GENERIC_OAUTH_API_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo" + GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups, 'admin') && 'Admin' || 'Viewer'" + 
GF_AUTH_GENERIC_OAUTH_TLS_SKIP_VERIFY_INSECURE: "false" + GF_AUTH_SIGNOUT_REDIRECT_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://metrics.bstein.dev/" + envValueFrom: + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: + secretKeyRef: + name: grafana-oidc + key: client_id + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: + secretKeyRef: + name: grafana-oidc + key: client_secret grafana.ini: server: domain: metrics.bstein.dev diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 8754cb5..1457c52 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -25,6 +25,7 @@ spec: - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email groups + - --allowed-group=admin - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 306556d..fa43bc2 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 3425535..10a0743 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: 
zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From 27e5c9391c093ef08694205b1e310a566b2ada33 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 10:25:44 -0300 Subject: [PATCH 16/30] auth: add namespace-local forward-auth middlewares --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- .../longhorn/ui-ingress/middleware.yaml | 17 +++++++++++++++++ services/vault/ingress.yaml | 2 +- services/vault/middleware.yaml | 12 +++++++++--- services/zot/ingress.yaml | 2 +- services/zot/middleware.yaml | 17 +++++++++++++++++ 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index e9905ba..8f55b82 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index c670cef..abc2a64 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -20,3 +20,20 @@ spec: headers: customRequestHeaders: X-Forwarded-Proto: "https" + +--- + +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: longhorn-forward-auth + namespace: longhorn-system +spec: + forwardAuth: + address: 
http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index fa43bc2..6115e38 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0a41961..0f4388e 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -2,8 +2,14 @@ apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: - name: vault-basicauth + name: vault-forward-auth namespace: vault spec: - basicAuth: - secret: vault-basic-auth + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 10a0743..75ec998 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: 
zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index 166b070..a7a294d 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -24,3 +24,20 @@ spec: - PUT - PATCH - DELETE + +--- + +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: zot-forward-auth + namespace: zot +spec: + forwardAuth: + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + trustForwardHeader: true + authResponseHeaders: + - Authorization + - X-Auth-Request-Email + - X-Auth-Request-User + - X-Auth-Request-Groups -- 2.47.2 From dbede55ad44e19ccd3df5e4e0e9650db8dbc766e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 10:42:13 -0300 Subject: [PATCH 17/30] oauth2-proxy: temporarily drop group restriction --- services/oauth2-proxy/deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 1457c52..8754cb5 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -25,7 +25,6 @@ spec: - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email groups - - --allowed-group=admin - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true -- 2.47.2 From 5e59f20bc31d43fb831d446f7686825b988bcf81 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:09:09 -0300 Subject: [PATCH 18/30] auth: point forward-auth to external auth host --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index abc2a64..3bf2ff5 100644 --- 
a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0f4388e..8a39bf9 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index a7a294d..cc76d5f 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization -- 2.47.2 From 20bb7766253dfa806780e93275dfa4a090e7fc8f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:14:25 -0300 Subject: [PATCH 19/30] auth: add 401 redirect middleware to oauth2-proxy --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- services/oauth2-proxy/kustomization.yaml | 1 + services/oauth2-proxy/middleware-errors.yaml | 14 ++++++++++++++ services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 services/oauth2-proxy/middleware-errors.yaml diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 8f55b82..ac68471 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ 
b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/oauth2-proxy/kustomization.yaml b/services/oauth2-proxy/kustomization.yaml index e79ae66..ff4705a 100644 --- a/services/oauth2-proxy/kustomization.yaml +++ b/services/oauth2-proxy/kustomization.yaml @@ -7,3 +7,4 @@ resources: - service.yaml - ingress.yaml - middleware.yaml + - middleware-errors.yaml diff --git a/services/oauth2-proxy/middleware-errors.yaml b/services/oauth2-proxy/middleware-errors.yaml new file mode 100644 index 0000000..ee0c786 --- /dev/null +++ b/services/oauth2-proxy/middleware-errors.yaml @@ -0,0 +1,14 @@ +# services/oauth2-proxy/middleware-errors.yaml +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: oauth2-proxy-errors + namespace: sso +spec: + errors: + status: + - "401" + service: + name: oauth2-proxy + port: 80 + query: /oauth2/start?rd={url} diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 6115e38..1c274fb 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https 
traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 75ec998..1d9307a 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From 4a089876bac49abcc306e68b436b46633808eb75 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 11:25:29 -0300 Subject: [PATCH 20/30] auth: use internal oauth2-proxy svc for forward-auth --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index 3bf2ff5..abc2a64 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 8a39bf9..0f4388e 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: 
http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index cc76d5f..a7a294d 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: https://auth.bstein.dev/oauth2/auth + address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization -- 2.47.2 From 25ee698021f48e9c36db9bdc57b5de25b94aba74 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 12:03:14 -0300 Subject: [PATCH 21/30] oauth2-proxy: ensure error middleware on auth ingress --- services/oauth2-proxy/ingress.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/oauth2-proxy/ingress.yaml b/services/oauth2-proxy/ingress.yaml index 96d16ea..0f5830c 100644 --- a/services/oauth2-proxy/ingress.yaml +++ b/services/oauth2-proxy/ingress.yaml @@ -6,6 +6,7 @@ metadata: namespace: sso annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-errors@kubernetescrd spec: ingressClassName: traefik rules: -- 2.47.2 From 04aa32a762b896f5b71163d2340e4f29370807f7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 12:49:38 -0300 Subject: [PATCH 22/30] oauth2-proxy: schedule on worker rpis --- services/oauth2-proxy/deployment.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 8754cb5..7c22a93 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -16,6 +16,17 @@ spec: labels: app: oauth2-proxy spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: 
hardware + operator: In + values: ["rpi5","rpi4"] containers: - name: oauth2-proxy image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 -- 2.47.2 From 24fbaad0409f2214655a0a6abeb59faf50ff5fe5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 13:03:29 -0300 Subject: [PATCH 23/30] auth: forward-auth via external auth host (svc traffic flaky) --- infrastructure/longhorn/ui-ingress/middleware.yaml | 2 +- services/vault/middleware.yaml | 2 +- services/zot/middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/middleware.yaml b/infrastructure/longhorn/ui-ingress/middleware.yaml index abc2a64..3bf2ff5 100644 --- a/infrastructure/longhorn/ui-ingress/middleware.yaml +++ b/infrastructure/longhorn/ui-ingress/middleware.yaml @@ -30,7 +30,7 @@ metadata: namespace: longhorn-system spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 0f4388e..8a39bf9 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -6,7 +6,7 @@ metadata: namespace: vault spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index a7a294d..cc76d5f 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -34,7 +34,7 @@ metadata: namespace: zot spec: forwardAuth: - address: http://oauth2-proxy.sso.svc.cluster.local:4180/oauth2/auth + address: https://auth.bstein.dev/oauth2/auth trustForwardHeader: true authResponseHeaders: - Authorization -- 2.47.2 From ceb692f7ee9d8f37876338b4a798446d3e180c51 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 
Dec 2025 13:09:29 -0300 Subject: [PATCH 24/30] oauth2-proxy: drop groups scope to avoid invalid_scope --- services/oauth2-proxy/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 7c22a93..03d30c1 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -35,7 +35,7 @@ spec: - --provider=oidc - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email groups + - --scope=openid profile email - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true -- 2.47.2 From a7e9f1f7d82a30ef71109f50a88fc58392a3fe1d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 13:19:45 -0300 Subject: [PATCH 25/30] auth: remove error middleware to allow redirect --- infrastructure/longhorn/ui-ingress/ingress.yaml | 2 +- services/vault/ingress.yaml | 2 +- services/zot/ingress.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index ac68471..8f55b82 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 1c274fb..6115e38 100644 --- a/services/vault/ingress.yaml 
+++ b/services/vault/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 1d9307a..75ec998 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From 6c62d42f7ab057ffac208878d4f3235279172115 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 7 Dec 2025 19:44:02 -0300 Subject: [PATCH 26/30] longhorn/vault: gate via oauth2-proxy --- .../longhorn/ui-ingress/ingress.yaml | 4 +- .../longhorn/ui-ingress/kustomization.yaml | 1 + .../ui-ingress/oauth2-proxy-longhorn.yaml | 102 ++++++++++++++++++ services/oauth2-proxy/deployment.yaml | 2 +- services/oauth2-proxy/middleware-errors.yaml | 1 + services/vault/ingress.yaml | 8 +- services/vault/kustomization.yaml | 1 + services/vault/oauth2-proxy-vault.yaml | 102 ++++++++++++++++++ 8 files changed, 213 insertions(+), 8 deletions(-) create mode 100644 infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml create mode 100644 services/vault/oauth2-proxy-vault.yaml diff 
--git a/infrastructure/longhorn/ui-ingress/ingress.yaml b/infrastructure/longhorn/ui-ingress/ingress.yaml index 8f55b82..94daeed 100644 --- a/infrastructure/longhorn/ui-ingress/ingress.yaml +++ b/infrastructure/longhorn/ui-ingress/ingress.yaml @@ -7,7 +7,7 @@ metadata: annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-forward-auth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: "" spec: ingressClassName: traefik tls: @@ -21,6 +21,6 @@ spec: pathType: Prefix backend: service: - name: longhorn-frontend + name: oauth2-proxy-longhorn port: number: 80 diff --git a/infrastructure/longhorn/ui-ingress/kustomization.yaml b/infrastructure/longhorn/ui-ingress/kustomization.yaml index 1d497dc..a2ae5f3 100644 --- a/infrastructure/longhorn/ui-ingress/kustomization.yaml +++ b/infrastructure/longhorn/ui-ingress/kustomization.yaml @@ -4,3 +4,4 @@ kind: Kustomization resources: - middleware.yaml - ingress.yaml + - oauth2-proxy-longhorn.yaml diff --git a/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml new file mode 100644 index 0000000..b8d4f34 --- /dev/null +++ b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml @@ -0,0 +1,102 @@ +# infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy-longhorn + namespace: longhorn-system + labels: + app: oauth2-proxy-longhorn +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-longhorn + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-longhorn + namespace: longhorn-system + labels: + app: oauth2-proxy-longhorn +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-longhorn + template: + metadata: 
+ labels: + app: oauth2-proxy-longhorn + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --redirect-url=https://longhorn.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --allowed-group=admin + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --cookie-domain=longhorn.bstein.dev + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-longhorn-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 03d30c1..7c22a93 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -35,7 +35,7 @@ spec: - --provider=oidc - 
--redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email + - --scope=openid profile email groups - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true diff --git a/services/oauth2-proxy/middleware-errors.yaml b/services/oauth2-proxy/middleware-errors.yaml index ee0c786..55e092a 100644 --- a/services/oauth2-proxy/middleware-errors.yaml +++ b/services/oauth2-proxy/middleware-errors.yaml @@ -8,6 +8,7 @@ spec: errors: status: - "401" + - "403" service: name: oauth2-proxy port: 80 diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 6115e38..d61d4bc 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,9 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-forward-auth@kubernetescrd - traefik.ingress.kubernetes.io/service.serversscheme: https - traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: "" spec: ingressClassName: traefik tls: @@ -23,6 +21,6 @@ spec: pathType: Prefix backend: service: - name: vault-ui + name: oauth2-proxy-vault port: - number: 8200 + number: 80 diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c3fbc5..4c0f07e 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -9,3 +9,4 @@ resources: - ingress.yaml - middleware.yaml - serverstransport.yaml + - oauth2-proxy-vault.yaml diff --git a/services/vault/oauth2-proxy-vault.yaml b/services/vault/oauth2-proxy-vault.yaml new file mode 100644 index 0000000..e79a142 --- /dev/null +++ b/services/vault/oauth2-proxy-vault.yaml @@ -0,0 +1,102 @@ +# services/vault/oauth2-proxy-vault.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy-vault + 
labels: + app: oauth2-proxy-vault +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-vault + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-vault + labels: + app: oauth2-proxy-vault +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-vault + template: + metadata: + labels: + app: oauth2-proxy-vault + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 80 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + - arm + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + args: + - --provider=oidc + - --redirect-url=https://secret.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=https://vault-ui.vault.svc.cluster.local:8200 + - --ssl-insecure-skip-verify=true + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --allowed-group=admin + - --cookie-domain=secret.bstein.dev + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-vault-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: 
+ httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 -- 2.47.2 From 2f368f697564d50e7bbe60b791672e94dba0856f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:04:19 -0300 Subject: [PATCH 27/30] zot,vault: remove oauth2-proxy sso --- services/vault/ingress.yaml | 8 +- services/vault/kustomization.yaml | 1 - services/vault/middleware.yaml | 12 +-- services/vault/oauth2-proxy-vault.yaml | 102 ------------------------- services/zot/ingress.yaml | 2 +- services/zot/middleware.yaml | 17 ----- 6 files changed, 9 insertions(+), 133 deletions(-) delete mode 100644 services/vault/oauth2-proxy-vault.yaml diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index d61d4bc..306556d 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,9 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: "" + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd + traefik.ingress.kubernetes.io/service.serversscheme: https + traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: @@ -21,6 +23,6 @@ spec: pathType: Prefix backend: service: - name: oauth2-proxy-vault + name: vault-ui port: - number: 80 + number: 8200 diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c0f07e..4c3fbc5 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -9,4 +9,3 @@ resources: - ingress.yaml - middleware.yaml - serverstransport.yaml - - oauth2-proxy-vault.yaml diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml index 8a39bf9..0a41961 100644 --- a/services/vault/middleware.yaml +++ b/services/vault/middleware.yaml @@ -2,14 +2,8 @@ apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: - name: 
vault-forward-auth + name: vault-basicauth namespace: vault spec: - forwardAuth: - address: https://auth.bstein.dev/oauth2/auth - trustForwardHeader: true - authResponseHeaders: - - Authorization - - X-Auth-Request-Email - - X-Auth-Request-User - - X-Auth-Request-Groups + basicAuth: + secret: vault-basic-auth diff --git a/services/vault/oauth2-proxy-vault.yaml b/services/vault/oauth2-proxy-vault.yaml deleted file mode 100644 index e79a142..0000000 --- a/services/vault/oauth2-proxy-vault.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# services/vault/oauth2-proxy-vault.yaml -apiVersion: v1 -kind: Service -metadata: - name: oauth2-proxy-vault - labels: - app: oauth2-proxy-vault -spec: - ports: - - name: http - port: 80 - targetPort: 4180 - selector: - app: oauth2-proxy-vault - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: oauth2-proxy-vault - labels: - app: oauth2-proxy-vault -spec: - replicas: 2 - selector: - matchLabels: - app: oauth2-proxy-vault - template: - metadata: - labels: - app: oauth2-proxy-vault - spec: - nodeSelector: - node-role.kubernetes.io/worker: "true" - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 80 - preference: - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - arm - containers: - - name: oauth2-proxy - image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 - args: - - --provider=oidc - - --redirect-url=https://secret.bstein.dev/oauth2/callback - - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email groups - - --email-domain=* - - --set-xauthrequest=true - - --pass-access-token=true - - --set-authorization-header=true - - --cookie-secure=true - - --cookie-samesite=lax - - --cookie-refresh=20m - - --cookie-expire=168h - - --insecure-oidc-allow-unverified-email=true - - --upstream=https://vault-ui.vault.svc.cluster.local:8200 - - --ssl-insecure-skip-verify=true - - --http-address=0.0.0.0:4180 - - --skip-provider-button=true - - 
--skip-jwt-bearer-tokens=true - - --oidc-groups-claim=groups - - --allowed-group=admin - - --cookie-domain=secret.bstein.dev - env: - - name: OAUTH2_PROXY_CLIENT_ID - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: client_id - - name: OAUTH2_PROXY_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: client_secret - - name: OAUTH2_PROXY_COOKIE_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-vault-oidc - key: cookie_secret - ports: - - containerPort: 4180 - name: http - readinessProbe: - httpGet: - path: /ping - port: 4180 - initialDelaySeconds: 5 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /ping - port: 4180 - initialDelaySeconds: 20 - periodSeconds: 20 diff --git a/services/zot/ingress.yaml b/services/zot/ingress.yaml index 75ec998..3425535 100644 --- a/services/zot/ingress.yaml +++ b/services/zot/ingress.yaml @@ -8,7 +8,7 @@ metadata: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: zot-zot-forward-auth@kubernetescrd,zot-zot-resp-headers@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: zot-zot-resp-headers@kubernetescrd spec: ingressClassName: traefik tls: diff --git a/services/zot/middleware.yaml b/services/zot/middleware.yaml index cc76d5f..166b070 100644 --- a/services/zot/middleware.yaml +++ b/services/zot/middleware.yaml @@ -24,20 +24,3 @@ spec: - PUT - PATCH - DELETE - ---- - -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: zot-forward-auth - namespace: zot -spec: - forwardAuth: - address: https://auth.bstein.dev/oauth2/auth - trustForwardHeader: true - authResponseHeaders: - - Authorization - - X-Auth-Request-Email - - X-Auth-Request-User - - X-Auth-Request-Groups -- 2.47.2 From 20cd185c0b9365b2564fea846b7286589a5dd6a1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:09:05 
-0300 Subject: [PATCH 28/30] vault: drop traefik basicauth --- services/vault/ingress.yaml | 1 - services/vault/kustomization.yaml | 1 - services/vault/middleware.yaml | 9 --------- 3 files changed, 11 deletions(-) delete mode 100644 services/vault/middleware.yaml diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 306556d..91d9ca4 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-basicauth@kubernetescrd traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 4c3fbc5..1d7af87 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -7,5 +7,4 @@ resources: - helmrelease.yaml - certificate.yaml - ingress.yaml - - middleware.yaml - serverstransport.yaml diff --git a/services/vault/middleware.yaml b/services/vault/middleware.yaml deleted file mode 100644 index 0a41961..0000000 --- a/services/vault/middleware.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# services/vault/middleware.yaml -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: vault-basicauth - namespace: vault -spec: - basicAuth: - secret: vault-basic-auth -- 2.47.2 From cb2b2ec1cdc442d209ed94e972580eaaaf5a2f9b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:22:16 -0300 Subject: [PATCH 29/30] zot: revert to unauthenticated registry --- services/zot/configmap.yaml | 20 +------------------- services/zot/deployment.yaml | 7 ------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/services/zot/configmap.yaml b/services/zot/configmap.yaml index 0261fc1..d4fe544 100644 --- a/services/zot/configmap.yaml +++ 
b/services/zot/configmap.yaml @@ -18,25 +18,7 @@ data: "address": "0.0.0.0", "port": "5000", "realm": "zot-registry", - "compat": ["docker2s2"], - "auth": { - "htpasswd": { "path": "/etc/zot/htpasswd" } - }, - "accessControl": { - "repositories": { - "**": { - "policies": [ - { "users": ["bstein"], "actions": ["read", "create", "update", "delete"] } - ], - "defaultPolicy": [], - "anonymousPolicy": [] - } - }, - "adminPolicy": { - "users": ["bstein"], - "actions": ["read", "create", "update", "delete"] - } - } + "compat": ["docker2s2"] }, "log": { "level": "info" }, "extensions": { diff --git a/services/zot/deployment.yaml b/services/zot/deployment.yaml index e4fdc1f..45fca5e 100644 --- a/services/zot/deployment.yaml +++ b/services/zot/deployment.yaml @@ -42,10 +42,6 @@ spec: mountPath: /etc/zot/config.json subPath: config.json readOnly: true - - name: htpasswd - mountPath: /etc/zot/htpasswd - subPath: htpasswd - readOnly: true - name: zot-data mountPath: /var/lib/registry readinessProbe: @@ -64,9 +60,6 @@ spec: - name: cfg configMap: name: zot-config - - name: htpasswd - secret: - secretName: zot-htpasswd - name: zot-data persistentVolumeClaim: claimName: zot-data -- 2.47.2 From 319b5158827e7689b725d70015c8989611750546 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 11 Dec 2025 17:26:15 -0300 Subject: [PATCH 30/30] zot: restore main branch config --- services/zot/configmap.yaml | 20 +++++++++++++++++++- services/zot/deployment.yaml | 7 +++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/services/zot/configmap.yaml b/services/zot/configmap.yaml index d4fe544..0261fc1 100644 --- a/services/zot/configmap.yaml +++ b/services/zot/configmap.yaml @@ -18,7 +18,25 @@ data: "address": "0.0.0.0", "port": "5000", "realm": "zot-registry", - "compat": ["docker2s2"] + "compat": ["docker2s2"], + "auth": { + "htpasswd": { "path": "/etc/zot/htpasswd" } + }, + "accessControl": { + "repositories": { + "**": { + "policies": [ + { "users": ["bstein"], "actions": 
["read", "create", "update", "delete"] } + ], + "defaultPolicy": [], + "anonymousPolicy": [] + } + }, + "adminPolicy": { + "users": ["bstein"], + "actions": ["read", "create", "update", "delete"] + } + } }, "log": { "level": "info" }, "extensions": { diff --git a/services/zot/deployment.yaml b/services/zot/deployment.yaml index 45fca5e..e4fdc1f 100644 --- a/services/zot/deployment.yaml +++ b/services/zot/deployment.yaml @@ -42,6 +42,10 @@ spec: mountPath: /etc/zot/config.json subPath: config.json readOnly: true + - name: htpasswd + mountPath: /etc/zot/htpasswd + subPath: htpasswd + readOnly: true - name: zot-data mountPath: /var/lib/registry readinessProbe: @@ -60,6 +64,9 @@ spec: - name: cfg configMap: name: zot-config + - name: htpasswd + secret: + secretName: zot-htpasswd - name: zot-data persistentVolumeClaim: claimName: zot-data -- 2.47.2