diff --git a/services/harbor/helmrelease.yaml b/services/harbor/helmrelease.yaml index 49577e00..f1bfc176 100644 --- a/services/harbor/helmrelease.yaml +++ b/services/harbor/helmrelease.yaml @@ -245,6 +245,17 @@ spec: image: repository: registry.bstein.dev/infra/harbor-registry tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-registry:tag"} + extraEnvVars: + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_NAME + value: harbor-core + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_URL + value: http://harbor-registry:8080/service/notifications + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_TIMEOUT + value: 5s + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_THRESHOLD + value: "5" + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_BACKOFF + value: 1s controller: image: repository: registry.bstein.dev/infra/harbor-registryctl @@ -263,6 +274,10 @@ spec: export REGISTRY_HTTP_SECRET="{{ .Data.data.REGISTRY_HTTP_SECRET }}" export REGISTRY_REDIS_PASSWORD="{{ .Data.data.REGISTRY_REDIS_PASSWORD }}" {{ end }} + {{ with secret "kv/data/atlas/harbor/harbor-jobservice" }} + export JOBSERVICE_SECRET="{{ .Data.data.JOBSERVICE_SECRET }}" + export REGISTRY_NOTIFICATIONS_ENDPOINTS_0_HEADERS_Authorization="Harbor-Secret ${JOBSERVICE_SECRET}" + {{ end }} vault.hashicorp.com/agent-inject-secret-harbor-registryctl-env.sh: "kv/data/atlas/harbor/harbor-registry" vault.hashicorp.com/agent-inject-template-harbor-registryctl-env.sh: | {{ with secret "kv/data/atlas/harbor/harbor-core" }} @@ -397,10 +412,10 @@ spec: patch: |- - op: replace path: /spec/rules/0/http/paths/2/backend/service/name - value: harbor-registry + value: harbor-core - op: replace path: /spec/rules/0/http/paths/2/backend/service/port/number - value: 5000 + value: 80 - target: kind: Deployment name: harbor-jobservice @@ -422,7 +437,8 @@ spec: - $patch: replace - name: VAULT_ENV_FILE value: /vault/secrets/harbor-jobservice-env.sh - envFrom: [] + envFrom: + - $patch: replace - configMapRef: name: harbor-jobservice-env volumeMounts: @@ -463,8 +479,17 @@ spec: value: /vault/secrets/harbor-registry-env.sh - name: VAULT_COPY_FILES value: /vault/secrets/harbor-registry-htpasswd:/etc/registry/passwd - envFrom: - - $patch: replace + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_NAME + value: harbor-core + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_URL + value: http://harbor-registry:8080/service/notifications + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_TIMEOUT + value: 5s + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_THRESHOLD + value: "5" + - name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_BACKOFF + value: 1s + envFrom: [] volumeMounts: - $patch: replace - name: harbor-vault-entrypoint diff --git a/services/maintenance/metis-deployment.yaml b/services/maintenance/metis-deployment.yaml index d4747c86..8dd77435 100644 --- a/services/maintenance/metis-deployment.yaml +++ b/services/maintenance/metis-deployment.yaml @@ -20,10 +20,11 @@ spec: prometheus.io/path: "/metrics" spec: serviceAccountName: metis + terminationGracePeriodSeconds: 30 nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 - node-role.kubernetes.io/worker: "true" + node-role.kubernetes.io/accelerator: "true" containers: - name: metis image: registry.bstein.dev/bstein/metis:latest @@ -31,17 +32,61 @@ spec: envFrom: - configMapRef: name: metis + env: + - name: METIS_K3S_TOKEN + valueFrom: + secretKeyRef: + name: metis-runtime + key: k3s_token + optional: true ports: - name: http containerPort: 8080 + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + volumeMounts: + - name: metis-data + mountPath: /var/lib/metis + - name: host-dev + mountPath: /dev + - name: host-sys + mountPath: /sys + readOnly: true + - name: host-udev + mountPath: /run/udev + readOnly: true resources: requests: - cpu: 100m - memory: 128Mi + cpu: 150m + memory: 256Mi limits: - cpu: 500m - memory: 512Mi + cpu: "1" + memory: 1Gi securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] + privileged: true + runAsUser: 0 + volumes: + - name: metis-data + persistentVolumeClaim: + claimName: metis-data + - name: host-dev + hostPath: + path: /dev + - name: host-sys + hostPath: + path: /sys + - name: host-udev + hostPath: + path: /run/udev diff --git a/services/maintenance/metis-ingress.yaml b/services/maintenance/metis-ingress.yaml index 4d257781..468584c5 100644 --- a/services/maintenance/metis-ingress.yaml +++ b/services/maintenance/metis-ingress.yaml @@ -11,6 +11,7 @@ metadata: traefik.ingress.kubernetes.io/router.tls: "true" traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd spec: + ingressClassName: traefik tls: - hosts: ["metis.bstein.dev"] secretName: metis-tls diff --git a/services/maintenance/metis-sentinel-daemonset.yaml b/services/maintenance/metis-sentinel-daemonset.yaml index e9127c13..d83976ba 100644 --- a/services/maintenance/metis-sentinel-daemonset.yaml +++ b/services/maintenance/metis-sentinel-daemonset.yaml @@ -14,108 +14,25 @@ spec: metadata: labels: app: metis-sentinel - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "8080" - prometheus.io/path: "/metrics" spec: - serviceAccountName: metis + automountServiceAccountToken: false + hostPID: true + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule nodeSelector: kubernetes.io/os: linux - node-role.kubernetes.io/worker: "true" containers: - name: metis-sentinel image: registry.bstein.dev/bstein/metis-sentinel:latest imagePullPolicy: Always - command: - - /bin/sh - - -c - args: - - | - set -eu - out_dir="${METIS_SENTINEL_OUT:-/var/run/metis-sentinel}" - interval="${METIS_SENTINEL_INTERVAL_SEC:-120}" - mkdir -p "${out_dir}" - while true; do - ts="$(date -u +%Y%m%dT%H%M%SZ)" - node="${METIS_SENTINEL_NODE:-unknown}" - tmp="${out_dir}/${node}-${ts}.json.tmp" - out="${out_dir}/${node}-${ts}.json" - if metis-sentinel > "${tmp}"; then - mv "${tmp}" "${out}" - else - rm -f "${tmp}" || true - fi - sleep "${interval}" - done envFrom: - configMapRef: name: metis - env: - - name: METIS_SENTINEL_NODE - valueFrom: - fieldRef: - fieldPath: spec.nodeName - ports: - - name: http - containerPort: 8080 - volumeMounts: - - name: sentinel-output - mountPath: /var/run/metis-sentinel - resources: - requests: - cpu: 25m - memory: 64Mi - limits: - cpu: 250m - memory: 256Mi - securityContext: - allowPrivilegeEscalation: false - runAsUser: 0 - capabilities: - drop: ["ALL"] - - name: sentinel-pusher - image: curlimages/curl:8.12.1 - imagePullPolicy: IfNotPresent - command: - - /bin/sh - - -c - args: - - | - set -eu - out_dir="${METIS_SENTINEL_OUT:-/var/run/metis-sentinel}" - push_url="${METIS_SENTINEL_PUSH_URL:-}" - interval="${METIS_SENTINEL_PUSH_INTERVAL_SEC:-120}" - timeout="${METIS_SENTINEL_PUSH_TIMEOUT_SEC:-10}" - mkdir -p "${out_dir}" - while true; do - for snapshot in "${out_dir}"/*.json; do - [ -f "${snapshot}" ] || continue - if [ -z "${push_url}" ]; then - break - fi - if curl -fsS --connect-timeout "${timeout}" --max-time "${timeout}" \ - -X POST \ - -H "Content-Type: application/json" \ - -H "X-Metis-Node: ${METIS_SENTINEL_NODE:-unknown}" \ - --data-binary "@${snapshot}" \ - "${push_url}"; then - rm -f "${snapshot}" - fi - done - sleep "${interval}" - done - envFrom: - - configMapRef: - name: metis - env: - - name: METIS_SENTINEL_NODE - valueFrom: - fieldRef: - fieldPath: spec.nodeName - volumeMounts: - - name: sentinel-output - mountPath: /var/run/metis-sentinel resources: requests: cpu: 10m @@ -124,10 +41,5 @@ spec: cpu: 100m memory: 128Mi securityContext: - allowPrivilegeEscalation: false + privileged: true runAsUser: 0 - capabilities: - drop: ["ALL"] - volumes: - - name: sentinel-output - emptyDir: {}