From 04a80c11686c06b70fab135039120b2d62353ba9 Mon Sep 17 00:00:00 2001
From: jenkins
Date: Fri, 24 Apr 2026 17:24:37 -0300
Subject: [PATCH] recovery(metis): seed per-node vault password slots

---
 services/keycloak/kustomization.yaml          |   1 +
 ...etis-node-passwords-secret-ensure-job.yaml | 117 ++++++++++++++++++
 .../vault/scripts/vault_k8s_auth_configure.sh |  26 +++-
 3 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml

diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml
index 24d897a8..d0be9172 100644
--- a/services/keycloak/kustomization.yaml
+++ b/services/keycloak/kustomization.yaml
@@ -26,6 +26,7 @@ resources:
   - oneoffs/soteria-oidc-secret-ensure-job.yaml
   - oneoffs/quality-oidc-secret-ensure-job.yaml
   - oneoffs/metis-ssh-keys-secret-ensure-job.yaml
+  - oneoffs/metis-node-passwords-secret-ensure-job.yaml
   - oneoffs/harbor-oidc-secret-ensure-job.yaml
   - oneoffs/vault-oidc-secret-ensure-job.yaml
   - oneoffs/actual-oidc-secret-ensure-job.yaml
diff --git a/services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml b/services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
new file mode 100644
index 00000000..270c7c38
--- /dev/null
+++ b/services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
@@ -0,0 +1,117 @@
+# services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
+# One-off job for sso/metis-node-passwords-secret-ensure-1.
+# Purpose: ensure per-node Metis recovery password placeholders exist in Vault.
+# Existing values are preserved; only missing fields are initialized.
+# Fields outside the placeholder set are carried over unchanged.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: metis-node-passwords-secret-ensure-1
+  namespace: sso
+spec:
+  backoffLimit: 0
+  ttlSecondsAfterFinished: 3600
+  template:
+    spec:
+      serviceAccountName: mas-secrets-ensure
+      restartPolicy: Never
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: node-role.kubernetes.io/worker
+                    operator: Exists
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  - key: kubernetes.io/arch
+                    operator: In
+                    values: ["arm64"]
+      containers:
+        - name: apply
+          image: registry.bstein.dev/bstein/kubectl:1.35.0
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              set -eu
+
+              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
+              vault_role="${VAULT_ROLE:-sso-secrets}"
+
+              # Log in to Vault's Kubernetes auth endpoint with the pod's service account token.
+              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
+              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
+              vault_token="$(curl -sS --request POST --data "${login_payload}" "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
+              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
+                echo "vault login failed" >&2
+                exit 1
+              fi
+
+              nodes="
+              titan-04
+              titan-05
+              titan-06
+              titan-07
+              titan-08
+              titan-09
+              titan-0a
+              titan-0b
+              titan-0c
+              titan-10
+              titan-11
+              titan-12
+              titan-13
+              titan-14
+              titan-15
+              titan-16
+              titan-17
+              titan-18
+              titan-19
+              titan-20
+              titan-21
+              titan-22
+              titan-23
+              titan-24
+              "
+
+              ensured=0
+              for node in ${nodes}; do
+                secret_path="secret/data/nodes/${node}"
+                read_status="$(curl -sS -o /tmp/node-read.json -w "%{http_code}" -H "X-Vault-Token: ${vault_token}" "${vault_addr}/v1/${secret_path}" || true)"
+                if [ "${read_status}" = "404" ]; then
+                  # No secret stored yet: start from an empty field set.
+                  printf '%s\n' '{}' > /tmp/node-read.json
+                elif [ "${read_status}" != "200" ]; then
+                  echo "Vault read failed for ${node} (status ${read_status})" >&2
+                  cat /tmp/node-read.json >&2 || true
+                  exit 1
+                fi
+
+                # A KV v2 write replaces the whole secret, so build the payload on top of
+                # the stored data: fields outside the placeholder set survive, and only
+                # missing or null placeholder fields are initialized to "".
+                payload="$(jq -c --arg hostname "${node}" '
+                  (.data.data // {}) as $existing
+                  | ["ssh_password", "ssh_password_hash",
+                     "atlas_password", "atlas_password_hash",
+                     "root_password", "root_password_hash"] as $fields
+                  | {data: ($existing
+                            + {hostname: $hostname}
+                            + ($fields | map({(.): ($existing[.] // "")}) | add))}
+                ' /tmp/node-read.json)"
+
+                # Send the payload on stdin so stored passwords never appear in curl's argv.
+                write_status="$(printf '%s' "${payload}" | curl -sS -o /tmp/node-write.json -w "%{http_code}" -X POST -H "X-Vault-Token: ${vault_token}" -H 'Content-Type: application/json' --data-binary @- "${vault_addr}/v1/${secret_path}")"
+                if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
+                  echo "Vault write failed for ${node} (status ${write_status})" >&2
+                  cat /tmp/node-write.json >&2 || true
+                  exit 1
+                fi
+
+                ensured=$((ensured + 1))
+                echo "Ensured node secret placeholder for ${node}"
+              done
+
+              echo "Ensured ${ensured} Metis node password placeholders in Vault"
diff --git a/services/vault/scripts/vault_k8s_auth_configure.sh b/services/vault/scripts/vault_k8s_auth_configure.sh
index 02356475..b21ac3aa 100644
--- a/services/vault/scripts/vault_k8s_auth_configure.sh
+++ b/services/vault/scripts/vault_k8s_auth_configure.sh
@@ -87,6 +87,7 @@ write_policy_and_role() {
   service_accounts="$3"
   read_paths="$4"
   write_paths="$5"
+  extra_rules="${6:-}"
 
   policy_body=""
   for path in ${read_paths}; do
@@ -109,6 +110,11 @@ path \"kv/metadata/atlas/${path}\" {
 }
 "
   done
+  if [ -n "${extra_rules}" ]; then
+    policy_body="${policy_body}
+${extra_rules}
+"
+  fi
 
   log "writing policy ${role}"
   printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -
@@ -231,7 +237,15 @@ write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
 write_policy_and_role "health" "health" "health-vault-sync" \
   "health/*" ""
 write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
-  "maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" ""
+  "maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" "" \
+  '
+path "secret/data/nodes/*" {
+  capabilities = ["read"]
+}
+path "secret/metadata/nodes/*" {
+  capabilities = ["list"]
+}
+'
 write_policy_and_role "maintenance-metis-token-sync" "maintenance" "metis-token-sync" \
   "" \
   "maintenance/metis-runtime"
@@ -249,7 +263,15 @@ write_policy_and_role "vault" "vault" "vault" \
 
 write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
   "shared/keycloak-admin maintenance/metis-ssh-keys" \
-  "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys"
+  "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys" \
+  '
+path "secret/data/nodes/*" {
+  capabilities = ["create", "update", "read"]
+}
+path "secret/metadata/nodes/*" {
+  capabilities = ["list"]
+}
+'
 write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \
   "" \
   "crypto/wallet-monero-temp-rpc-auth"