hecate(startup): auto-unseal vault during layered recovery

This commit is contained in:
Brad Stein 2026-04-04 06:29:16 -03:00
parent 6b88fb305f
commit 58a7947223
2 changed files with 207 additions and 1 deletions

View File

@ -2,9 +2,12 @@ package cluster
import ( import (
"context" "context"
"encoding/base64"
"encoding/json"
"fmt" "fmt"
"log" "log"
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"sort" "sort"
"strconv" "strconv"
@ -118,7 +121,7 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
} }
if !opts.SkipLocalBootstrap && needsLocalBootstrap { if !opts.SkipLocalBootstrap && needsLocalBootstrap {
if ready, err := o.waitForFluxSourceReady(ctx, 2*time.Minute); err != nil { if ready, err := o.waitForFluxSourceReady(ctx, 5*time.Minute); err != nil {
o.log.Printf("warning: flux source readiness wait failed before local bootstrap: %v", err) o.log.Printf("warning: flux source readiness wait failed before local bootstrap: %v", err)
} else if ready { } else if ready {
o.log.Printf("flux source became ready after targeted recovery; skipping local bootstrap") o.log.Printf("flux source became ready after targeted recovery; skipping local bootstrap")
@ -483,6 +486,26 @@ func (o *Orchestrator) run(ctx context.Context, timeout time.Duration, name stri
return o.runner.Run(runCtx, name, args...) return o.runner.Run(runCtx, name, args...)
} }
func (o *Orchestrator) runSensitive(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
runCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
cmd := exec.CommandContext(runCtx, name, args...)
cmd.Env = os.Environ()
if o.runner.Kubeconfig != "" {
cmd.Env = append(cmd.Env, "KUBECONFIG="+o.runner.Kubeconfig)
}
out, err := cmd.CombinedOutput()
trimmed := strings.TrimSpace(string(out))
if err != nil {
if trimmed == "" {
return "", fmt.Errorf("%s failed: %w", name, err)
}
return trimmed, fmt.Errorf("%s failed: %w", name, err)
}
return trimmed, nil
}
func lines(in string) []string { func lines(in string) []string {
in = strings.TrimSpace(in) in = strings.TrimSpace(in)
if in == "" { if in == "" {
@ -558,6 +581,10 @@ func (o *Orchestrator) ensureWorkloadReplicas(ctx context.Context, w startupWork
} }
func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload) error { func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload) error {
if w.Namespace == "vault" && w.Kind == "statefulset" && w.Name == "vault" {
return o.waitVaultReady(ctx, w)
}
timeout := "240s" timeout := "240s"
if w.Kind == "statefulset" { if w.Kind == "statefulset" {
timeout = "360s" timeout = "360s"
@ -578,6 +605,158 @@ func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload)
return nil return nil
} }
func (o *Orchestrator) waitVaultReady(ctx context.Context, w startupWorkload) error {
if o.runner.DryRun {
return nil
}
deadline := time.Now().Add(7 * time.Minute)
var lastErr error
for time.Now().Before(deadline) {
ready, err := o.workloadReady(ctx, w)
if err == nil && ready {
return nil
}
if err != nil {
lastErr = err
}
if err := o.ensureVaultUnsealed(ctx); err != nil {
lastErr = err
o.log.Printf("warning: vault readiness still blocked: %v", err)
}
ready, err = o.workloadReady(ctx, w)
if err == nil && ready {
return nil
}
if err != nil {
lastErr = err
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(5 * time.Second):
}
}
if lastErr != nil {
return fmt.Errorf("wait ready %s/%s/%s: %w", w.Namespace, w.Kind, w.Name, lastErr)
}
return fmt.Errorf("wait ready %s/%s/%s: timeout", w.Namespace, w.Kind, w.Name)
}
func (o *Orchestrator) ensureVaultUnsealed(ctx context.Context) error {
if o.runner.DryRun {
return nil
}
phase, err := o.kubectl(ctx, 15*time.Second, "-n", "vault", "get", "pod", "vault-0", "-o", "jsonpath={.status.phase}")
if err != nil {
return fmt.Errorf("vault pod phase check failed: %w", err)
}
if strings.TrimSpace(phase) != "Running" {
return fmt.Errorf("vault-0 pod phase is %q", strings.TrimSpace(phase))
}
sealed, err := o.vaultSealed(ctx)
if err != nil {
return err
}
if !sealed {
return nil
}
unsealKey, err := o.vaultUnsealKey(ctx)
if err != nil {
return err
}
for attempt := 1; attempt <= 5; attempt++ {
o.log.Printf("vault is sealed; attempting auto-unseal (%d/5)", attempt)
if _, err := o.runSensitive(
ctx,
30*time.Second,
"kubectl",
"-n", "vault",
"exec", "vault-0", "--",
"vault", "operator", "unseal", unsealKey,
); err != nil {
return fmt.Errorf("vault unseal attempt %d failed: %w", attempt, err)
}
sealed, err = o.vaultSealed(ctx)
if err != nil {
return err
}
if !sealed {
o.log.Printf("vault auto-unseal succeeded")
return nil
}
time.Sleep(2 * time.Second)
}
return fmt.Errorf("vault remained sealed after 5 auto-unseal attempts")
}
func (o *Orchestrator) vaultSealed(ctx context.Context) (bool, error) {
out, err := o.kubectl(
ctx,
25*time.Second,
"-n", "vault",
"exec", "vault-0", "--",
"sh", "-lc",
"VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json 2>/dev/null || true",
)
if err != nil {
return false, fmt.Errorf("vault status check failed: %w", err)
}
sealed, err := parseVaultSealed(out)
if err != nil {
return false, fmt.Errorf("parse vault status: %w", err)
}
return sealed, nil
}
func parseVaultSealed(raw string) (bool, error) {
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
return false, fmt.Errorf("empty vault status output")
}
type vaultStatus struct {
Sealed bool `json:"sealed"`
}
var st vaultStatus
if err := json.Unmarshal([]byte(trimmed), &st); err != nil {
return false, err
}
return st.Sealed, nil
}
func (o *Orchestrator) vaultUnsealKey(ctx context.Context) (string, error) {
out, err := o.kubectl(
ctx,
15*time.Second,
"-n", "vault",
"get", "secret", "vault-init",
"-o", "jsonpath={.data.unseal_key_b64}",
)
if err != nil {
return "", fmt.Errorf("read vault-init secret: %w", err)
}
decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(out))
if err != nil {
return "", fmt.Errorf("decode vault-init unseal_key_b64: %w", err)
}
key := strings.TrimSpace(string(decoded))
if key == "" {
return "", fmt.Errorf("vault-init unseal key is empty")
}
return key, nil
}
func (o *Orchestrator) workloadReady(ctx context.Context, w startupWorkload) (bool, error) { func (o *Orchestrator) workloadReady(ctx context.Context, w startupWorkload) (bool, error) {
out, err := o.kubectl( out, err := o.kubectl(
ctx, ctx,

View File

@ -0,0 +1,27 @@
package cluster
import "testing"
func TestParseVaultSealed(t *testing.T) {
sealed, err := parseVaultSealed(`{"initialized":true,"sealed":true}`)
if err != nil {
t.Fatalf("parse sealed=true: %v", err)
}
if !sealed {
t.Fatalf("expected sealed=true")
}
sealed, err = parseVaultSealed(`{"initialized":true,"sealed":false}`)
if err != nil {
t.Fatalf("parse sealed=false: %v", err)
}
if sealed {
t.Fatalf("expected sealed=false")
}
}
func TestParseVaultSealedRejectsEmpty(t *testing.T) {
if _, err := parseVaultSealed(" "); err == nil {
t.Fatalf("expected parse error for empty status payload")
}
}