hecate(startup): auto-unseal vault during layered recovery
This commit is contained in:
parent
6b88fb305f
commit
58a7947223
@ -2,9 +2,12 @@ package cluster
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -118,7 +121,7 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !opts.SkipLocalBootstrap && needsLocalBootstrap {
|
if !opts.SkipLocalBootstrap && needsLocalBootstrap {
|
||||||
if ready, err := o.waitForFluxSourceReady(ctx, 2*time.Minute); err != nil {
|
if ready, err := o.waitForFluxSourceReady(ctx, 5*time.Minute); err != nil {
|
||||||
o.log.Printf("warning: flux source readiness wait failed before local bootstrap: %v", err)
|
o.log.Printf("warning: flux source readiness wait failed before local bootstrap: %v", err)
|
||||||
} else if ready {
|
} else if ready {
|
||||||
o.log.Printf("flux source became ready after targeted recovery; skipping local bootstrap")
|
o.log.Printf("flux source became ready after targeted recovery; skipping local bootstrap")
|
||||||
@ -483,6 +486,26 @@ func (o *Orchestrator) run(ctx context.Context, timeout time.Duration, name stri
|
|||||||
return o.runner.Run(runCtx, name, args...)
|
return o.runner.Run(runCtx, name, args...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) runSensitive(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
runCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(runCtx, name, args...)
|
||||||
|
cmd.Env = os.Environ()
|
||||||
|
if o.runner.Kubeconfig != "" {
|
||||||
|
cmd.Env = append(cmd.Env, "KUBECONFIG="+o.runner.Kubeconfig)
|
||||||
|
}
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
trimmed := strings.TrimSpace(string(out))
|
||||||
|
if err != nil {
|
||||||
|
if trimmed == "" {
|
||||||
|
return "", fmt.Errorf("%s failed: %w", name, err)
|
||||||
|
}
|
||||||
|
return trimmed, fmt.Errorf("%s failed: %w", name, err)
|
||||||
|
}
|
||||||
|
return trimmed, nil
|
||||||
|
}
|
||||||
|
|
||||||
func lines(in string) []string {
|
func lines(in string) []string {
|
||||||
in = strings.TrimSpace(in)
|
in = strings.TrimSpace(in)
|
||||||
if in == "" {
|
if in == "" {
|
||||||
@ -558,6 +581,10 @@ func (o *Orchestrator) ensureWorkloadReplicas(ctx context.Context, w startupWork
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload) error {
|
func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload) error {
|
||||||
|
if w.Namespace == "vault" && w.Kind == "statefulset" && w.Name == "vault" {
|
||||||
|
return o.waitVaultReady(ctx, w)
|
||||||
|
}
|
||||||
|
|
||||||
timeout := "240s"
|
timeout := "240s"
|
||||||
if w.Kind == "statefulset" {
|
if w.Kind == "statefulset" {
|
||||||
timeout = "360s"
|
timeout = "360s"
|
||||||
@ -578,6 +605,158 @@ func (o *Orchestrator) waitWorkloadReady(ctx context.Context, w startupWorkload)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) waitVaultReady(ctx context.Context, w startupWorkload) error {
|
||||||
|
if o.runner.DryRun {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
deadline := time.Now().Add(7 * time.Minute)
|
||||||
|
var lastErr error
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
ready, err := o.workloadReady(ctx, w)
|
||||||
|
if err == nil && ready {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := o.ensureVaultUnsealed(ctx); err != nil {
|
||||||
|
lastErr = err
|
||||||
|
o.log.Printf("warning: vault readiness still blocked: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ready, err = o.workloadReady(ctx, w)
|
||||||
|
if err == nil && ready {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if lastErr != nil {
|
||||||
|
return fmt.Errorf("wait ready %s/%s/%s: %w", w.Namespace, w.Kind, w.Name, lastErr)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("wait ready %s/%s/%s: timeout", w.Namespace, w.Kind, w.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) ensureVaultUnsealed(ctx context.Context) error {
|
||||||
|
if o.runner.DryRun {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
phase, err := o.kubectl(ctx, 15*time.Second, "-n", "vault", "get", "pod", "vault-0", "-o", "jsonpath={.status.phase}")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("vault pod phase check failed: %w", err)
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(phase) != "Running" {
|
||||||
|
return fmt.Errorf("vault-0 pod phase is %q", strings.TrimSpace(phase))
|
||||||
|
}
|
||||||
|
|
||||||
|
sealed, err := o.vaultSealed(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !sealed {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
unsealKey, err := o.vaultUnsealKey(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for attempt := 1; attempt <= 5; attempt++ {
|
||||||
|
o.log.Printf("vault is sealed; attempting auto-unseal (%d/5)", attempt)
|
||||||
|
if _, err := o.runSensitive(
|
||||||
|
ctx,
|
||||||
|
30*time.Second,
|
||||||
|
"kubectl",
|
||||||
|
"-n", "vault",
|
||||||
|
"exec", "vault-0", "--",
|
||||||
|
"vault", "operator", "unseal", unsealKey,
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("vault unseal attempt %d failed: %w", attempt, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sealed, err = o.vaultSealed(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !sealed {
|
||||||
|
o.log.Printf("vault auto-unseal succeeded")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("vault remained sealed after 5 auto-unseal attempts")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) vaultSealed(ctx context.Context) (bool, error) {
|
||||||
|
out, err := o.kubectl(
|
||||||
|
ctx,
|
||||||
|
25*time.Second,
|
||||||
|
"-n", "vault",
|
||||||
|
"exec", "vault-0", "--",
|
||||||
|
"sh", "-lc",
|
||||||
|
"VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json 2>/dev/null || true",
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("vault status check failed: %w", err)
|
||||||
|
}
|
||||||
|
sealed, err := parseVaultSealed(out)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("parse vault status: %w", err)
|
||||||
|
}
|
||||||
|
return sealed, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseVaultSealed(raw string) (bool, error) {
|
||||||
|
trimmed := strings.TrimSpace(raw)
|
||||||
|
if trimmed == "" {
|
||||||
|
return false, fmt.Errorf("empty vault status output")
|
||||||
|
}
|
||||||
|
|
||||||
|
type vaultStatus struct {
|
||||||
|
Sealed bool `json:"sealed"`
|
||||||
|
}
|
||||||
|
var st vaultStatus
|
||||||
|
if err := json.Unmarshal([]byte(trimmed), &st); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return st.Sealed, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) vaultUnsealKey(ctx context.Context) (string, error) {
|
||||||
|
out, err := o.kubectl(
|
||||||
|
ctx,
|
||||||
|
15*time.Second,
|
||||||
|
"-n", "vault",
|
||||||
|
"get", "secret", "vault-init",
|
||||||
|
"-o", "jsonpath={.data.unseal_key_b64}",
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read vault-init secret: %w", err)
|
||||||
|
}
|
||||||
|
decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(out))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("decode vault-init unseal_key_b64: %w", err)
|
||||||
|
}
|
||||||
|
key := strings.TrimSpace(string(decoded))
|
||||||
|
if key == "" {
|
||||||
|
return "", fmt.Errorf("vault-init unseal key is empty")
|
||||||
|
}
|
||||||
|
return key, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (o *Orchestrator) workloadReady(ctx context.Context, w startupWorkload) (bool, error) {
|
func (o *Orchestrator) workloadReady(ctx context.Context, w startupWorkload) (bool, error) {
|
||||||
out, err := o.kubectl(
|
out, err := o.kubectl(
|
||||||
ctx,
|
ctx,
|
||||||
|
|||||||
27
internal/cluster/orchestrator_test.go
Normal file
27
internal/cluster/orchestrator_test.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package cluster
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseVaultSealed(t *testing.T) {
|
||||||
|
sealed, err := parseVaultSealed(`{"initialized":true,"sealed":true}`)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse sealed=true: %v", err)
|
||||||
|
}
|
||||||
|
if !sealed {
|
||||||
|
t.Fatalf("expected sealed=true")
|
||||||
|
}
|
||||||
|
|
||||||
|
sealed, err = parseVaultSealed(`{"initialized":true,"sealed":false}`)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse sealed=false: %v", err)
|
||||||
|
}
|
||||||
|
if sealed {
|
||||||
|
t.Fatalf("expected sealed=false")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseVaultSealedRejectsEmpty(t *testing.T) {
|
||||||
|
if _, err := parseVaultSealed(" "); err == nil {
|
||||||
|
t.Fatalf("expected parse error for empty status payload")
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user