// ananke/internal/cluster/orchestrator_autorepair_test.go
package cluster
import (
"context"
"encoding/base64"
"errors"
"io"
"log"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestPostStartAutoHealRepairsVaultAndUnavailableNodes runs one orchestration or CLI step.
// Signature: TestPostStartAutoHealRepairsVaultAndUnavailableNodes(t *testing.T).
// Why: covers the new daemon-triggered repair path for late Vault reseals and
// stale terminating pods anchored to unavailable nodes.
func TestPostStartAutoHealRepairsVaultAndUnavailableNodes(t *testing.T) {
	cfg := config.Config{
		Startup: config.Startup{
			DeadNodeCleanupGraceSeconds: 300,
			RequiredNodeLabels: map[string]map[string]string{
				"titan-07": {"node-role.kubernetes.io/worker": "true"},
			},
		},
		State: config.State{
			Dir:            t.TempDir(),
			ReportsDir:     filepath.Join(t.TempDir(), "reports"),
			RunHistoryPath: filepath.Join(t.TempDir(), "runs.json"),
		},
	}
	orch := &Orchestrator{
		cfg:    cfg,
		runner: &execx.Runner{},
		store:  state.New(filepath.Join(t.TempDir(), "runs.json")),
		log:    log.New(io.Discard, "", 0),
	}
	// A deletionTimestamp 10 minutes in the past, well beyond the 300s grace
	// period, so the stale pod qualifies for forced cleanup.
	oldDelete := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
	unsealCalls := 0
	jobCreated := false
	reconciled := false
	deleted := map[string]bool{}
	// dispatch fakes every kubectl invocation the auto-heal path issues and
	// records the mutating calls so the assertions below can verify them.
	dispatch := func(_ context.Context, _ time.Duration, name string, args ...string) (string, error) {
		if name != "kubectl" {
			return "", nil
		}
		joined := strings.Join(args, " ")
		switch {
		case strings.Contains(joined, "label node titan-07 --overwrite node-role.kubernetes.io/worker=true"):
			return "", nil
		case strings.Contains(joined, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
			return "Running", nil
		case strings.Contains(joined, "VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json"):
			// Report sealed until the first unseal attempt lands, then unsealed.
			if unsealCalls == 0 {
				return `{"initialized":true,"sealed":true}`, nil
			}
			return `{"initialized":true,"sealed":false}`, nil
		case strings.Contains(joined, "-n vault get secret vault-init -o jsonpath={.data.unseal_key_b64}"):
			return base64.StdEncoding.EncodeToString([]byte("vault-unseal-key")), nil
		case strings.Contains(joined, "vault operator unseal"):
			unsealCalls++
			return "", nil
		case strings.Contains(joined, "-n vault create job --from=cronjob/vault-k8s-auth-config"):
			jobCreated = true
			return "", nil
		case strings.Contains(joined, "get nodes -o json"):
			// titan-22 is unavailable (Ready=Unknown); titan-07 is healthy.
			return `{"items":[{"metadata":{"name":"titan-22"},"status":{"conditions":[{"type":"Ready","status":"Unknown"}]}},{"metadata":{"name":"titan-07"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`, nil
		case strings.Contains(joined, "get pods -A -o json"):
			// Both pods are terminating past the grace period, but only the one
			// anchored to the unavailable node (titan-22) may be force-deleted.
			return `{"items":[{"metadata":{"namespace":"maintenance","name":"stale-pod","deletionTimestamp":"` + oldDelete + `"},"spec":{"nodeName":"titan-22"}},{"metadata":{"namespace":"logging","name":"healthy-node-pod","deletionTimestamp":"` + oldDelete + `"},"spec":{"nodeName":"titan-18"}}]}`, nil
		case strings.Contains(joined, "-n maintenance delete pod stale-pod --grace-period=0 --force --wait=false"):
			deleted["maintenance/stale-pod"] = true
			return "", nil
		case strings.Contains(joined, "-n flux-system annotate gitrepository flux-system reconcile.fluxcd.io/requestedAt="):
			reconciled = true
			return "", nil
		case strings.Contains(joined, "-n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="):
			return "", nil
		case strings.Contains(joined, "annotate --all-namespaces helmreleases.helm.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="):
			return "", nil
		default:
			return "", nil
		}
	}
	orch.SetCommandOverrides(dispatch, dispatch)
	if err := orch.postStartAutoHeal(context.Background()); err != nil {
		t.Fatalf("postStartAutoHeal failed: %v", err)
	}
	if unsealCalls != 1 {
		t.Fatalf("expected one Vault unseal attempt, got %d", unsealCalls)
	}
	if !jobCreated {
		t.Fatal("expected vault k8s auth config job to be created")
	}
	if !deleted["maintenance/stale-pod"] {
		t.Fatal("expected stale unavailable-node pod to be deleted")
	}
	if !reconciled {
		t.Fatal("expected flux reconcile request after repairs")
	}
	if deleted["logging/healthy-node-pod"] {
		t.Fatal("did not expect terminating pod on healthy node to be deleted")
	}
}
// TestPostStartAutoHealSkipsWhenClusterIsHealthy runs one orchestration or CLI step.
// Signature: TestPostStartAutoHealSkipsWhenClusterIsHealthy(t *testing.T).
// Why: proves the new post-start repair loop stays quiet when the specific
// failure patterns are absent.
func TestPostStartAutoHealSkipsWhenClusterIsHealthy(t *testing.T) {
	cfg := config.Config{
		Startup: config.Startup{
			DeadNodeCleanupGraceSeconds: 300,
		},
		State: config.State{
			Dir:            t.TempDir(),
			ReportsDir:     filepath.Join(t.TempDir(), "reports"),
			RunHistoryPath: filepath.Join(t.TempDir(), "runs.json"),
		},
	}
	orch := &Orchestrator{
		cfg:    cfg,
		runner: &execx.Runner{},
		store:  state.New(filepath.Join(t.TempDir(), "runs.json")),
		log:    log.New(io.Discard, "", 0),
	}
	unsealCalls := 0
	jobCreated := false
	reconciled := false
	// dispatch fakes kubectl with an entirely healthy cluster: Vault unsealed,
	// every node Ready, and no terminating pods. No repair call should fire.
	dispatch := func(_ context.Context, _ time.Duration, name string, args ...string) (string, error) {
		if name != "kubectl" {
			return "", nil
		}
		joined := strings.Join(args, " ")
		switch {
		case strings.Contains(joined, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
			return "Running", nil
		case strings.Contains(joined, "VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json"):
			return `{"initialized":true,"sealed":false}`, nil
		case strings.Contains(joined, "-n vault create job --from=cronjob/vault-k8s-auth-config"):
			jobCreated = true
			return "", nil
		case strings.Contains(joined, "vault operator unseal"):
			unsealCalls++
			return "", nil
		case strings.Contains(joined, "get nodes -o json"):
			return `{"items":[{"metadata":{"name":"titan-07"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`, nil
		case strings.Contains(joined, "get pods -A -o json"):
			return `{"items":[]}`, nil
		case strings.Contains(joined, "reconcile.fluxcd.io/requestedAt="):
			// Matches any of the flux annotate variants; all count as a reconcile.
			reconciled = true
			return "", nil
		default:
			return "", nil
		}
	}
	orch.SetCommandOverrides(dispatch, dispatch)
	if err := orch.postStartAutoHeal(context.Background()); err != nil {
		t.Fatalf("postStartAutoHeal failed: %v", err)
	}
	if unsealCalls != 0 {
		t.Fatalf("did not expect Vault unseal calls, got %d", unsealCalls)
	}
	if jobCreated {
		t.Fatal("did not expect vault auth config job creation")
	}
	if reconciled {
		t.Fatal("did not expect flux reconcile request for healthy cluster")
	}
}
// TestRunPostStartAutoHealDryRun runs one orchestration or CLI step.
// Signature: TestRunPostStartAutoHealDryRun(t *testing.T).
// Why: covers the exported wrapper and the top-level dry-run guard so daemon
// auto-heal never mutates cluster state during rehearsal runs.
func TestRunPostStartAutoHealDryRun(t *testing.T) {
	o := buildOrchestratorWithStubs(t, config.Config{}, nil)
	o.runner.DryRun = true
	err := o.RunPostStartAutoHeal(context.Background())
	if err != nil {
		t.Fatalf("RunPostStartAutoHeal dry-run failed: %v", err)
	}
}
// TestPostStartAutoHealAggregatesErrors runs one orchestration or CLI step.
// Signature: TestPostStartAutoHealAggregatesErrors(t *testing.T).
// Why: proves the daemon reports each failed sub-repair together instead of
// hiding later failures behind the first problem.
func TestPostStartAutoHealAggregatesErrors(t *testing.T) {
	cfg := config.Config{
		Startup: config.Startup{
			DeadNodeCleanupGraceSeconds: 300,
			RequiredNodeLabels: map[string]map[string]string{
				"titan-07": {"node-role.kubernetes.io/worker": "true"},
			},
		},
	}
	// Fail all three independent sub-repairs so the aggregated error must
	// carry every prefix, not just the first failure encountered.
	orch := buildOrchestratorWithStubs(t, cfg, []commandStub{
		{
			match: matchContains("kubectl", "label node titan-07 --overwrite node-role.kubernetes.io/worker=true"),
			err:   errors.New("label failed"),
		},
		{
			match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
			err:   errors.New("vault phase failed"),
		},
		{
			match: matchContains("kubectl", "get nodes -o json"),
			err:   errors.New("node query failed"),
		},
	})
	err := orch.postStartAutoHeal(context.Background())
	if err == nil {
		t.Fatal("expected aggregated error")
	}
	msg := err.Error()
	for _, want := range []string{
		"required node labels:",
		"vault auto-recovery:",
		"dead-node terminating pod cleanup:",
	} {
		if !strings.Contains(msg, want) {
			t.Fatalf("expected %q in %q", want, msg)
		}
	}
}
// TestAutoRecoverSealedVaultBranches runs one orchestration or CLI step.
// Signature: TestAutoRecoverSealedVaultBranches(t *testing.T).
// Why: late Vault reseals are a high-risk failure path, so the daemon needs
// coverage across the quiet-skip, parse-failure, and unseal-failure branches.
// TestAutoRecoverSealedVaultBranches runs one orchestration or CLI step.
// Signature: TestAutoRecoverSealedVaultBranches(t *testing.T).
// Why: late Vault reseals are a high-risk failure path, so the daemon needs
// coverage across the quiet-skip, parse-failure, and unseal-failure branches.
func TestAutoRecoverSealedVaultBranches(t *testing.T) {
	t.Run("dry run skips", func(t *testing.T) {
		o := buildOrchestratorWithStubs(t, config.Config{}, nil)
		o.runner.DryRun = true
		got, err := o.autoRecoverSealedVault(context.Background())
		if got || err != nil {
			t.Fatalf("expected dry-run skip, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("pod missing is quiet", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				err:   errors.New("vault-0 not found"),
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if got || err != nil {
			t.Fatalf("expected quiet skip, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("phase check error surfaces", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				err:   errors.New("phase check failed"),
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if err == nil || got || !strings.Contains(err.Error(), "vault pod phase check failed") {
			t.Fatalf("expected phase check error, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("non-running pod defers", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				out:   "Pending",
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if got || err != nil {
			t.Fatalf("expected pending pod skip, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("status parse failure surfaces", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				out:   "Running",
			},
			{
				match: matchContains("kubectl", "VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json"),
				out:   "garbage",
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if err == nil || got || !strings.Contains(err.Error(), "parse vault status") {
			t.Fatalf("expected parse error, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("already unsealed stays quiet", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				out:   "Running",
			},
			{
				match: matchContains("kubectl", "VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json"),
				out:   `{"sealed":false}`,
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if got || err != nil {
			t.Fatalf("expected already-unsealed skip, got recovered=%v err=%v", got, err)
		}
	})
	t.Run("unseal failure surfaces", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault get pod vault-0 -o jsonpath={.status.phase}"),
				out:   "Running",
			},
			{
				match: matchContains("kubectl", "VAULT_ADDR=http://127.0.0.1:8200 vault status -format=json"),
				out:   `{"sealed":true}`,
			},
			{
				match: matchContains("kubectl", "-n vault get secret vault-init -o jsonpath={.data.unseal_key_b64}"),
				out:   base64.StdEncoding.EncodeToString([]byte("vault-unseal-key")),
			},
			{
				match: matchContains("kubectl", "vault operator unseal"),
				err:   errors.New("exec boom"),
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		got, err := o.autoRecoverSealedVault(context.Background())
		if err == nil || got || !strings.Contains(err.Error(), "vault unseal attempt 1 failed") {
			t.Fatalf("expected unseal failure, got recovered=%v err=%v", got, err)
		}
	})
}
// TestRerunVaultK8sAuthConfigJobBranches runs one orchestration or CLI step.
// Signature: TestRerunVaultK8sAuthConfigJobBranches(t *testing.T).
// Why: the post-unseal auth job is part of the production recovery chain, so
// dry-run and create-error behavior both need explicit coverage.
// TestRerunVaultK8sAuthConfigJobBranches runs one orchestration or CLI step.
// Signature: TestRerunVaultK8sAuthConfigJobBranches(t *testing.T).
// Why: the post-unseal auth job is part of the production recovery chain, so
// dry-run and create-error behavior both need explicit coverage.
func TestRerunVaultK8sAuthConfigJobBranches(t *testing.T) {
	t.Run("dry run skips", func(t *testing.T) {
		o := buildOrchestratorWithStubs(t, config.Config{}, nil)
		o.runner.DryRun = true
		err := o.rerunVaultK8sAuthConfigJob(context.Background())
		if err != nil {
			t.Fatalf("dry-run rerunVaultK8sAuthConfigJob failed: %v", err)
		}
	})
	t.Run("create error surfaces", func(t *testing.T) {
		stubs := []commandStub{
			{
				match: matchContains("kubectl", "-n vault create job --from=cronjob/vault-k8s-auth-config"),
				err:   errors.New("create failed"),
			},
		}
		o := buildOrchestratorWithStubs(t, config.Config{}, stubs)
		err := o.rerunVaultK8sAuthConfigJob(context.Background())
		if err == nil || !strings.Contains(err.Error(), "create job vault-k8s-auth-config-autoheal-") {
			t.Fatalf("expected create-job error, got %v", err)
		}
	})
}