package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)

// TestHookLowFileCoverageBoost exercises the exported test-hook surface of the
// low-coverage orchestrator files.
// Signature: TestHookLowFileCoverageBoost(t *testing.T).
// Why: raises coverage of those files through deterministic top-level tests that
// only use the exported hook surface.
func TestHookLowFileCoverageBoost(t *testing.T) {
	t.Run("workload-convergence-helpers-and-gates", func(t *testing.T) {
		desiredCases := []struct {
			kind  string
			has   bool
			rep   int32
			ready int32
			sched int32
			num   int32
			wantD int32
			wantR int32
			wantB bool
		}{
			{kind: "Deployment", has: false, ready: 1, wantD: 1, wantR: 1, wantB: true},
			{kind: "deployment", has: true, rep: 3, ready: 2, wantD: 3, wantR: 2, wantB: true},
			{kind: "daemonset", sched: 4, num: 3, wantD: 4, wantR: 3, wantB: true},
			{kind: "job", wantD: 0, wantR: 0, wantB: false},
		}
		for _, tc := range desiredCases {
			gotD, gotR, gotB := cluster.TestHookDesiredReady(tc.kind, tc.has, tc.rep, tc.ready, tc.sched, tc.num)
			if gotD != tc.wantD || gotR != tc.wantR || gotB != tc.wantB {
				t.Fatalf("desiredReady(%q)=%d,%d,%v want %d,%d,%v", tc.kind, gotD, gotR, gotB, tc.wantD, tc.wantR, tc.wantB)
			}
		}
		if !cluster.TestHookPodControllerOwned([]string{"ReplicaSet"}) {
			t.Fatalf("expected ReplicaSet owner to be controller-owned")
		}
		if !cluster.TestHookPodControllerOwned([]string{"StatefulSet"}) {
			t.Fatalf("expected StatefulSet owner to be controller-owned")
		}
		if !cluster.TestHookPodControllerOwned([]string{"DaemonSet"}) {
			t.Fatalf("expected DaemonSet owner to be controller-owned")
		}
		if cluster.TestHookPodControllerOwned([]string{"Job"}) {
			t.Fatalf("expected Job owner to be non controller-owned")
		}
		if got := cluster.TestHookStuckContainerReason([]string{"ImagePullBackOff"}, nil, []string{"ImagePullBackOff"}); got != "ImagePullBackOff" {
			t.Fatalf("expected init-container stuck reason, got %q", got)
		}
		if got := cluster.TestHookStuckContainerReason(nil, []string{"CrashLoopBackOff"}, []string{"CrashLoopBackOff"}); got != "CrashLoopBackOff" {
			t.Fatalf("expected container stuck reason, got %q", got)
		}
		if got := cluster.TestHookStuckContainerReason([]string{"ImagePullBackOff"}, []string{"CrashLoopBackOff"}, []string{"Missing"}); got != "" {
			t.Fatalf("expected filtered stuck reason to be empty, got %q", got)
		}
		vaultCases := []struct {
			name       string
			phase      string
			inject     bool
			startedAgo time.Duration
			grace      time.Duration
			want       string
		}{
			{name: "phase-running", phase: "Running", inject: true, startedAgo: 10 * time.Minute, grace: time.Minute, want: ""},
			{name: "inject-false", phase: "Pending", inject: false, startedAgo: 10 * time.Minute, grace: time.Minute, want: ""},
			{name: "within-grace", phase: "Pending", inject: true, startedAgo: 30 * time.Second, grace: time.Minute, want: ""},
			{name: "stuck", phase: "Pending", inject: true, startedAgo: 10 * time.Minute, grace: time.Minute, want: "VaultInitStuck"},
		}
		for _, tc := range vaultCases {
			got := cluster.TestHookStuckVaultInitReason(tc.phase, tc.inject, tc.startedAgo, tc.grace)
			if got != tc.want {
				t.Fatalf("%s: stuckVaultInitReason=%q want %q", tc.name, got, tc.want)
			}
		}
		cfg := lifecycleConfig(t)
		cfg.Startup.WorkloadConvergenceWaitSeconds = 1
		cfg.Startup.WorkloadConvergencePollSeconds = 1
		cfg.Startup.StuckPodGraceSeconds = 1
		cfg.Startup.IgnoreWorkloadNamespaces = []string{"ignored-ns"}
		cfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
		cfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/ignore-me"}
		cfg.Startup.IgnoreFluxKustomizations = []string{"ignored/flux-system"}
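		// readyRun fakes the kubectl responses behind the convergence check: one
		// ready Deployment, one partially ready DaemonSet, and three controllers
		// that the ignore lists above should filter out.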
		readyRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1,"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"titan-23"}}}},"status":{"readyReplicas":1}},
{"kind":"DaemonSet","metadata":{"namespace":"monitoring","name":"node-exporter"},"spec":{"template":{"spec":{"nodeName":"titan-23"}}},"status":{"desiredNumberScheduled":2,"numberReady":1}},
{"kind":"Deployment","metadata":{"namespace":"ignored-ns","name":"skip"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"ignore-me"},"spec":{"replicas":1},"status":{"readyReplicas":0}}
]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchReady, _ := newHookOrchestrator(t, cfg, readyRun, readyRun)
		ready, detail, err := orchReady.TestHookWorkloadConvergenceReady(context.Background())
		if err != nil || !ready || !strings.Contains(detail, "controllers ready=") {
			t.Fatalf("expected workload convergence ready path, ready=%v detail=%q err=%v", ready, detail, err)
		}
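		// pendingRun keeps grafana at zero ready replicas so the convergence wait
		// runs into its one-second timeout.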
		pendingRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
			case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
				return `{"items":[]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPending, _ := newHookOrchestrator(t, cfg, pendingRun, pendingRun)
		if err := orchPending.TestHookWaitForWorkloadConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "workload convergence not satisfied") {
			t.Fatalf("expected workload convergence timeout, got %v", err)
		}
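		// podRun returns a pod inventory that covers the recycle branches: ignored
		// namespaces, workloads, and nodes, an unowned pod, a recent pod, a delete
		// failure for grafana-0, and a Vault pod stuck in init.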
		podRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
				return `{"items":[
{"metadata":{"namespace":"","name":"missing-ns"}},
{"metadata":{"namespace":"kube-system","name":"ignored","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"ignored"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"ignore-me","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"ignore-me"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"node-ignored","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"node-ignored"}]},"spec":{"nodeName":"titan-22","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"unowned","creationTimestamp":"2020-01-01T00:00:00Z"},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"recent","creationTimestamp":"` + time.Now().UTC().Format(time.RFC3339) + `","ownerReferences":[{"kind":"ReplicaSet","name":"recent"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"grafana-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"grafana"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"vault","name":"vault-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"StatefulSet","name":"vault"}],"annotations":{"vault.hashicorp.com/agent-inject":"true"}},"spec":{"nodeName":"titan-23","containers":[{"name":"vault"}]},"status":{"phase":"Pending","initContainerStatuses":[{"name":"vault-agent-init","state":{"running":{"startedAt":"2020-01-01T00:00:00Z"}}}]}}
]}`, nil
			case name == "kubectl" && strings.Contains(command, "delete pod grafana-0"):
				return "", errors.New("boom")
			case name == "kubectl" && strings.Contains(command, "delete pod vault-0"):
				return "", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPods, _ := newHookOrchestrator(t, cfg, podRun, podRun)
		if err := orchPods.TestHookRecycleStuckControllerPods(context.Background()); err != nil {
			t.Fatalf("expected recycleStuckControllerPods best-effort success, got %v", err)
		}
	})

	t.Run("scaling-helpers-and-recovery", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.ExcludedNamespaces = []string{"flux-system", "vault"}
		explicit := []string{"worker-a", "worker-b"}
		cfg.Workers = append([]string{}, explicit...)
		orchWorkers, _ := newHookOrchestrator(t, cfg, nil, nil)
		gotWorkers, err := orchWorkers.TestHookEffectiveWorkers(context.Background())
		if err != nil || len(gotWorkers) != len(explicit) || gotWorkers[0] != explicit[0] || gotWorkers[1] != explicit[1] {
			t.Fatalf("expected explicit workers copy, got %v err=%v", gotWorkers, err)
		}
		gotWorkers[0] = "mutated"
		if cfg.Workers[0] != explicit[0] {
			t.Fatalf("expected effectiveWorkers to return a copy")
		}
		cfg.Workers = nil
		cfg.SSHManagedNodes = nil
		cfg.SSHNodeHosts = map[string]string{
			"worker-c": "worker-c",
			"worker-b": "worker-b",
			"worker-a": "worker-a",
			"titan-db": "titan-db",
		}
		discoverErrRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "get nodes -o custom-columns=") {
				return "", errors.New("nodes unavailable")
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orchFallback, _ := newHookOrchestrator(t, cfg, discoverErrRun, discoverErrRun)
		fallbackWorkers, err := orchFallback.TestHookEffectiveWorkers(context.Background())
		if err != nil || strings.Join(fallbackWorkers, ",") != "worker-a,worker-b,worker-c" {
			t.Fatalf("expected fallback workers, got %v err=%v", fallbackWorkers, err)
		}
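		// discoverRun returns the raw node listing behind worker discovery; the
		// control-plane row and the malformed row are expected to be skipped.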
		discoverRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get nodes -o custom-columns="):
				return "titan-db \nworker-b \nworker-c control-plane \nbadline\n", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchDiscover, _ := newHookOrchestrator(t, lifecycleConfig(t), discoverRun, discoverRun)
		discovered, err := orchDiscover.TestHookDiscoverWorkers(context.Background())
		if err != nil || strings.Join(discovered, ",") != "titan-db,worker-b" {
			t.Fatalf("expected discovered workers, got %v err=%v", discovered, err)
		}
		cfgPatch := lifecycleConfig(t)
		patchRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "-n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath="):
				return "services\nignored\n", nil
			case name == "kubectl" && strings.Contains(command, "get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath="):
				return "monitoring/grafana\nmonitoring/failing\n", nil
			case name == "kubectl" && strings.Contains(command, "-n flux-system patch kustomization services"):
				return "", errors.New("patch failed")
			case name == "kubectl" && strings.Contains(command, "-n flux-system patch kustomization ignored"):
				return "", nil
			case name == "kubectl" && strings.Contains(command, "-n monitoring patch helmrelease grafana"):
				return "", nil
			case name == "kubectl" && strings.Contains(command, "-n monitoring patch helmrelease failing"):
				return "", errors.New("patch failed")
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPatch, _ := newHookOrchestrator(t, cfgPatch, patchRun, patchRun)
		if err := orchPatch.TestHookPatchFluxSuspendAll(context.Background(), true); err != nil {
			t.Fatalf("expected patchFluxSuspendAll best-effort success, got %v", err)
		}
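		// listRun backs the scalable-workload listing and the scale assertions; it
		// mixes a bogus replica count, a zero-replica row, and grafana scale calls
		// that fail at --replicas=0 but succeed at --replicas=1.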
		listRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
				return "monitoring\tgrafana\t2\nflux-system\tsource-controller\t1\nmonitoring\tbad\tbogus\nmonitoring\tempty\t0\n", nil
			case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
				return "monitoring\tvictoria-metrics-single-server\t3\nvault\tvault\t1\n", nil
			case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=0"):
				return "", errors.New("scale failed")
			case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=1"):
				return "", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchList, _ := newHookOrchestrator(t, cfgPatch, listRun, listRun)
		entries, err := orchList.TestHookListScalableWorkloads(context.Background())
		if err != nil || len(entries) != 4 {
			t.Fatalf("expected four scalable workloads, got %v err=%v", entries, err)
		}
		if err := orchList.TestHookScaleWorkloads(context.Background(), entries[:1], 0, 0); err != nil {
			t.Fatalf("expected single-entry scaleWorkloads success, got %v", err)
		}
		if err := orchList.TestHookScaleWorkloads(context.Background(), entries[1:], 0, 0); err == nil || !strings.Contains(err.Error(), "scaling had") {
			t.Fatalf("expected scaleWorkloads error aggregation, got %v", err)
		}
		if err := orchList.TestHookScaleWorkloads(context.Background(), nil, 0, 1); err != nil {
			t.Fatalf("expected empty scaleWorkloads success, got %v", err)
		}
		orchSnapshotWrite, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
		if err := orchSnapshotWrite.TestHookWriteScaledWorkloadSnapshot(nil); err != nil {
			t.Fatalf("expected snapshot write with empty entries, got %v", err)
		}
		orchSnapshotRead, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
		if snapshot, err := orchSnapshotRead.TestHookReadScaledWorkloadSnapshot(); err != nil || snapshot != nil {
			t.Fatalf("expected missing snapshot to read as nil,nil, got snapshot=%v err=%v", snapshot, err)
		}
		manualCfg := lifecycleConfig(t)
		manualCfg.State.Dir = filepath.Join(t.TempDir(), "state")
		manualOrch := cluster.New(manualCfg, &execx.Runner{DryRun: true}, state.New(manualCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
		if err := manualOrch.TestHookWriteScaledWorkloadSnapshot(nil); err != nil {
			t.Fatalf("expected dry-run snapshot write success, got %v", err)
		}
		if snapshot, err := manualOrch.TestHookReadScaledWorkloadSnapshot(); err != nil || snapshot != nil {
			t.Fatalf("expected dry-run snapshot read to return nil,nil, got snapshot=%v err=%v", snapshot, err)
		}
		restorePath := filepath.Join(t.TempDir(), "state", "scaled-workloads.json")
		if err := os.MkdirAll(filepath.Dir(restorePath), 0o755); err != nil {
			t.Fatalf("mkdir restore path: %v", err)
		}
		valid := `{"generated_at":"2026-01-01T00:00:00Z","entries":[{"namespace":"monitoring","kind":"deployment","name":"grafana","replicas":1}]}`
		if err := os.WriteFile(restorePath, []byte(valid), 0o600); err != nil {
			t.Fatalf("write restore snapshot: %v", err)
		}
		restoreCfg := lifecycleConfig(t)
		restoreCfg.State.Dir = filepath.Dir(restorePath)
		restoreOrch, _ := newHookOrchestrator(t, restoreCfg, listRun, listRun)
		if err := restoreOrch.TestHookRestoreScaledApps(context.Background()); err != nil {
			t.Fatalf("expected restoreScaledApps success, got %v", err)
		}
		if _, err := os.Stat(restorePath); !os.IsNotExist(err) {
			t.Fatalf("expected restore snapshot to be removed, stat err=%v", err)
		}
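		// A one-second convergence window plus a failing grafana scale-down pushes
		// scaleDownApps onto its error path.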
		pendingCfg := lifecycleConfig(t)
		pendingCfg.Startup.WorkloadConvergenceWaitSeconds = 1
		pendingCfg.Startup.WorkloadConvergencePollSeconds = 1
		pendingRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
			case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
				return `{"items":[]}`, nil
			case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=0"):
				return "", errors.New("scale failed")
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPendingScale, _ := newHookOrchestrator(t, pendingCfg, pendingRun, pendingRun)
		if err := orchPendingScale.TestHookScaleDownApps(context.Background()); err == nil {
			t.Fatalf("expected scaleDownApps to fail when workloads stay pending")
		}
	})

	t.Run("critical-vault-and-flux-health-helpers", func(t *testing.T) {
		fluxCfg := lifecycleConfig(t)
		fluxCfg.Startup.FluxHealthWaitSeconds = 1
		fluxCfg.Startup.FluxHealthPollSeconds = 1
		fluxCfg.Startup.IgnoreFluxKustomizations = []string{"flux-system/ignored"}
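		// fluxRun serves the kustomization and job fixtures used by the adaptive
		// wait, health, and immutable-job healing checks; deleting job-b fails on
		// purpose to exercise the best-effort branch.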
		fluxRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json"):
				return `{"items":[
{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false,"timeout":"40s"},"status":{"conditions":[{"type":"Ready","status":"False","message":"syncing"}]}},
{"metadata":{"namespace":"flux-system","name":"no-condition"},"spec":{"suspend":false,"timeout":"10s"},"status":{"conditions":[{"type":"Reconciling","status":"True","message":"still"}]}},
{"metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"suspend":false,"timeout":"5s"},"status":{"conditions":[{"type":"Ready","status":"False","message":"ignored"}]}},
{"metadata":{"namespace":"flux-system","name":"suspended"},"spec":{"suspend":true,"timeout":"2m"},"status":{"conditions":[{"type":"Ready","status":"False","message":"skip"}]}},
{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"suspend":false,"timeout":"5m"},"status":{"conditions":[{"type":"Ready","status":"True","message":"ok"}]}}
]}`, nil
			case name == "kubectl" && strings.Contains(command, "get jobs -A -o json"):
				return `{"items":[
{"metadata":{"namespace":"flux-system","name":"job-a","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"job-b","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"cronjob-owned","ownerReferences":[{"kind":"CronJob","name":"cron"}]},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"succeeded"},"status":{"succeeded":1,"failed":1,"conditions":[{"type":"Failed","status":"True"}]}}
]}`, nil
			case name == "kubectl" && strings.Contains(command, "delete job job-a"):
				return "", nil
			case name == "kubectl" && strings.Contains(command, "delete job job-b"):
				return "", errors.New("boom")
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchFlux, _ := newHookOrchestrator(t, fluxCfg, fluxRun, fluxRun)
		wait, reason, err := orchFlux.TestHookAdaptiveFluxHealthWait(context.Background(), 30*time.Second)
		if err != nil || wait <= 30*time.Second || !strings.Contains(reason, "max flux timeout") {
			t.Fatalf("expected adaptive flux wait extension, wait=%s reason=%q err=%v", wait, reason, err)
		}
		noTimeoutCfg := lifecycleConfig(t)
		noTimeoutRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json"):
				return `{"items":[{"metadata":{"namespace":"flux-system","name":"ready"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"True","message":"ok"}]}}]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchNoTimeout, _ := newHookOrchestrator(t, noTimeoutCfg, noTimeoutRun, noTimeoutRun)
		if wait, reason, err := orchNoTimeout.TestHookAdaptiveFluxHealthWait(context.Background(), 0); err != nil || wait < 15*time.Minute || !strings.Contains(reason, "no explicit kustomization timeouts found") {
			t.Fatalf("expected adaptive wait fallback, wait=%s reason=%q err=%v", wait, reason, err)
		}
		if ready, detail, err := orchFlux.TestHookFluxHealthReady(context.Background()); err != nil || ready || !strings.Contains(detail, "not ready") {
			t.Fatalf("expected flux health not-ready result, ready=%v detail=%q err=%v", ready, detail, err)
		}
		if ready, detail, err := orchNoTimeout.TestHookFluxHealthReady(context.Background()); err != nil || !ready || !strings.Contains(detail, "all kustomizations ready=") {
			t.Fatalf("expected flux health ready result, ready=%v detail=%q err=%v", ready, detail, err)
		}
		if !cluster.TestHookLooksLikeImmutableJobError("Job update failed: field is immutable") {
			t.Fatalf("expected immutable-job detector true")
		}
		if cluster.TestHookLooksLikeImmutableJobError("") {
			t.Fatalf("expected empty immutable-job detail to be false")
		}
		if !cluster.TestHookJobLooksFluxManaged("flux-system", "job-a", map[string]string{"kustomize.toolkit.fluxcd.io/name": "services"}, nil) {
			t.Fatalf("expected label-based flux-managed job")
		}
		if cluster.TestHookJobLooksFluxManaged("flux-system", "cronjob-owned", nil, []string{"CronJob"}) {
			t.Fatalf("expected CronJob-owned job to be non flux-managed")
		}
		if !cluster.TestHookJobFailed(1, 0, []string{"Failed"}, []string{"True"}) {
			t.Fatalf("expected failed job detector to be true")
		}
		if cluster.TestHookJobFailed(0, 1, []string{"Complete"}, []string{"True"}) {
			t.Fatalf("expected succeeded job to be false")
		}
		if healed, err := orchFlux.TestHookHealImmutableFluxJobs(context.Background()); err != nil || !healed {
			t.Fatalf("expected immutable job heal success, healed=%v err=%v", healed, err)
		}
		critCfg := lifecycleConfig(t)
		critCfg.Startup.VaultUnsealKeyFile = filepath.Join(t.TempDir(), "vault", "unseal.key")
		critCfg.Startup.VaultUnsealBreakglassCommand = "echo breakglass-key"
		critCfg.Startup.VaultUnsealBreakglassTimeout = 1
		critCfg.Startup.WorkloadConvergenceWaitSeconds = 1
		critCfg.Startup.WorkloadConvergencePollSeconds = 1
		critCfg.Startup.StuckPodGraceSeconds = 1
		critCfg.Startup.IgnoreWorkloadNamespaces = []string{"kube-system"}
		critCfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
		critCfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/ignore-me"}
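		// critRun fails the source-controller lookup and scale calls and mixes
		// healthy, stuck, and malformed vault pod rows so the critical-workload
		// helpers hit their error branches.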
		critRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "-n flux-system get deployment source-controller -o jsonpath={.status.readyReplicas}"):
				return "", errors.New("boom")
			case name == "kubectl" && strings.Contains(command, "-n flux-system scale deployment source-controller --replicas=1"):
				return "", errors.New("boom")
			case name == "kubectl" && strings.Contains(command, "-n vault get pods -o custom-columns="):
				return "vault-0 Pending StatefulSet vault\nvault-1 Unknown StatefulSet vault\nvault-2 Running StatefulSet vault\nvault-other Failed Deployment vault\nbadline\n", nil
			case name == "kubectl" && strings.Contains(command, "delete pod vault-0"):
				return "", nil
			case name == "kubectl" && strings.Contains(command, "delete pod vault-1"):
				return "", errors.New("boom")
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchCrit, _ := newHookOrchestrator(t, critCfg, critRun, critRun)
		if missing, err := orchCrit.TestHookMissingCriticalStartupWorkloads(context.Background()); err == nil || len(missing) != 0 {
			t.Fatalf("expected missingCriticalStartupWorkloads generic error, missing=%v err=%v", missing, err)
		}
		if err := orchCrit.TestHookEnsureCriticalStartupWorkloads(context.Background()); err == nil || !strings.Contains(err.Error(), "scale") {
			t.Fatalf("expected ensureCriticalStartupWorkloads scale error, got %v", err)
		}
		if err := orchCrit.TestHookCleanupStaleCriticalWorkloadPods(context.Background(), "vault", "statefulset", "vault"); err == nil {
			t.Fatalf("expected stale critical workload cleanup error branch")
		}
		vaultCfg := lifecycleConfig(t)
		vaultCfg.Startup.VaultUnsealKeyFile = filepath.Join(t.TempDir(), "vault", "unseal.key")
		vaultCfg.Startup.VaultUnsealBreakglassCommand = "echo breakglass-key"
		vaultCfg.Startup.VaultUnsealBreakglassTimeout = 1
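		// ensureRun reports Vault as sealed until an unseal command has been seen,
		// so the auto-unseal path can flip state mid-test.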
		ensureUnsealed := false
		ensureRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
				return "Running", nil
			case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
				if ensureUnsealed {
					return `{"sealed":false}`, nil
				}
				return `{"sealed":true}`, nil
			case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
				return "dmF1bHQta2V5", nil
			case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
				ensureUnsealed = true
				return "", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchVaultEnsure, _ := newHookOrchestrator(t, vaultCfg, ensureRun, ensureRun)
		if err := orchVaultEnsure.TestHookEnsureVaultUnsealed(context.Background()); err != nil {
			t.Fatalf("expected vault auto-unseal success path, got %v", err)
		}
		if sealed, err := orchVaultEnsure.TestHookVaultSealed(context.Background()); err != nil || sealed {
			t.Fatalf("expected vault sealed helper false after unseal, sealed=%v err=%v", sealed, err)
		}
		waitReady := false
		waitUnsealed := false
		waitRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
				return "Running", nil
			case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
				if waitUnsealed {
					return `{"sealed":false}`, nil
				}
				return `{"sealed":true}`, nil
			case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
				return "dmF1bHQta2V5", nil
			case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
				waitUnsealed = true
				waitReady = true
				return "", nil
			case name == "kubectl" && strings.Contains(command, "get statefulset vault -o jsonpath={.status.readyReplicas}"):
				if waitReady {
					return "1", nil
				}
				return "0", nil
			case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
				return "monitoring\tgrafana\t1\n", nil
			case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
				return "monitoring\tvictoria-metrics-single-server\t1\n", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchVaultWait, _ := newHookOrchestrator(t, vaultCfg, waitRun, waitRun)
		if err := orchVaultWait.TestHookWaitVaultReady(context.Background(), "vault", "statefulset", "vault"); err != nil {
			t.Fatalf("expected vault wait success path, got %v", err)
		}
		if err := orchVaultWait.TestHookWaitWorkloadReady(context.Background(), "monitoring", "deployment", "grafana"); err != nil {
			t.Fatalf("expected generic workload wait success, got %v", err)
		}
		if err := orchVaultWait.TestHookWriteVaultUnsealKeyFile("cached-key"); err != nil {
			t.Fatalf("expected vault key file write success, got %v", err)
		}
		if got, err := orchVaultWait.TestHookReadVaultUnsealKeyFile(); err != nil || got != "cached-key" {
			t.Fatalf("expected vault key file read success, got %q err=%v", got, err)
		}
		blockedDir := t.TempDir()
		blockedFile := filepath.Join(blockedDir, "blocked")
		if err := os.WriteFile(blockedFile, []byte("x"), 0o600); err != nil {
			t.Fatalf("write blocked file: %v", err)
		}
		blockedCfg := lifecycleConfig(t)
		blockedCfg.Startup.VaultUnsealKeyFile = filepath.Join(blockedFile, "vault.key")
		blockedOrch, _ := newHookOrchestrator(t, blockedCfg, nil, nil)
		if err := blockedOrch.TestHookWriteVaultUnsealKeyFile("x"); err == nil {
			t.Fatalf("expected vault key dir error")
		}
	})
}