package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)

// TestHookGapMatrixPart7CoverageClosure drives one orchestration or CLI step
// per subtest. It exists to close remaining low-coverage branches in workload
// convergence, storage readiness, node access checks, flux health helpers,
// lifecycle operations, and the sensitive-command wrappers.
func TestHookGapMatrixPart7CoverageClosure(t *testing.T) {
	t.Run("workload-convergence-branch-matrix", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.WorkloadConvergenceWaitSeconds = 1
		cfg.Startup.WorkloadConvergencePollSeconds = 1
		cfg.Startup.IgnoreWorkloadNamespaces = []string{"ignored-ns"}
		cfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
		cfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/skip-me"}
		cfg.Startup.IgnoreFluxKustomizations = []string{"flux-system/ignored"}

		// Fake runner reporting a converged controller set; the not-ready
		// entries are all covered by one of the ignore lists above.
		readyRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			cmdLine := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(cmdLine, "get deploy,statefulset,daemonset -A -o json") {
				return `{"items":[
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":1}},
{"kind":"DaemonSet","metadata":{"namespace":"monitoring","name":"node-exporter"},"spec":{"template":{"spec":{"nodeName":"titan-22"}}},"status":{"desiredNumberScheduled":2,"numberReady":1}},
{"kind":"Deployment","metadata":{"namespace":"ignored-ns","name":"ignore-ns"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"replicas":1},"status":{"readyReplicas":1}},
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"skip-me"},"spec":{"replicas":1},"status":{"readyReplicas":0}}
]}`, nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orchReady, _ := newHookOrchestrator(t, cfg, readyRunner, readyRunner)
		ready, detail, err := orchReady.TestHookWorkloadConvergenceReady(context.Background())
		if err != nil || !ready || !strings.Contains(detail, "controllers ready=") {
			t.Fatalf("expected workload convergence ready branch, ready=%v detail=%q err=%v", ready, detail, err)
		}

		// Fake runner handing back malformed JSON so decoding fails.
		decodeErrRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			cmdLine := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(cmdLine, "get deploy,statefulset,daemonset -A -o json") {
				return "{bad-json", nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orchDecodeErr, _ := newHookOrchestrator(t, cfg, decodeErrRunner, decodeErrRunner)
		if _, _, err := orchDecodeErr.TestHookWorkloadConvergenceReady(context.Background()); err == nil {
			t.Fatalf("expected workload convergence decode error")
		}

		// Fake runner with a never-ready deployment so the wait loop times out.
		pendingRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			cmdLine := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(cmdLine, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
			case name == "kubectl" && strings.Contains(cmdLine, "get pods -A -o json"):
				return `{"items":[]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPending, _ := newHookOrchestrator(t, cfg, pendingRunner, pendingRunner)
		if err := orchPending.TestHookWaitForWorkloadConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "workload convergence not satisfied") {
			t.Fatalf("expected workload convergence timeout branch, got %v", err)
		}

		// A pre-canceled context must surface context.Canceled, not a timeout.
		canceledCtx, cancel := context.WithCancel(context.Background())
		cancel()
		if err := orchPending.TestHookWaitForWorkloadConvergence(canceledCtx); !errors.Is(err, context.Canceled) {
			t.Fatalf("expected canceled convergence wait, got %v", err)
		}

		// Each entry below misses exactly one condition for the stuck-vault-init
		// verdict, so no reason may be reported for any of them.
		quietCases := []cluster.TestHookStuckVaultInitInput{
			{Phase: "Running", Inject: true, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: false, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "other-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 0, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "vault-agent-init", Running: false, StartedAtOffsetSec: 600, GraceSeconds: 60},
		}
		for _, in := range quietCases {
			if got := cluster.TestHookStuckVaultInitReasonRaw(in); got != "" {
				t.Fatalf("expected no stuck reason for %+v, got %q", in, got)
			}
		}
		// With every condition satisfied the stuck reason must be reported.
		if got := cluster.TestHookStuckVaultInitReasonRaw(cluster.TestHookStuckVaultInitInput{
			Phase: "Pending", Inject: true, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60,
		}); got != "VaultInitStuck" {
			t.Fatalf("expected VaultInitStuck branch, got %q", got)
		}
	})

	t.Run("storage-access-and-reachability-branches", func(t *testing.T) {
		t.Run("storage-ready-invalid-entry-and-query-error", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// "bad-entry" lacks the namespace/name separator and must be rejected.
			cfg.Startup.StorageCriticalPVCs = []string{"bad-entry"}
			healthyNodesRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				cmdLine := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(cmdLine, "get nodes.longhorn.io") {
					return "a:True:True\nb:True:True\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, healthyNodesRunner, healthyNodesRunner)
			if _, _, err := orch.TestHookStorageReady(context.Background()); err == nil {
				t.Fatalf("expected invalid storage_critical_pvcs entry error")
			}

			// Valid entry, but the PVC phase query itself fails.
			cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana"}
			pvcErrRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				cmdLine := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(cmdLine, "get nodes.longhorn.io"):
					return "a:True:True\nb:True:True\n", nil
				case name == "kubectl" && strings.Contains(cmdLine, "-n monitoring get pvc grafana -o jsonpath={.status.phase}"):
					return "", errors.New("query pvc failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchPVCError, _ := newHookOrchestrator(t, cfg, pvcErrRunner, pvcErrRunner)
			if _, _, err := orchPVCError.TestHookStorageReady(context.Background()); err == nil {
				t.Fatalf("expected pvc query error branch")
			}
		})

		t.Run("wait-for-node-ssh-auth-and-inventory-timeouts", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeSSHAuth = true
			cfg.Startup.NodeSSHAuthWaitSeconds = 1
			cfg.Startup.NodeSSHAuthPollSeconds = 1
			cfg.Startup.RequireNodeInventoryReach = true
			cfg.Startup.NodeInventoryReachWaitSeconds = 1
			cfg.Startup.NodeInventoryReachPollSeconds = 1
			// SSH probes answer with output the checks do not recognize,
			// so both waits must exhaust their 1s windows and fail.
			unexpectedRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				cmdLine := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(cmdLine, "__ANANKE_") {
					return "unexpected", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, unexpectedRunner, unexpectedRunner)
			if err := orch.TestHookWaitForNodeSSHAuth(context.Background(), []string{"titan-db"}); err == nil || !strings.Contains(err.Error(), "did not pass") {
				t.Fatalf("expected ssh-auth timeout on unexpected output, got %v", err)
			}
			if err := orch.TestHookWaitForNodeInventoryReachability(context.Background()); err == nil || !strings.Contains(err.Error(), "did not pass") {
				t.Fatalf("expected inventory reachability timeout on unexpected output, got %v", err)
			}
		})
	})

	t.Run("flux-lifecycle-and-sensitive-run-branches", func(t *testing.T) {
		t.Run("sensitive-run-error-shapes", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			// Failure with no output at all.
			if _, err := orch.TestHookRunSensitive(context.Background(), 3*time.Second, "sh", "-lc", "exit 1"); err == nil {
				t.Fatalf("expected runSensitive failure without output")
			}
			// Failure whose output must still be captured and returned.
			out, err := orch.TestHookRunSensitive(context.Background(), 3*time.Second, "sh", "-lc", "echo boom; exit 1")
			if err == nil || strings.TrimSpace(out) != "boom" {
				t.Fatalf("expected runSensitive failure with captured output, out=%q err=%v", out, err)
			}
		})

		t.Run("flux-health-helper-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// job-a has only failed counts; job-b carries the Failed=True
			// condition and is the one the healer must delete.
			fluxRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				cmdLine := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(cmdLine, "get jobs -A -o json"):
					return `{"items":[
{"metadata":{"namespace":"flux-system","name":"job-a","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Complete","status":"False"}]}},
{"metadata":{"namespace":"flux-system","name":"job-b","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}}
]}`, nil
				case name == "kubectl" && strings.Contains(cmdLine, "-n flux-system delete job job-b"):
					return "", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, fluxRunner, fluxRunner)
			healed, err := orch.TestHookHealImmutableFluxJobs(context.Background())
			if err != nil || !healed {
				t.Fatalf("expected immutable-job heal branch, healed=%v err=%v", healed, err)
			}
		})

		t.Run("lifecycle-etcd-restore-dryrun-and-shutdown-mode", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Dry-run restore must succeed without touching anything.
			dry := cluster.New(cfg, &execx.Runner{DryRun: true}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{}); err != nil {
				t.Fatalf("expected dry-run etcd restore path, got %v", err)
			}
			// The "poweroff" shutdown mode is no longer supported.
			orch, _ := newHookOrchestrator(t, cfg, nil, nil)
			err := orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "drill", Mode: "poweroff"})
			if err == nil || !strings.Contains(err.Error(), "has been removed") {
				t.Fatalf("expected removed poweroff mode error, got %v", err)
			}
		})

		t.Run("scale-down-list-error-and-restore-no-snapshot", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Listing deployments fails, so scale-down must surface the error.
			listErrRunner := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				cmdLine := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(cmdLine, "get deployment -A -o jsonpath=") {
					return "", errors.New("list deployments failed")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchListErr, _ := newHookOrchestrator(t, cfg, listErrRunner, listErrRunner)
			if err := orchListErr.TestHookScaleDownApps(context.Background()); err == nil || !strings.Contains(err.Error(), "collect deployments") {
				t.Fatalf("expected scaleDownApps list error, got %v", err)
			}
			// Restoring with no recorded snapshot is a benign no-op.
			orchNoSnapshot, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
			if err := orchNoSnapshot.TestHookRestoreScaledApps(context.Background()); err != nil {
				t.Fatalf("expected restore with missing snapshot to succeed, got %v", err)
			}
		})
	})
}