package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)

// TestHookGapMatrixPart6CoverageClosure runs one orchestration or CLI step.
// Signature: TestHookGapMatrixPart6CoverageClosure(t *testing.T).
// Why: targets the remaining low branch paths after part5 so per-file coverage
// can move toward the strict 95% quality gate.
//
// Each subtest installs a fake command runner (same signature as the real
// executor: ctx, timeout, name, args -> output, error) that pattern-matches on
// the joined command line and returns canned output/errors to force a specific
// error branch; unmatched commands fall through to lifecycleDispatcher with a
// fresh commandRecorder.
func TestHookGapMatrixPart6CoverageClosure(t *testing.T) {
	t.Run("critical-vault-and-poststart-branches", func(t *testing.T) {
		t.Run("wait-vault-ready-dryrun-and-cancel", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Dry-run orchestrator: waitVaultReady must skip without error.
			dry := cluster.New(cfg, &execx.Runner{DryRun: true}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.TestHookWaitVaultReady(context.Background(), "vault", "statefulset", "vault"); err != nil {
				t.Fatalf("expected dry-run waitVaultReady skip, got %v", err)
			}
			// Fake runner: statefulset never becomes ready ("0" ready
			// replicas) and the init secret is missing, so the wait loop
			// keeps polling until the context is canceled below.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get statefulset vault -o jsonpath={.status.readyReplicas}"):
					return "0", nil
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					return "", errors.New("missing secret")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			// Pre-canceled context: waitVaultReady must surface context.Canceled.
			ctx, cancel := context.WithCancel(context.Background())
			cancel()
			if err := orch.TestHookWaitVaultReady(ctx, "vault", "statefulset", "vault"); !errors.Is(err, context.Canceled) {
				t.Fatalf("expected canceled waitVaultReady, got %v", err)
			}
		})
		t.Run("ensure-vault-unsealed-and-vault-sealed-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Sealed vault with a readable init secret, but the unseal
			// command itself fails -> "unseal attempt" error branch.
			runUnsealErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					// Base64 payload standing in for the stored unseal key.
					return "c2VjcmV0Cg==", nil
				case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
					return "", errors.New("unseal failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchUnsealErr, _ := newHookOrchestrator(t, cfg, runUnsealErr, runUnsealErr)
			if err := orchUnsealErr.TestHookEnsureVaultUnsealed(context.Background()); err == nil || !strings.Contains(err.Error(), "unseal attempt") {
				t.Fatalf("expected unseal failure branch, got %v", err)
			}
			// vault status command fails outright -> vaultSealed returns the error.
			runStatusErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "", errors.New("status failed")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusErr, _ := newHookOrchestrator(t, cfg, runStatusErr, runStatusErr)
			if _, err := orchStatusErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed command error")
			}
			// vault status succeeds but emits non-JSON -> parse-error branch.
			runStatusParseErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "not-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusParseErr, _ := newHookOrchestrator(t, cfg, runStatusParseErr, runStatusParseErr)
			if _, err := orchStatusParseErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed parse error")
			}
		})
	})
	t.Run("scaling-and-storage-branches", func(t *testing.T) {
		t.Run("scale-down-and-restore-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.ExcludedNamespaces = []string{"flux-system"}
			// One deployment found; the scale-to-zero command fails so
			// scaleDownApps must aggregate the error ("scaling had ...").
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=0"):
					return "", errors.New("scale failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookScaleDownApps(context.Background()); err == nil || !strings.Contains(err.Error(), "scaling had") {
				t.Fatalf("expected scaleDownApps failure aggregation, got %v", err)
			}
			// Seed a corrupt snapshot file so restore hits the JSON decode error.
			snapshotPath := filepath.Join(cfg.State.Dir, "scaled-workloads.json")
			if err := os.WriteFile(snapshotPath, []byte("{bad-json"), 0o644); err != nil {
				t.Fatalf("seed decode error snapshot: %v", err)
			}
			if err := orch.TestHookRestoreScaledApps(context.Background()); err == nil || !strings.Contains(err.Error(), "decode scaled workload snapshot") {
				t.Fatalf("expected restore decode error, got %v", err)
			}
		})
		t.Run("storage-ready-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.StorageMinReadyNodes = 2
			cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana"}
			// Only one node and it is not schedulable ("a:True:False"), so
			// the ready+schedulable count falls below StorageMinReadyNodes.
			runInsufficient := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io") {
					return "a:True:False\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchInsufficient, _ := newHookOrchestrator(t, cfg, runInsufficient, runInsufficient)
			if ok, reason, err := orchInsufficient.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "longhorn ready+sched nodes") {
				t.Fatalf("expected insufficient longhorn readiness, ok=%v reason=%q err=%v", ok, reason, err)
			}
			// Nodes are fine, but the critical PVC lookup returns a
			// kubectl NotFound error -> "not found" reason (not a hard error).
			runPVCNotFound := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io"):
					return "a:True:True\nb:True:True\n", nil
				case name == "kubectl" && strings.Contains(command, "-n monitoring get pvc grafana -o jsonpath={.status.phase}"):
					return "", errors.New("Error from server (NotFound): pvc not found")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchPVCNotFound, _ := newHookOrchestrator(t, cfg, runPVCNotFound, runPVCNotFound)
			if ok, reason, err := orchPVCNotFound.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "not found") {
				t.Fatalf("expected pvc-not-found path, ok=%v reason=%q err=%v", ok, reason, err)
			}
		})
	})
	t.Run("reachability-drain-and-flux-branches", func(t *testing.T) {
		t.Run("node-inventory-reachability-auth-denied", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeInventoryReach = true
			// 1-second wait/poll keeps the gate loop fast in tests.
			cfg.Startup.NodeInventoryReachWaitSeconds = 1
			cfg.Startup.NodeInventoryReachPollSeconds = 1
			// SSH reachability probe fails with a publickey denial, which
			// should be classified as "auth denied" rather than unreachable.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "__ANANKE_NODE_REACHABLE__") {
					return "", errors.New("Permission denied (publickey)")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.TestHookWaitForNodeInventoryReachability(context.Background())
			if err == nil || !strings.Contains(err.Error(), "auth denied") {
				t.Fatalf("expected auth denied branch, got %v", err)
			}
		})
		t.Run("drain-diagnostics-nonempty-error-and-no-snapshot", func(t *testing.T) {
			// Pod listing returns BOTH output and an error: diagnostics
			// should report "diagnostics unavailable" yet still include the
			// partial pod output. The etcd-snapshot ls output is header-only,
			// so no snapshot path can be extracted.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "spec.nodeName=titan-23"):
					return "monitoring grafana-0 Running ReplicaSet\n", errors.New("forbidden")
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "Name Size Created Location\n", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			got := orch.TestHookDrainNodeDiagnostics(context.Background(), "titan-23")
			if !strings.Contains(got, "diagnostics unavailable") || !strings.Contains(got, "grafana-0") {
				t.Fatalf("expected diagnostics error with output details, got %q", got)
			}
			if _, err := orch.TestHookLatestEtcdSnapshotPath(context.Background(), "titan-db"); err == nil || !strings.Contains(err.Error(), "no etcd snapshots found") {
				t.Fatalf("expected no-snapshot branch, got %v", err)
			}
		})
		t.Run("flux-health-json-and-ready-condition-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Kustomization listing returns malformed JSON -> decode error.
			runDecodeErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return "{bad-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchDecodeErr, _ := newHookOrchestrator(t, cfg, runDecodeErr, runDecodeErr)
			if _, _, err := orchDecodeErr.TestHookFluxHealthReady(context.Background()); err == nil {
				t.Fatalf("expected flux health decode error")
			}
			// Valid JSON but the kustomization has only a Reconciling
			// condition (no Ready) -> not ready, detail explains why.
			runMissingReady := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Reconciling","status":"True","reason":"Progressing"}]}}]}`, nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchMissingReady, _ := newHookOrchestrator(t, cfg, runMissingReady, runMissingReady)
			ready, detail, err := orchMissingReady.TestHookFluxHealthReady(context.Background())
			if err != nil || ready || !strings.Contains(detail, "ready condition missing") {
				t.Fatalf("expected missing-ready-condition branch, ready=%v detail=%q err=%v", ready, detail, err)
			}
		})
	})
	t.Run("access-coordination-lifecycle-branches", func(t *testing.T) {
		t.Run("reconcile-and-ssh-auth-gates", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeSSHAuth = true
			// 1-second wait/poll keeps the auth gate loop fast in tests.
			cfg.Startup.NodeSSHAuthWaitSeconds = 1
			cfg.Startup.NodeSSHAuthPollSeconds = 1
			// titan-db fails the systemctl probe (reconcile aggregation) and
			// all nodes fail the SSH auth echo probe (auth gate failure).
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "/usr/bin/systemctl --version") && strings.Contains(command, "titan-db"):
					return "", errors.New("permission denied")
				case name == "ssh" && strings.Contains(command, "echo __ANANKE_SSH_AUTH_OK__"):
					return "", errors.New("Permission denied (publickey)")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookReconcileNodeAccess(context.Background(), []string{"titan-db", "titan-23"}); err == nil || !strings.Contains(err.Error(), "access validation had") {
				t.Fatalf("expected reconcileNodeAccess error aggregation, got %v", err)
			}
			if err := orch.TestHookWaitForNodeSSHAuth(context.Background(), []string{"titan-db"}); err == nil || !strings.Contains(err.Error(), "ssh auth gate failed") {
				t.Fatalf("expected waitForNodeSSHAuth auth-denied branch, got %v", err)
			}
		})
		t.Run("verify-snapshot-and-shutdown-mode-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Snapshot exists but stat reports only 512 bytes, which should
			// trip the minimum-size check in verifyEtcdSnapshot.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "stat -c %s"):
					return "512", nil
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil
				case name == "ssh" && strings.Contains(command, "sha256sum"):
					return "short-hash", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookVerifyEtcdSnapshot(context.Background(), "titan-db", "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown"); err == nil || !strings.Contains(err.Error(), "snapshot too small") {
				t.Fatalf("expected small snapshot rejection, got %v", err)
			}
			// "poweroff" is a removed shutdown mode and must be rejected.
			if _, err := cluster.TestHookNormalizeShutdownMode("poweroff"); err == nil {
				t.Fatalf("expected removed poweroff shutdown mode error")
			}
		})
		t.Run("etcd-restore-not-applicable-and-shutdown-mode", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// k3s unit uses an external datastore (postgres), so an etcd
			// restore is not applicable for this control plane.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "systemctl cat k3s") {
					return "ExecStart=/usr/local/bin/k3s server --datastore-endpoint=postgres://db:5432/k3s", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"})
			if err == nil || !errors.Is(err, cluster.ErrEtcdRestoreNotApplicable) {
				t.Fatalf("expected etcd restore not-applicable error, got %v", err)
			}
			// Unknown shutdown mode string must fail validation.
			err = orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "drill", Mode: "unsupported"})
			if err == nil || !strings.Contains(err.Error(), "unsupported shutdown mode") {
				t.Fatalf("expected shutdown mode validation error, got %v", err)
			}
		})
	})
}