// ananke/testing/orchestrator/hooks_gap_matrix_part6_test.go

package orchestrator
import (
	"context"
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookGapMatrixPart6CoverageClosure runs one orchestration or CLI step.
// Signature: TestHookGapMatrixPart6CoverageClosure(t *testing.T).
// Why: targets the remaining low branch paths after part5 so per-file coverage
// can move toward the strict 95% quality gate.
//
// Each leaf subtest installs a stub command dispatcher that forces one
// rarely-taken orchestrator branch, then asserts on the resulting error or
// reason text. Unmatched commands fall through to lifecycleDispatcher.
func TestHookGapMatrixPart6CoverageClosure(t *testing.T) {
	t.Run("critical-vault-and-poststart-branches", func(t *testing.T) {
		t.Run("wait-vault-ready-dryrun-and-cancel", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// A dry-run orchestrator must skip the readiness wait entirely.
			dry := cluster.New(cfg, &execx.Runner{DryRun: true}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.TestHookWaitVaultReady(context.Background(), "vault", "statefulset", "vault"); err != nil {
				t.Fatalf("expected dry-run waitVaultReady skip, got %v", err)
			}
			// The statefulset never reports ready (0 replicas), so the wait
			// loop can only exit via context cancellation.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get statefulset vault -o jsonpath={.status.readyReplicas}"):
					return "0", nil
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					return "", errors.New("missing secret")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			ctx, cancel := context.WithCancel(context.Background())
			cancel() // cancel up front so the wait loop aborts immediately
			if err := orch.TestHookWaitVaultReady(ctx, "vault", "statefulset", "vault"); !errors.Is(err, context.Canceled) {
				t.Fatalf("expected canceled waitVaultReady, got %v", err)
			}
		})
		t.Run("ensure-vault-unsealed-and-vault-sealed-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Sealed vault with an available init secret, but the unseal
			// command itself fails: exercises the unseal-attempt error branch.
			runUnsealErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					return "c2VjcmV0Cg==", nil
				case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
					return "", errors.New("unseal failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchUnsealErr, _ := newHookOrchestrator(t, cfg, runUnsealErr, runUnsealErr)
			if err := orchUnsealErr.TestHookEnsureVaultUnsealed(context.Background()); err == nil || !strings.Contains(err.Error(), "unseal attempt") {
				t.Fatalf("expected unseal failure branch, got %v", err)
			}
			// vault status failing outright -> vaultSealed returns the
			// command error.
			runStatusErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "", errors.New("status failed")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusErr, _ := newHookOrchestrator(t, cfg, runStatusErr, runStatusErr)
			if _, err := orchStatusErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed command error")
			}
			// vault status succeeding with unparseable output -> decode error.
			runStatusParseErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "not-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusParseErr, _ := newHookOrchestrator(t, cfg, runStatusParseErr, runStatusParseErr)
			if _, err := orchStatusParseErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed parse error")
			}
		})
	})
	t.Run("scaling-and-storage-branches", func(t *testing.T) {
		t.Run("scale-down-and-restore-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.ExcludedNamespaces = []string{"flux-system"}
			// One deployment is discovered but scaling it fails; the hook is
			// expected to aggregate failures rather than stop at the first.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=0"):
					return "", errors.New("scale failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookScaleDownApps(context.Background()); err == nil || !strings.Contains(err.Error(), "scaling had") {
				t.Fatalf("expected scaleDownApps failure aggregation, got %v", err)
			}
			// Corrupt the persisted snapshot so restore hits its decode branch.
			snapshotPath := filepath.Join(cfg.State.Dir, "scaled-workloads.json")
			if err := os.WriteFile(snapshotPath, []byte("{bad-json"), 0o644); err != nil {
				t.Fatalf("seed decode error snapshot: %v", err)
			}
			if err := orch.TestHookRestoreScaledApps(context.Background()); err == nil || !strings.Contains(err.Error(), "decode scaled workload snapshot") {
				t.Fatalf("expected restore decode error, got %v", err)
			}
		})
		t.Run("storage-ready-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.StorageMinReadyNodes = 2
			cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana"}
			// Only one node is ready+schedulable while two are required.
			runInsufficient := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io") {
					return "a:True:False\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchInsufficient, _ := newHookOrchestrator(t, cfg, runInsufficient, runInsufficient)
			if ok, reason, err := orchInsufficient.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "longhorn ready+sched nodes") {
				t.Fatalf("expected insufficient longhorn readiness, ok=%v reason=%q err=%v", ok, reason, err)
			}
			// Node count satisfied but a critical PVC is missing entirely.
			runPVCNotFound := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io"):
					return "a:True:True\nb:True:True\n", nil
				case name == "kubectl" && strings.Contains(command, "-n monitoring get pvc grafana -o jsonpath={.status.phase}"):
					return "", errors.New("Error from server (NotFound): pvc not found")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchPVCNotFound, _ := newHookOrchestrator(t, cfg, runPVCNotFound, runPVCNotFound)
			if ok, reason, err := orchPVCNotFound.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "not found") {
				t.Fatalf("expected pvc-not-found path, ok=%v reason=%q err=%v", ok, reason, err)
			}
		})
	})
	t.Run("reachability-drain-and-flux-branches", func(t *testing.T) {
		t.Run("node-inventory-reachability-auth-denied", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeInventoryReach = true
			cfg.Startup.NodeInventoryReachWaitSeconds = 1
			cfg.Startup.NodeInventoryReachPollSeconds = 1
			// The SSH probe is rejected with a publickey failure, which the
			// gate must classify as auth denial, not plain unreachability.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "__ANANKE_NODE_REACHABLE__") {
					return "", errors.New("Permission denied (publickey)")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.TestHookWaitForNodeInventoryReachability(context.Background())
			if err == nil || !strings.Contains(err.Error(), "auth denied") {
				t.Fatalf("expected auth denied branch, got %v", err)
			}
		})
		t.Run("drain-diagnostics-nonempty-error-and-no-snapshot", func(t *testing.T) {
			// The diagnostics command errors but still produces partial
			// output; both the failure note and the pod listing must surface.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "spec.nodeName=titan-23"):
					return "monitoring grafana-0 Running ReplicaSet\n", errors.New("forbidden")
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "Name Size Created Location\n", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			got := orch.TestHookDrainNodeDiagnostics(context.Background(), "titan-23")
			if !strings.Contains(got, "diagnostics unavailable") || !strings.Contains(got, "grafana-0") {
				t.Fatalf("expected diagnostics error with output details, got %q", got)
			}
			// The snapshot listing returns only a header row -> no snapshots.
			if _, err := orch.TestHookLatestEtcdSnapshotPath(context.Background(), "titan-db"); err == nil || !strings.Contains(err.Error(), "no etcd snapshots found") {
				t.Fatalf("expected no-snapshot branch, got %v", err)
			}
		})
		t.Run("flux-health-json-and-ready-condition-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Malformed kustomization JSON -> decode error from the hook.
			runDecodeErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return "{bad-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchDecodeErr, _ := newHookOrchestrator(t, cfg, runDecodeErr, runDecodeErr)
			if _, _, err := orchDecodeErr.TestHookFluxHealthReady(context.Background()); err == nil {
				t.Fatalf("expected flux health decode error")
			}
			// Valid JSON whose conditions lack a Ready entry: not ready, with
			// a detail naming the missing condition.
			runMissingReady := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Reconciling","status":"True","reason":"Progressing"}]}}]}`, nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchMissingReady, _ := newHookOrchestrator(t, cfg, runMissingReady, runMissingReady)
			ready, detail, err := orchMissingReady.TestHookFluxHealthReady(context.Background())
			if err != nil || ready || !strings.Contains(detail, "ready condition missing") {
				t.Fatalf("expected missing-ready-condition branch, ready=%v detail=%q err=%v", ready, detail, err)
			}
		})
	})
	t.Run("access-coordination-lifecycle-branches", func(t *testing.T) {
		t.Run("reconcile-and-ssh-auth-gates", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeSSHAuth = true
			cfg.Startup.NodeSSHAuthWaitSeconds = 1
			cfg.Startup.NodeSSHAuthPollSeconds = 1
			// titan-db fails both the systemctl probe and the auth echo probe.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "/usr/bin/systemctl --version") && strings.Contains(command, "titan-db"):
					return "", errors.New("permission denied")
				case name == "ssh" && strings.Contains(command, "echo __ANANKE_SSH_AUTH_OK__"):
					return "", errors.New("Permission denied (publickey)")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookReconcileNodeAccess(context.Background(), []string{"titan-db", "titan-23"}); err == nil || !strings.Contains(err.Error(), "access validation had") {
				t.Fatalf("expected reconcileNodeAccess error aggregation, got %v", err)
			}
			if err := orch.TestHookWaitForNodeSSHAuth(context.Background(), []string{"titan-db"}); err == nil || !strings.Contains(err.Error(), "ssh auth gate failed") {
				t.Fatalf("expected waitForNodeSSHAuth auth-denied branch, got %v", err)
			}
		})
		t.Run("verify-snapshot-and-shutdown-mode-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// The snapshot exists but is only 512 bytes: below the size floor.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "stat -c %s"):
					return "512", nil
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil
				case name == "ssh" && strings.Contains(command, "sha256sum"):
					return "short-hash", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookVerifyEtcdSnapshot(context.Background(), "titan-db", "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown"); err == nil || !strings.Contains(err.Error(), "snapshot too small") {
				t.Fatalf("expected small snapshot rejection, got %v", err)
			}
			// "poweroff" was removed as a shutdown mode and must be rejected.
			if _, err := cluster.TestHookNormalizeShutdownMode("poweroff"); err == nil {
				t.Fatalf("expected removed poweroff shutdown mode error")
			}
		})
		t.Run("etcd-restore-not-applicable-and-shutdown-mode", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// k3s is configured with an external datastore, so an etcd
			// restore is not applicable on this control plane.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "systemctl cat k3s") {
					return "ExecStart=/usr/local/bin/k3s server --datastore-endpoint=postgres://db:5432/k3s", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"})
			if err == nil || !errors.Is(err, cluster.ErrEtcdRestoreNotApplicable) {
				t.Fatalf("expected etcd restore not-applicable error, got %v", err)
			}
			err = orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "drill", Mode: "unsupported"})
			if err == nil || !strings.Contains(err.Error(), "unsupported shutdown mode") {
				t.Fatalf("expected shutdown mode validation error, got %v", err)
			}
		})
	})
}