package orchestrator
import (
	"context"
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookGapMatrixPart6CoverageClosure runs one orchestration or CLI step.
// Signature: TestHookGapMatrixPart6CoverageClosure(t *testing.T).
// Why: targets the remaining low branch paths after part5 so per-file coverage
// can move toward the strict 95% quality gate.
//
// Each subtest installs a stub command runner that intercepts specific
// kubectl/ssh invocations by substring match and delegates everything else to
// the shared lifecycleDispatcher, then asserts the error/branch the stub
// forces. NOTE(review): the exact substrings matched below must mirror the
// commands the orchestrator builds — confirm against the hook implementations
// before editing any of them.
func TestHookGapMatrixPart6CoverageClosure(t *testing.T) {
	t.Run("critical-vault-and-poststart-branches", func(t *testing.T) {
		t.Run("wait-vault-ready-dryrun-and-cancel", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Dry-run cluster: waitVaultReady should skip polling entirely
			// and return nil without running any commands.
			dry := cluster.New(cfg, &execx.Runner{DryRun: true}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.TestHookWaitVaultReady(context.Background(), "vault", "statefulset", "vault"); err != nil {
				t.Fatalf("expected dry-run waitVaultReady skip, got %v", err)
			}

			// Stub: Vault never becomes ready (0 ready replicas, pod Running
			// but sealed, init secret missing), so the wait loop keeps
			// polling until the context cancels.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get statefulset vault -o jsonpath={.status.readyReplicas}"):
					return "0", nil
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					return "", errors.New("missing secret")
				default:
					// Unmatched commands fall through to the shared
					// dispatcher so unrelated hooks still succeed.
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			// Cancel up front so the poll loop observes ctx.Done immediately.
			ctx, cancel := context.WithCancel(context.Background())
			cancel()
			if err := orch.TestHookWaitVaultReady(ctx, "vault", "statefulset", "vault"); !errors.Is(err, context.Canceled) {
				t.Fatalf("expected canceled waitVaultReady, got %v", err)
			}
		})

		t.Run("ensure-vault-unsealed-and-vault-sealed-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)

			// Stub: sealed Vault with a present init secret, but the unseal
			// command itself fails — exercises the "unseal attempt" error
			// branch of ensureVaultUnsealed.
			runUnsealErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
					return "Running", nil
				case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
					return `{"sealed":true}`, nil
				case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
					// Base64-encoded secret payload so decoding succeeds.
					return "c2VjcmV0Cg==", nil
				case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
					return "", errors.New("unseal failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchUnsealErr, _ := newHookOrchestrator(t, cfg, runUnsealErr, runUnsealErr)
			if err := orchUnsealErr.TestHookEnsureVaultUnsealed(context.Background()); err == nil || !strings.Contains(err.Error(), "unseal attempt") {
				t.Fatalf("expected unseal failure branch, got %v", err)
			}

			// Stub: the status command itself errors — vaultSealed must
			// propagate the command failure.
			runStatusErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "", errors.New("status failed")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusErr, _ := newHookOrchestrator(t, cfg, runStatusErr, runStatusErr)
			if _, err := orchStatusErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed command error")
			}

			// Stub: status succeeds but returns non-JSON — vaultSealed must
			// surface the decode failure.
			runStatusParseErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "vault status -format=json") {
					return "not-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchStatusParseErr, _ := newHookOrchestrator(t, cfg, runStatusParseErr, runStatusParseErr)
			if _, err := orchStatusParseErr.TestHookVaultSealed(context.Background()); err == nil {
				t.Fatalf("expected vaultSealed parse error")
			}
		})
	})

	t.Run("scaling-and-storage-branches", func(t *testing.T) {
		t.Run("scale-down-and-restore-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.ExcludedNamespaces = []string{"flux-system"}
			// Stub: inventory reports one deployment, and the scale-to-zero
			// command for it fails — scaleDownApps should aggregate the
			// per-workload failures into a "scaling had" error.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					// namespace<TAB>name<TAB>replicas inventory line.
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=0"):
					return "", errors.New("scale failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookScaleDownApps(context.Background()); err == nil || !strings.Contains(err.Error(), "scaling had") {
				t.Fatalf("expected scaleDownApps failure aggregation, got %v", err)
			}

			// Seed a corrupt snapshot file so restoreScaledApps hits the
			// JSON decode error branch.
			snapshotPath := filepath.Join(cfg.State.Dir, "scaled-workloads.json")
			if err := os.WriteFile(snapshotPath, []byte("{bad-json"), 0o644); err != nil {
				t.Fatalf("seed decode error snapshot: %v", err)
			}
			if err := orch.TestHookRestoreScaledApps(context.Background()); err == nil || !strings.Contains(err.Error(), "decode scaled workload snapshot") {
				t.Fatalf("expected restore decode error, got %v", err)
			}
		})

		t.Run("storage-ready-branch-matrix", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.StorageMinReadyNodes = 2
			cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana"}

			// Stub: only one Longhorn node and it is unschedulable
			// ("a:True:False"), below the two-node minimum — storageReady
			// must report not-ready with a node-count reason.
			runInsufficient := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io") {
					return "a:True:False\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchInsufficient, _ := newHookOrchestrator(t, cfg, runInsufficient, runInsufficient)
			if ok, reason, err := orchInsufficient.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "longhorn ready+sched nodes") {
				t.Fatalf("expected insufficient longhorn readiness, ok=%v reason=%q err=%v", ok, reason, err)
			}

			// Stub: enough healthy Longhorn nodes, but the critical PVC
			// lookup returns NotFound — storageReady must report not-ready
			// without treating the lookup as a hard error.
			runPVCNotFound := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io"):
					return "a:True:True\nb:True:True\n", nil
				case name == "kubectl" && strings.Contains(command, "-n monitoring get pvc grafana -o jsonpath={.status.phase}"):
					return "", errors.New("Error from server (NotFound): pvc not found")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchPVCNotFound, _ := newHookOrchestrator(t, cfg, runPVCNotFound, runPVCNotFound)
			if ok, reason, err := orchPVCNotFound.TestHookStorageReady(context.Background()); err != nil || ok || !strings.Contains(reason, "not found") {
				t.Fatalf("expected pvc-not-found path, ok=%v reason=%q err=%v", ok, reason, err)
			}
		})
	})

	t.Run("reachability-drain-and-flux-branches", func(t *testing.T) {
		t.Run("node-inventory-reachability-auth-denied", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeInventoryReach = true
			// Short 1s wait/poll windows keep the failing gate fast.
			cfg.Startup.NodeInventoryReachWaitSeconds = 1
			cfg.Startup.NodeInventoryReachPollSeconds = 1
			// Stub: the reachability probe fails with an SSH publickey
			// rejection, which the gate should classify as "auth denied".
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "__ANANKE_NODE_REACHABLE__") {
					return "", errors.New("Permission denied (publickey)")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.TestHookWaitForNodeInventoryReachability(context.Background())
			if err == nil || !strings.Contains(err.Error(), "auth denied") {
				t.Fatalf("expected auth denied branch, got %v", err)
			}
		})

		t.Run("drain-diagnostics-nonempty-error-and-no-snapshot", func(t *testing.T) {
			// Stub: pod listing for the drained node errors but still emits
			// partial output; the snapshot listing returns only a header row
			// (no snapshot entries).
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "spec.nodeName=titan-23"):
					// Output AND an error: diagnostics must include both the
					// unavailability notice and the partial pod details.
					return "monitoring grafana-0 Running ReplicaSet\n", errors.New("forbidden")
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "Name Size Created Location\n", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			got := orch.TestHookDrainNodeDiagnostics(context.Background(), "titan-23")
			if !strings.Contains(got, "diagnostics unavailable") || !strings.Contains(got, "grafana-0") {
				t.Fatalf("expected diagnostics error with output details, got %q", got)
			}
			// Header-only listing means no snapshots to choose from.
			if _, err := orch.TestHookLatestEtcdSnapshotPath(context.Background(), "titan-db"); err == nil || !strings.Contains(err.Error(), "no etcd snapshots found") {
				t.Fatalf("expected no-snapshot branch, got %v", err)
			}
		})

		t.Run("flux-health-json-and-ready-condition-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Stub: kustomization listing returns invalid JSON — the health
			// check must fail on decode.
			runDecodeErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return "{bad-json", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchDecodeErr, _ := newHookOrchestrator(t, cfg, runDecodeErr, runDecodeErr)
			if _, _, err := orchDecodeErr.TestHookFluxHealthReady(context.Background()); err == nil {
				t.Fatalf("expected flux health decode error")
			}

			// Stub: a kustomization whose conditions lack a Ready entry —
			// the check must report not-ready with a
			// "ready condition missing" detail rather than an error.
			runMissingReady := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
					return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Reconciling","status":"True","reason":"Progressing"}]}}]}`, nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchMissingReady, _ := newHookOrchestrator(t, cfg, runMissingReady, runMissingReady)
			ready, detail, err := orchMissingReady.TestHookFluxHealthReady(context.Background())
			if err != nil || ready || !strings.Contains(detail, "ready condition missing") {
				t.Fatalf("expected missing-ready-condition branch, ready=%v detail=%q err=%v", ready, detail, err)
			}
		})
	})

	t.Run("access-coordination-lifecycle-branches", func(t *testing.T) {
		t.Run("reconcile-and-ssh-auth-gates", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.RequireNodeSSHAuth = true
			// Short 1s wait/poll windows keep the failing gate fast.
			cfg.Startup.NodeSSHAuthWaitSeconds = 1
			cfg.Startup.NodeSSHAuthPollSeconds = 1

			// Stub: titan-db fails the systemctl access probe and the SSH
			// auth echo probe is rejected with a publickey denial.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "/usr/bin/systemctl --version") && strings.Contains(command, "titan-db"):
					return "", errors.New("permission denied")
				case name == "ssh" && strings.Contains(command, "echo __ANANKE_SSH_AUTH_OK__"):
					return "", errors.New("Permission denied (publickey)")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			// Per-node validation failures are aggregated into one error.
			if err := orch.TestHookReconcileNodeAccess(context.Background(), []string{"titan-db", "titan-23"}); err == nil || !strings.Contains(err.Error(), "access validation had") {
				t.Fatalf("expected reconcileNodeAccess error aggregation, got %v", err)
			}
			if err := orch.TestHookWaitForNodeSSHAuth(context.Background(), []string{"titan-db"}); err == nil || !strings.Contains(err.Error(), "ssh auth gate failed") {
				t.Fatalf("expected waitForNodeSSHAuth auth-denied branch, got %v", err)
			}
		})

		t.Run("verify-snapshot-and-shutdown-mode-errors", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Stub: the snapshot exists but is only 512 bytes — below the
			// minimum size verifyEtcdSnapshot enforces.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "stat -c %s"):
					return "512", nil
				case name == "ssh" && strings.Contains(command, "sudo -n /usr/local/bin/k3s etcd-snapshot ls"):
					return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil
				case name == "ssh" && strings.Contains(command, "sha256sum"):
					return "short-hash", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookVerifyEtcdSnapshot(context.Background(), "titan-db", "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown"); err == nil || !strings.Contains(err.Error(), "snapshot too small") {
				t.Fatalf("expected small snapshot rejection, got %v", err)
			}
			// "poweroff" was removed as a shutdown mode; normalization must
			// reject it rather than silently mapping it.
			if _, err := cluster.TestHookNormalizeShutdownMode("poweroff"); err == nil {
				t.Fatalf("expected removed poweroff shutdown mode error")
			}
		})

		t.Run("etcd-restore-not-applicable-and-shutdown-mode", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// Stub: k3s is configured with an external datastore
			// (postgres), so an etcd snapshot restore does not apply.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "systemctl cat k3s") {
					return "ExecStart=/usr/local/bin/k3s server --datastore-endpoint=postgres://db:5432/k3s", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"})
			if err == nil || !errors.Is(err, cluster.ErrEtcdRestoreNotApplicable) {
				t.Fatalf("expected etcd restore not-applicable error, got %v", err)
			}

			// Unknown shutdown modes must be rejected before any node work.
			err = orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "drill", Mode: "unsupported"})
			if err == nil || !strings.Contains(err.Error(), "unsupported shutdown mode") {
				t.Fatalf("expected shutdown mode validation error, got %v", err)
			}
		})
	})
}
|