package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)

// TestHookGapMatrixPart7CoverageClosure exercises one orchestration or CLI
// step per subtest.
// Signature: TestHookGapMatrixPart7CoverageClosure(t *testing.T).
// Why: closes additional low-coverage branches in convergence, storage, access,
// flux, lifecycle, and sensitive command wrappers.
func TestHookGapMatrixPart7CoverageClosure(t *testing.T) {
|
|
t.Run("workload-convergence-branch-matrix", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.WorkloadConvergenceWaitSeconds = 1
|
|
cfg.Startup.WorkloadConvergencePollSeconds = 1
|
|
cfg.Startup.IgnoreWorkloadNamespaces = []string{"ignored-ns"}
|
|
cfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
|
|
cfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/skip-me"}
|
|
cfg.Startup.IgnoreFluxKustomizations = []string{"flux-system/ignored"}
|
|
|
|
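		// runReady stubs the command runner: the controller query returns one
		// canned payload covering the ready, ignored-node, ignored-namespace,
		// ignored-kustomization, and ignored-workload branches in a single
		// pass; all other commands fall through to the shared dispatcher.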
		runReady := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":1}},
{"kind":"DaemonSet","metadata":{"namespace":"monitoring","name":"node-exporter"},"spec":{"template":{"spec":{"nodeName":"titan-22"}}},"status":{"desiredNumberScheduled":2,"numberReady":1}},
{"kind":"Deployment","metadata":{"namespace":"ignored-ns","name":"ignore-ns"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"replicas":1},"status":{"readyReplicas":1}},
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"skip-me"},"spec":{"replicas":1},"status":{"readyReplicas":0}}
]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchReady, _ := newHookOrchestrator(t, cfg, runReady, runReady)
		ready, detail, err := orchReady.TestHookWorkloadConvergenceReady(context.Background())
		if err != nil || !ready || !strings.Contains(detail, "controllers ready=") {
			t.Fatalf("expected workload convergence ready branch, ready=%v detail=%q err=%v", ready, detail, err)
		}

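		// A syntactically invalid JSON payload from the controller query must
		// surface as a decode error rather than a readiness verdict.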
		runDecodeErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json") {
				return "{bad-json", nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orchDecodeErr, _ := newHookOrchestrator(t, cfg, runDecodeErr, runDecodeErr)
		if _, _, err := orchDecodeErr.TestHookWorkloadConvergenceReady(context.Background()); err == nil {
			t.Fatalf("expected workload convergence decode error")
		}

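		// runPending reports one deployment that never becomes ready, so the
		// one-second convergence window configured above must elapse and the
		// wait must fail with "workload convergence not satisfied".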
		runPending := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
				return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
			case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
				return `{"items":[]}`, nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orchPending, _ := newHookOrchestrator(t, cfg, runPending, runPending)
		if err := orchPending.TestHookWaitForWorkloadConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "workload convergence not satisfied") {
			t.Fatalf("expected workload convergence timeout branch, got %v", err)
		}

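		// An already-canceled context must short-circuit the wait loop with
		// context.Canceled instead of the timeout error above.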
		cancelCtx, cancel := context.WithCancel(context.Background())
		cancel()
		if err := orchPending.TestHookWaitForWorkloadConvergence(cancelCtx); !errors.Is(err, context.Canceled) {
			t.Fatalf("expected canceled convergence wait, got %v", err)
		}

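		// Table-driven negatives: each case flips exactly one precondition of
		// the stuck-vault-init heuristic (wrong phase, injection disabled,
		// different init container, still inside the grace window, container
		// not running), so no stuck reason may be reported. The one input
		// satisfying all preconditions follows and must yield VaultInitStuck.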
		cases := []cluster.TestHookStuckVaultInitInput{
			{Phase: "Running", Inject: true, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: false, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "other-init", Running: true, StartedAtOffsetSec: 600, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "vault-agent-init", Running: true, StartedAtOffsetSec: 0, GraceSeconds: 60},
			{Phase: "Pending", Inject: true, InitContainerName: "vault-agent-init", Running: false, StartedAtOffsetSec: 600, GraceSeconds: 60},
		}
		for _, in := range cases {
			if got := cluster.TestHookStuckVaultInitReasonRaw(in); got != "" {
				t.Fatalf("expected no stuck reason for %+v, got %q", in, got)
			}
		}
		if got := cluster.TestHookStuckVaultInitReasonRaw(cluster.TestHookStuckVaultInitInput{
			Phase:              "Pending",
			Inject:             true,
			InitContainerName:  "vault-agent-init",
			Running:            true,
			StartedAtOffsetSec: 600,
			GraceSeconds:       60,
		}); got != "VaultInitStuck" {
			t.Fatalf("expected VaultInitStuck branch, got %q", got)
		}
	})

t.Run("storage-access-and-reachability-branches", func(t *testing.T) {
|
|
t.Run("storage-ready-invalid-entry-and-query-error", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.StorageCriticalPVCs = []string{"bad-entry"}
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io") {
|
|
return "a:True:True\nb:True:True\n", nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if _, _, err := orch.TestHookStorageReady(context.Background()); err == nil {
|
|
t.Fatalf("expected invalid storage_critical_pvcs entry error")
|
|
}
|
|
|
|
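			// With a well-formed "namespace/name" entry, a failure of the PVC
			// phase query itself must propagate as an error.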
			cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana"}
			runPVCError := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get nodes.longhorn.io"):
					return "a:True:True\nb:True:True\n", nil
				case name == "kubectl" && strings.Contains(command, "-n monitoring get pvc grafana -o jsonpath={.status.phase}"):
					return "", errors.New("query pvc failed")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchPVCError, _ := newHookOrchestrator(t, cfg, runPVCError, runPVCError)
			if _, _, err := orchPVCError.TestHookStorageReady(context.Background()); err == nil {
				t.Fatalf("expected pvc query error branch")
			}
		})

t.Run("wait-for-node-ssh-auth-and-inventory-timeouts", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.RequireNodeSSHAuth = true
|
|
cfg.Startup.NodeSSHAuthWaitSeconds = 1
|
|
cfg.Startup.NodeSSHAuthPollSeconds = 1
|
|
cfg.Startup.RequireNodeInventoryReach = true
|
|
cfg.Startup.NodeInventoryReachWaitSeconds = 1
|
|
cfg.Startup.NodeInventoryReachPollSeconds = 1
|
|
|
|
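			// SSH probes that answer with anything other than the expected
			// __ANANKE_ marker output never count as passing, so both
			// one-second waits configured above must time out with
			// "did not pass".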
			runUnexpected := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "__ANANKE_") {
					return "unexpected", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, runUnexpected, runUnexpected)
			if err := orch.TestHookWaitForNodeSSHAuth(context.Background(), []string{"titan-db"}); err == nil || !strings.Contains(err.Error(), "did not pass") {
				t.Fatalf("expected ssh-auth timeout on unexpected output, got %v", err)
			}
			if err := orch.TestHookWaitForNodeInventoryReachability(context.Background()); err == nil || !strings.Contains(err.Error(), "did not pass") {
				t.Fatalf("expected inventory reachability timeout on unexpected output, got %v", err)
			}
		})
	})

t.Run("flux-lifecycle-and-sensitive-run-branches", func(t *testing.T) {
|
|
t.Run("sensitive-run-error-shapes", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
|
|
if _, err := orch.TestHookRunSensitive(context.Background(), 3*time.Second, "sh", "-lc", "exit 1"); err == nil {
|
|
t.Fatalf("expected runSensitive failure without output")
|
|
}
|
|
out, err := orch.TestHookRunSensitive(context.Background(), 3*time.Second, "sh", "-lc", "echo boom; exit 1")
|
|
if err == nil || strings.TrimSpace(out) != "boom" {
|
|
t.Fatalf("expected runSensitive failure with captured output, out=%q err=%v", out, err)
|
|
}
|
|
})
|
|
|
|
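		// The immutable-job healer should delete only the job whose Failed
		// condition is True (job-b) and report that it healed something;
		// job-a, with Complete=False, is left alone.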
t.Run("flux-health-helper-branches", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "kubectl" && strings.Contains(command, "get jobs -A -o json"):
|
|
return `{"items":[
|
|
{"metadata":{"namespace":"flux-system","name":"job-a","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Complete","status":"False"}]}},
|
|
{"metadata":{"namespace":"flux-system","name":"job-b","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}}
|
|
]}`, nil
|
|
case name == "kubectl" && strings.Contains(command, "-n flux-system delete job job-b"):
|
|
return "", nil
|
|
default:
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
healed, err := orch.TestHookHealImmutableFluxJobs(context.Background())
|
|
if err != nil || !healed {
|
|
t.Fatalf("expected immutable-job heal branch, healed=%v err=%v", healed, err)
|
|
}
|
|
})
|
|
|
|
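		// With a dry-run runner, EtcdRestore must complete without executing
		// anything; separately, Shutdown must reject the removed "poweroff"
		// mode with an explicit error.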
t.Run("lifecycle-etcd-restore-dryrun-and-shutdown-mode", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
dry := cluster.New(cfg, &execx.Runner{DryRun: true}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
|
|
if err := dry.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{}); err != nil {
|
|
t.Fatalf("expected dry-run etcd restore path, got %v", err)
|
|
}
|
|
|
|
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
|
err := orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "drill", Mode: "poweroff"})
|
|
if err == nil || !strings.Contains(err.Error(), "has been removed") {
|
|
t.Fatalf("expected removed poweroff mode error, got %v", err)
|
|
}
|
|
})
|
|
|
|
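		// Failing the deployment listing must abort scale-down with a
		// "collect deployments" error, while restoring with no recorded
		// scale-down snapshot is a no-op that succeeds.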
t.Run("scale-down-list-error-and-restore-no-snapshot", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
runListErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath=") {
|
|
return "", errors.New("list deployments failed")
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchListErr, _ := newHookOrchestrator(t, cfg, runListErr, runListErr)
|
|
if err := orchListErr.TestHookScaleDownApps(context.Background()); err == nil || !strings.Contains(err.Error(), "collect deployments") {
|
|
t.Fatalf("expected scaleDownApps list error, got %v", err)
|
|
}
|
|
|
|
orchNoSnapshot, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
|
|
if err := orchNoSnapshot.TestHookRestoreScaledApps(context.Background()); err != nil {
|
|
t.Fatalf("expected restore with missing snapshot to succeed, got %v", err)
|
|
}
|
|
})
|
|
})
|
|
}
|