package orchestrator
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
|
)
|
|
|
|
// TestHookGapMatrixPart3ConvergenceAndStability runs one orchestration or CLI step.
|
|
// Signature: TestHookGapMatrixPart3ConvergenceAndStability(t *testing.T).
|
|
// Why: raises coverage for startup convergence orchestration and stability gates
|
|
// that determine whether startup is considered truly complete.
|
|
func TestHookGapMatrixPart3ConvergenceAndStability(t *testing.T) {
|
|
t.Run("wait-for-startup-convergence-gate-matrix", func(t *testing.T) {
|
|
cfgIngress := lifecycleConfig(t)
|
|
cfgIngress.Startup.RequireIngressChecklist = true
|
|
cfgIngress.Startup.IngressChecklistWaitSeconds = 1
|
|
cfgIngress.Startup.IngressChecklistPollSeconds = 1
|
|
cfgIngress.Startup.RequireServiceChecklist = false
|
|
cfgIngress.Startup.RequireCriticalServiceEndpoints = false
|
|
cfgIngress.Startup.RequireFluxHealth = false
|
|
cfgIngress.Startup.RequireWorkloadConvergence = false
|
|
cfgIngress.Startup.ServiceChecklistStabilitySec = 0
|
|
runIngress := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get ingress -A -o json") {
|
|
return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"127.0.0.1:1"}]}}]}`, nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchIngress, _ := newHookOrchestrator(t, cfgIngress, runIngress, runIngress)
|
|
if err := orchIngress.TestHookWaitForStartupConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "ingress checklist") {
|
|
t.Fatalf("expected ingress convergence failure, got %v", err)
|
|
}
|
|
|
|
cfgService := lifecycleConfig(t)
|
|
cfgService.Startup.RequireServiceChecklist = true
|
|
cfgService.Startup.ServiceChecklistWaitSeconds = 1
|
|
cfgService.Startup.ServiceChecklistPollSeconds = 1
|
|
cfgService.Startup.RequireIngressChecklist = false
|
|
cfgService.Startup.RequireCriticalServiceEndpoints = false
|
|
cfgService.Startup.RequireFluxHealth = false
|
|
cfgService.Startup.RequireWorkloadConvergence = false
|
|
cfgService.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
|
|
{Name: "api", URL: "http://127.0.0.1:1/health", AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
|
|
}
|
|
orchService, _ := newHookOrchestrator(t, cfgService, nil, nil)
|
|
if err := orchService.TestHookWaitForStartupConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "service checklist") {
|
|
t.Fatalf("expected service convergence failure, got %v", err)
|
|
}
|
|
|
|
cfgFlux := lifecycleConfig(t)
|
|
cfgFlux.Startup.RequireIngressChecklist = false
|
|
cfgFlux.Startup.RequireServiceChecklist = false
|
|
cfgFlux.Startup.RequireCriticalServiceEndpoints = false
|
|
cfgFlux.Startup.RequireFluxHealth = true
|
|
cfgFlux.Startup.FluxHealthWaitSeconds = 1
|
|
cfgFlux.Startup.FluxHealthPollSeconds = 1
|
|
cfgFlux.Startup.RequireWorkloadConvergence = false
|
|
runFlux := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
|
|
return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"False","message":"syncing"}]}}]}`, nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchFlux, _ := newHookOrchestrator(t, cfgFlux, runFlux, runFlux)
|
|
if err := orchFlux.TestHookWaitForStartupConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "flux convergence") {
|
|
t.Fatalf("expected flux convergence failure, got %v", err)
|
|
}
|
|
})
|
|
|
|
t.Run("startup-stability-success-and-pod-check-error", func(t *testing.T) {
|
|
cfgOK := lifecycleConfig(t)
|
|
cfgOK.Startup.RequireFluxHealth = false
|
|
cfgOK.Startup.RequireWorkloadConvergence = false
|
|
cfgOK.Startup.RequireServiceChecklist = false
|
|
cfgOK.Startup.RequireIngressChecklist = false
|
|
runOK := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
|
|
return `{"items":[]}`, nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchOK, _ := newHookOrchestrator(t, cfgOK, runOK, runOK)
|
|
if err := orchOK.TestHookStartupStabilityHealthy(context.Background()); err != nil {
|
|
t.Fatalf("expected startup stability success, got %v", err)
|
|
}
|
|
|
|
cfgErr := cfgOK
|
|
runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
|
|
return "", errors.New("pod list failed")
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchErr, _ := newHookOrchestrator(t, cfgErr, runErr, runErr)
|
|
if err := orchErr.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "pod failure check error") {
|
|
t.Fatalf("expected pod-check error branch, got %v", err)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestHookGapMatrixPart3LifecycleRestoreShutdown runs one orchestration or CLI step.
|
|
// Signature: TestHookGapMatrixPart3LifecycleRestoreShutdown(t *testing.T).
|
|
// Why: fills lifecycle restore/shutdown success paths that are easy to miss in
|
|
// failure-focused drill tests.
|
|
func TestHookGapMatrixPart3LifecycleRestoreShutdown(t *testing.T) {
|
|
t.Run("etcd-restore-dry-run-and-success", func(t *testing.T) {
|
|
cfgDry := lifecycleConfig(t)
|
|
dry := newDryRunHookOrchestrator(t, cfgDry, nil)
|
|
if err := dry.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}); err != nil {
|
|
t.Fatalf("expected dry-run etcd restore success, got %v", err)
|
|
}
|
|
|
|
cfg := lifecycleConfig(t)
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "ssh" && strings.Contains(command, "systemctl cat k3s"):
|
|
return "ExecStart=/usr/local/bin/k3s server", nil
|
|
case name == "ssh" && strings.Contains(command, "etcd-snapshot ls"):
|
|
return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil
|
|
case name == "ssh" && strings.Contains(command, "stat -c %s"):
|
|
return "2097152", nil
|
|
case name == "ssh" && strings.Contains(command, "sha256sum"):
|
|
return "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", nil
|
|
case name == "ssh" && strings.Contains(command, "server --cluster-reset"):
|
|
return "reset done", nil
|
|
case name == "ssh" && strings.Contains(command, "systemctl stop k3s"):
|
|
return "stopped", nil
|
|
case name == "ssh" && strings.Contains(command, "systemctl start k3s"):
|
|
return "started", nil
|
|
default:
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}); err != nil {
|
|
t.Fatalf("expected etcd restore success path, got %v", err)
|
|
}
|
|
})
|
|
|
|
t.Run("shutdown-full-path-cluster-only", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot save"):
|
|
return "saved", nil
|
|
case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot ls"):
|
|
return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil
|
|
case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
|
|
return "monitoring\tgrafana\t1\n", nil
|
|
case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
|
|
return "", nil
|
|
default:
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
err := orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "full", Mode: "cluster-only"})
|
|
if err != nil {
|
|
t.Fatalf("expected full shutdown success, got %v", err)
|
|
}
|
|
})
|
|
}
|