package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"net/http"
	"net/http/httptest"
	neturl "net/url"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookGapMatrixPart5CoverageClosure closes branch gaps that still remained
// after drill-style tests by driving low-coverage orchestrator internals
// through the exported TestHook* surface. Each top-level t.Run group targets
// one internal subsystem; fake command runners intercept specific kubectl/ssh
// invocations by substring match and delegate everything else to
// lifecycleDispatcher so unrelated commands behave as in the happy path.
func TestHookGapMatrixPart5CoverageClosure(t *testing.T) {
	// Heal logic for critical-endpoint backends: noop, error-bubbling, and the
	// deployment->statefulset fallback branch.
	t.Run("critical-endpoint-backend-heal-matrix", func(t *testing.T) {
		t.Run("empty-namespace-service-noop", func(t *testing.T) {
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
			// Empty namespace/service must be a no-op: nothing healed, no error.
			healed, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "", "")
			if err != nil || len(healed) != 0 {
				t.Fatalf("expected empty noop heal, healed=%v err=%v", healed, err)
			}
		})

		t.Run("scale-error-bubbles", func(t *testing.T) {
			// Fail only the deployment-scale command; everything else succeeds.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, " -n monitoring scale deployment grafana ") {
					return "", errors.New("boom")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			_, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "monitoring", "grafana")
			// The scale failure must surface with the workload identity in the message.
			if err == nil || !strings.Contains(err.Error(), "scale monitoring/deployment/grafana to 1") {
				t.Fatalf("expected deployment scale error, got %v", err)
			}
		})

		t.Run("deployment-notfound-statefulset-healed", func(t *testing.T) {
			// NotFound on the deployment should trigger the statefulset fallback,
			// which then scales and rolls out successfully.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, " -n monitoring scale deployment grafana "):
					return "", errors.New("Error from server (NotFound): deployments.apps \"grafana\" not found")
				case name == "kubectl" && strings.Contains(command, " -n monitoring scale statefulset grafana "):
					return "", nil
				case name == "kubectl" && strings.Contains(command, "rollout status statefulset/grafana"):
					return "rolled out", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			healed, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "monitoring", "grafana")
			if err != nil {
				t.Fatalf("expected statefulset fallback heal success, got %v", err)
			}
			// The heal report must name the statefulset, not the missing deployment.
			if len(healed) != 1 || healed[0] != "monitoring/statefulset/grafana" {
				t.Fatalf("expected statefulset heal entry, got %v", healed)
			}
		})
	})

	// Ingress-backend healing plus the startup-stability health-check failure
	// branches (flux, service checklist, ingress checklist, pod failures).
	t.Run("ingress-backend-heal-and-stability-matrix", func(t *testing.T) {
		t.Run("ingress-host-heal-noop-and-error", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// No ingress rule matches the requested host -> heal is a no-op.
			runNoop := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
					return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"other.bstein.dev"}]}}]}`, nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchNoop, _ := newHookOrchestrator(t, cfg, runNoop, runNoop)
			healed, err := orchNoop.TestHookHealIngressHostBackendReplicas(context.Background(), "metrics.bstein.dev")
			if err != nil || len(healed) != 0 {
				t.Fatalf("expected no-op ingress heal, healed=%v err=%v", healed, err)
			}

			// Host matches, backend has 0 replicas, and the scale-up is denied:
			// the scale error must propagate.
			runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
					return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"metrics.bstein.dev"}]}}]}`, nil
				case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
					return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0}}]}`, nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					return "", errors.New("permission denied")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
			_, err = orchErr.TestHookHealIngressHostBackendReplicas(context.Background(), "metrics.bstein.dev")
			if err == nil || !strings.Contains(err.Error(), "scale monitoring/deployment/grafana to 1") {
				t.Fatalf("expected ingress backend scale error, got %v", err)
			}
		})

		t.Run("startup-stability-health-failure-branches", func(t *testing.T) {
			// Each subtest enables exactly one Require* gate so the asserted
			// failure can only come from that check.
			t.Run("flux-check-error", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = true
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = false

				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
						return "", errors.New("api down")
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "flux check error") {
					t.Fatalf("expected flux-check error branch, got %v", err)
				}
			})

			t.Run("service-checklist-failure", func(t *testing.T) {
				// Real HTTP server that always answers 500 so the checklist
				// probe genuinely fails over the wire.
				srv := httptest.NewServer(http.HandlerFunc(httpStatusHandler(500, "not ready")))
				defer srv.Close()

				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireIngressChecklist = false
				cfg.Startup.RequireServiceChecklist = true
				cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
					{Name: "svc", URL: srv.URL, AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
				}
				orch, _ := newHookOrchestrator(t, cfg, nil, nil)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "external services not healthy") {
					t.Fatalf("expected service-check failure, got %v", err)
				}
			})

			t.Run("ingress-checklist-failure", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = true
				cfg.Startup.IngressChecklistInsecureSkip = true

				// Advertise a host that cannot resolve so reachability fails.
				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get ingress -A -o json") {
						return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"nonexistent.invalid"}]}}]}`, nil
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "ingress reachability not healthy") {
					t.Fatalf("expected ingress-check failure, got %v", err)
				}
			})

			t.Run("pod-failure-check-error-and-nonempty", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = false

				// Branch 1: listing pods fails outright.
				runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
						return "", errors.New("pods unavailable")
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
				if err := orchErr.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "pod failure check error") {
					t.Fatalf("expected pod-check error branch, got %v", err)
				}

				// Branch 2: listing succeeds but reports a CrashLoopBackOff pod.
				runFail := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
						return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"grafana"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}]}`, nil
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orchFail, _ := newHookOrchestrator(t, cfg, runFail, runFail)
				if err := orchFail.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "pods in crash/image-pull failures") {
					t.Fatalf("expected failing-pod branch, got %v", err)
				}
			})
		})
	})

	// Report artifacts, scaled-workload snapshot persistence, time-sync
	// quorum/strict modes, and SSH/post-start helper branches.
	t.Run("report-scaling-timesync-and-ssh-matrix", func(t *testing.T) {
		t.Run("report-artifact-and-progress-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			orch, _ := newHookOrchestrator(t, cfg, nil, nil)

			// Notes before BeginStartupReport and empty notes must both be
			// tolerated silently.
			orch.TestHookNoteStartupAutoHeal("ignored without active report")
			orch.TestHookBeginStartupReport("drill")
			orch.TestHookNoteStartupAutoHeal("")
			orch.TestHookNoteStartupAutoHeal("restored critical workload replicas")
			orch.TestHookFinalizeStartupReport(nil)

			if err := orch.TestHookWriteRunRecordArtifact(state.RunRecord{
				ID:        "shutdown-record-1",
				Action:    "shutdown",
				Reason:    "drill",
				StartedAt: time.Now().UTC().Add(-3 * time.Second),
				EndedAt:   time.Now().UTC(),
			}); err != nil {
				t.Fatalf("expected shutdown run artifact success, got %v", err)
			}
			if _, err := os.Stat(orch.TestHookLastShutdownReportPath()); err != nil {
				t.Fatalf("expected last-shutdown report artifact, err=%v", err)
			}

			// Point state paths at a regular file so mkdir/write must fail;
			// the progress/finalize calls below must not panic on that error.
			stateFile := filepath.Join(t.TempDir(), "state-as-file")
			if err := os.WriteFile(stateFile, []byte("x"), 0o644); err != nil {
				t.Fatalf("seed state file: %v", err)
			}
			badCfg := lifecycleConfig(t)
			badCfg.State.Dir = stateFile
			badCfg.State.ReportsDir = filepath.Join(stateFile, "reports")
			badCfg.State.RunHistoryPath = filepath.Join(stateFile, "runs.json")
			bad := cluster.New(badCfg, &execx.Runner{DryRun: false}, state.New(badCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := bad.TestHookWriteStartupReportFile(filepath.Join(stateFile, "startup.json"), "running"); err == nil {
				t.Fatalf("expected startup report mkdir/write error")
			}
			bad.TestHookPersistStartupProgress("running")
			bad.TestHookFinalizeRecord("shutdown", "drill", "simulated-failure")
		})

		t.Run("scaled-snapshot-write-and-restore-errors", func(t *testing.T) {
			// Shared fake runner: one deployment to scale, no statefulsets.
			listRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					return "", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}

			// Case 1: State.Dir is a regular file, so the snapshot write fails.
			stateAsFile := filepath.Join(t.TempDir(), "state-as-file")
			if err := os.WriteFile(stateAsFile, []byte("x"), 0o644); err != nil {
				t.Fatalf("seed state file: %v", err)
			}
			cfgWriteErr := lifecycleConfig(t)
			cfgWriteErr.State.Dir = stateAsFile
			orchWriteErr := cluster.New(cfgWriteErr, &execx.Runner{DryRun: false}, state.New(cfgWriteErr.State.RunHistoryPath), log.New(io.Discard, "", 0))
			orchWriteErr.SetCommandOverrides(listRun, listRun)
			entries, err := orchWriteErr.TestHookListScalableWorkloads(context.Background())
			if err != nil {
				t.Fatalf("list entries for write-error case: %v", err)
			}
			if err := orchWriteErr.TestHookWriteScaledWorkloadSnapshot(entries); err == nil {
				t.Fatalf("expected write scaled snapshot error")
			}

			// Case 2: during restore, the scale command swaps the snapshot file
			// for a non-empty directory so os.Remove must fail afterwards.
			cfgRemoveErr := lifecycleConfig(t)
			snapshotPath := filepath.Join(cfgRemoveErr.State.Dir, "scaled-workloads.json")
			removeErrRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					if err := os.Remove(snapshotPath); err != nil && !os.IsNotExist(err) {
						return "", err
					}
					if err := os.MkdirAll(snapshotPath, 0o755); err != nil {
						return "", err
					}
					if err := os.WriteFile(filepath.Join(snapshotPath, "keep"), []byte("x"), 0o644); err != nil {
						return "", err
					}
					return "", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchRemoveErr, _ := newHookOrchestrator(t, cfgRemoveErr, removeErrRun, removeErrRun)
			entries, err = orchRemoveErr.TestHookListScalableWorkloads(context.Background())
			if err != nil {
				t.Fatalf("list entries for remove-error case: %v", err)
			}
			if err := orchRemoveErr.TestHookWriteScaledWorkloadSnapshot(entries); err != nil {
				t.Fatalf("seed snapshot for remove-error case: %v", err)
			}
			if err := orchRemoveErr.TestHookRestoreScaledApps(context.Background()); err == nil || !strings.Contains(err.Error(), "remove scaled workload snapshot") {
				t.Fatalf("expected remove snapshot error, got %v", err)
			}
		})

		t.Run("timesync-quorum-and-strict", func(t *testing.T) {
			// build constructs an orchestrator whose local (sh) and per-node
			// remote (ssh) timedatectl answers are scripted; 1s wait/poll keeps
			// the timeout branches fast.
			build := func(mode string, quorum int, local string, remote map[string]string) *cluster.Orchestrator {
				cfg := lifecycleConfig(t)
				cfg.ControlPlanes = []string{"titan-db", "titan-24"}
				cfg.SSHManagedNodes = []string{"titan-db", "titan-24"}
				cfg.SSHNodeHosts["titan-24"] = "titan-24"
				cfg.Startup.TimeSyncWaitSeconds = 1
				cfg.Startup.TimeSyncPollSeconds = 1
				cfg.Startup.TimeSyncMode = mode
				cfg.Startup.TimeSyncQuorum = quorum
				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					switch {
					case name == "sh" && strings.Contains(command, "timedatectl show -p NTPSynchronized"):
						return local, nil
					case name == "ssh" && strings.Contains(command, "timedatectl show -p NTPSynchronized"):
						for node, out := range remote {
							if strings.Contains(command, node) {
								return out, nil
							}
						}
						return "no", nil
					default:
						return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
					}
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				return orch
			}

			// strict mode: every node unsynced -> timeout error.
			strict := build("strict", 0, "no", map[string]string{"titan-db": "no", "titan-24": "no"})
			if err := strict.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err == nil || !strings.Contains(err.Error(), "time sync not ready") {
				t.Fatalf("expected strict-mode timeout, got %v", err)
			}

			// quorum=1 satisfied by a single synced node.
			quorumOK := build("quorum", 1, "yes", map[string]string{"titan-db": "yes", "titan-24": "no"})
			if err := quorumOK.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err != nil {
				t.Fatalf("expected quorum-mode success, got %v", err)
			}

			// quorum=2 with only one synced node -> quorum timeout error.
			quorumFail := build("quorum", 2, "yes", map[string]string{"titan-db": "yes", "titan-24": "no"})
			if err := quorumFail.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err == nil || !strings.Contains(err.Error(), "time sync quorum not ready") {
				t.Fatalf("expected quorum-mode timeout, got %v", err)
			}
		})

		t.Run("poststart-and-ssh-helper-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.PostStartProbes = []string{"https://metrics.bstein.dev/health"}
			cfg.Startup.PostStartProbeWaitSeconds = 2
			cfg.Startup.PostStartProbePollSeconds = 1
			cfg.SSHManagedNodes = []string{"titan-db"}
			// Padded paths verify that the resolvers trim whitespace.
			cfg.SSHConfigFile = " /tmp/ananke-ssh-config "
			cfg.SSHIdentityFile = " /tmp/ananke-ssh-id "

			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				if name == "curl" {
					return "500", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if !orch.TestHookSSHManaged("titan-db") || orch.TestHookSSHManaged("titan-23") {
				t.Fatalf("unexpected sshManaged evaluation")
			}
			if got := orch.TestHookResolveSSHConfigFile(); got != "/tmp/ananke-ssh-config" {
				t.Fatalf("unexpected ssh config path: %q", got)
			}
			if got := orch.TestHookResolveSSHIdentityFile(); got != "/tmp/ananke-ssh-id" {
				t.Fatalf("unexpected ssh identity path: %q", got)
			}

			// A pre-canceled context must surface context.Canceled rather than
			// looping until the probe deadline.
			ctx, cancel := context.WithCancel(context.Background())
			cancel()
			if err := orch.TestHookWaitForPostStartProbes(ctx); !errors.Is(err, context.Canceled) {
				t.Fatalf("expected canceled post-start probe wait, got %v", err)
			}

			// Dry-run mode skips post-start probing entirely.
			dryCfg := lifecycleConfig(t)
			dry := cluster.New(dryCfg, &execx.Runner{DryRun: true}, state.New(dryCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.TestHookWaitForPostStartProbes(context.Background()); err != nil {
				t.Fatalf("expected dry-run post-start skip, got %v", err)
			}
		})
	})

	// Peer startup-intent coordination and node-affinity ignore helpers.
	t.Run("coordination-and-workload-ignore-helpers", func(t *testing.T) {
		t.Run("peer-guard-skip-unknown-and-block", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Coordination.PeerHosts = []string{"ghost", "titan-24"}
			cfg.Coordination.StartupGuardMaxAgeSec = 120
			cfg.SSHManagedNodes = append(cfg.SSHManagedNodes, "titan-24")
			cfg.SSHNodeHosts["titan-24"] = "titan-24"

			now := time.Now().UTC().Format(time.RFC3339)
			// Unreachable "ghost" peer must be skipped; the reachable peer
			// reporting an active startup intent must block ours.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "ghost"):
					return "", errors.New("no route to host")
				case name == "ssh" && strings.Contains(command, "ananke intent --config /etc/ananke/ananke.yaml"):
					return "__ANANKE_BOOTSTRAP_ACTIVE__\nintent=startup_in_progress reason=\"normal\" source=peer updated_at=" + now + "\n", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookGuardPeerStartupIntents(context.Background()); err == nil || !strings.Contains(err.Error(), "startup_in_progress") {
				t.Fatalf("expected startup_in_progress block, got %v", err)
			}

			// An unrecognized intent state from a peer is ignored, not fatal.
			runUnknown := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "ananke intent --config /etc/ananke/ananke.yaml") {
					return "__ANANKE_BOOTSTRAP_IDLE__\nintent=unknown_state reason=\"odd\" source=peer updated_at=" + now + "\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchUnknown, _ := newHookOrchestrator(t, cfg, runUnknown, runUnknown)
			if err := orchUnknown.TestHookGuardPeerStartupIntents(context.Background()); err != nil {
				t.Fatalf("expected unknown-state ignore, got %v", err)
			}
		})

		t.Run("workload-ignore-affinity-branch-matrix", func(t *testing.T) {
			// Exhaustive false/true branches of the affinity-based ignore check:
			// only a single required term, keyed on kubernetes.io/hostname with
			// operator In and all values ignored, should return true.
			ignored := []string{"titan-22"}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw(nil, ignored) {
				t.Fatalf("expected false when no affinity terms are present")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false when more than one term is present")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "other", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false for non-hostname key")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "NotIn", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false for non-In operator")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: nil}},
			}, ignored) {
				t.Fatalf("expected false for empty hostname values")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22", "titan-23"}}},
			}, ignored) {
				t.Fatalf("expected false when any hostname is not ignored")
			}
			if !cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected true when all affinity hostnames are ignored")
			}
			if cluster.TestHookPodTargetsIgnoredNode("", ignored) {
				t.Fatalf("expected false when pod has no explicit node name")
			}
		})
	})
}
// httpStatusHandler runs one orchestration or CLI step.
|
|
// Signature: httpStatusHandler(code int, body string) func(http.ResponseWriter, *http.Request).
|
|
// Why: keeps checklist/stability tests compact while still driving real HTTP probe branches.
|
|
func httpStatusHandler(code int, body string) func(http.ResponseWriter, *http.Request) {
|
|
return func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(code)
|
|
_, _ = w.Write([]byte(body))
|
|
}
|
|
}
// TestHookGapMatrixPart5IngressHostMappingRegression pins the host-parsing
// fallback paths used when mapping checklist/ingress failure messages back to
// hostnames.
// Why: ensures host parsing fallback paths stay stable for ingress/service checklist failures.
func TestHookGapMatrixPart5IngressHostMappingRegression(t *testing.T) {
	cfg := lifecycleConfig(t)
	cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
		{Name: "metrics", URL: "https://metrics.bstein.dev/api/health"},
	}
	orch, _ := newHookOrchestrator(t, cfg, nil, nil)
	// A failure message prefixed with a known checklist name maps to that
	// check's URL host.
	if got := orch.TestHookChecklistFailureHost("metrics: down"); got != "metrics.bstein.dev" {
		t.Fatalf("expected metrics host map, got %q", got)
	}
	rawURL := "https://grafana.bstein.dev/path"
	// A raw URL is not a known checklist-name prefix, so the mapper yields "".
	if got := orch.TestHookChecklistFailureHost(rawURL); got != "" {
		t.Fatalf("expected checklist host parser to treat raw URL as unknown prefix, got %q", got)
	}
	// The lower-level helper, by contrast, does extract the hostname.
	if got := cluster.TestHookHostFromURL(rawURL); got != "grafana.bstein.dev" {
		t.Fatalf("expected hostFromURL helper parse, got %q", got)
	}
	// Sanity-check that the stdlib parser agrees the URL has a hostname.
	u, _ := neturl.Parse(rawURL)
	if u == nil || u.Hostname() == "" {
		t.Fatalf("expected URL parse sanity")
	}
}
|