package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"net/http"
	"net/http/httptest"
	neturl "net/url"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookGapMatrixPart5CoverageClosure closes branch gaps that still remained
// after drill-style tests by driving low-coverage orchestrator internals
// through the exported TestHook* surface. Each top-level t.Run group targets
// one internal subsystem; fake command runners intercept specific kubectl/ssh
// invocations by substring match and delegate everything else to
// lifecycleDispatcher so unrelated commands behave as in the happy path.
func TestHookGapMatrixPart5CoverageClosure(t *testing.T) {
	// Heal logic for critical-endpoint backends: noop, error-bubbling, and the
	// deployment->statefulset fallback branch.
	t.Run("critical-endpoint-backend-heal-matrix", func(t *testing.T) {
		t.Run("empty-namespace-service-noop", func(t *testing.T) {
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
			// Empty namespace/service must be a no-op: nothing healed, no error.
			healed, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "", "")
			if err != nil || len(healed) != 0 {
				t.Fatalf("expected empty noop heal, healed=%v err=%v", healed, err)
			}
		})

		t.Run("scale-error-bubbles", func(t *testing.T) {
			// Fail only the deployment-scale command; everything else succeeds.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, " -n monitoring scale deployment grafana ") {
					return "", errors.New("boom")
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			_, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "monitoring", "grafana")
			// The scale failure must surface with the workload identity in the message.
			if err == nil || !strings.Contains(err.Error(), "scale monitoring/deployment/grafana to 1") {
				t.Fatalf("expected deployment scale error, got %v", err)
			}
		})

		t.Run("deployment-notfound-statefulset-healed", func(t *testing.T) {
			// NotFound on the deployment should trigger the statefulset fallback,
			// which then scales and rolls out successfully.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, " -n monitoring scale deployment grafana "):
					return "", errors.New("Error from server (NotFound): deployments.apps \"grafana\" not found")
				case name == "kubectl" && strings.Contains(command, " -n monitoring scale statefulset grafana "):
					return "", nil
				case name == "kubectl" && strings.Contains(command, "rollout status statefulset/grafana"):
					return "rolled out", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, lifecycleConfig(t), run, run)
			healed, err := orch.TestHookMaybeHealCriticalEndpointBackends(context.Background(), "monitoring", "grafana")
			if err != nil {
				t.Fatalf("expected statefulset fallback heal success, got %v", err)
			}
			// The heal report must name the statefulset, not the missing deployment.
			if len(healed) != 1 || healed[0] != "monitoring/statefulset/grafana" {
				t.Fatalf("expected statefulset heal entry, got %v", healed)
			}
		})
	})

	// Ingress-backend healing plus the startup-stability health-check failure
	// branches (flux, service checklist, ingress checklist, pod failures).
	t.Run("ingress-backend-heal-and-stability-matrix", func(t *testing.T) {
		t.Run("ingress-host-heal-noop-and-error", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			// No ingress rule matches the requested host -> heal is a no-op.
			runNoop := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
					return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"other.bstein.dev"}]}}]}`, nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchNoop, _ := newHookOrchestrator(t, cfg, runNoop, runNoop)
			healed, err := orchNoop.TestHookHealIngressHostBackendReplicas(context.Background(), "metrics.bstein.dev")
			if err != nil || len(healed) != 0 {
				t.Fatalf("expected no-op ingress heal, healed=%v err=%v", healed, err)
			}

			// Host matches, backend has 0 replicas, and the scale-up is denied:
			// the scale error must propagate.
			runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
					return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"metrics.bstein.dev"}]}}]}`, nil
				case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
					return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0}}]}`, nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					return "", errors.New("permission denied")
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
			_, err = orchErr.TestHookHealIngressHostBackendReplicas(context.Background(), "metrics.bstein.dev")
			if err == nil || !strings.Contains(err.Error(), "scale monitoring/deployment/grafana to 1") {
				t.Fatalf("expected ingress backend scale error, got %v", err)
			}
		})

		t.Run("startup-stability-health-failure-branches", func(t *testing.T) {
			// Each subtest enables exactly one Require* gate so the asserted
			// failure can only come from that check.
			t.Run("flux-check-error", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = true
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = false

				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json") {
						return "", errors.New("api down")
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "flux check error") {
					t.Fatalf("expected flux-check error branch, got %v", err)
				}
			})

			t.Run("service-checklist-failure", func(t *testing.T) {
				// Real HTTP server that always answers 500 so the checklist
				// probe genuinely fails over the wire.
				srv := httptest.NewServer(http.HandlerFunc(httpStatusHandler(500, "not ready")))
				defer srv.Close()

				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireIngressChecklist = false
				cfg.Startup.RequireServiceChecklist = true
				cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
					{Name: "svc", URL: srv.URL, AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
				}
				orch, _ := newHookOrchestrator(t, cfg, nil, nil)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "external services not healthy") {
					t.Fatalf("expected service-check failure, got %v", err)
				}
			})

			t.Run("ingress-checklist-failure", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = true
				cfg.Startup.IngressChecklistInsecureSkip = true

				// Advertise a host that cannot resolve so reachability fails.
				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get ingress -A -o json") {
						return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"nonexistent.invalid"}]}}]}`, nil
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				if err := orch.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "ingress reachability not healthy") {
					t.Fatalf("expected ingress-check failure, got %v", err)
				}
			})

			t.Run("pod-failure-check-error-and-nonempty", func(t *testing.T) {
				cfg := lifecycleConfig(t)
				cfg.Startup.RequireFluxHealth = false
				cfg.Startup.RequireWorkloadConvergence = false
				cfg.Startup.RequireServiceChecklist = false
				cfg.Startup.RequireIngressChecklist = false

				// Branch 1: listing pods fails outright.
				runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
						return "", errors.New("pods unavailable")
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
				if err := orchErr.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "pod failure check error") {
					t.Fatalf("expected pod-check error branch, got %v", err)
				}

				// Branch 2: listing succeeds but reports a CrashLoopBackOff pod.
				runFail := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
						return `{"items":[{"metadata":{"namespace":"monitoring","name":"grafana-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"grafana"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}]}`, nil
					}
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
				orchFail, _ := newHookOrchestrator(t, cfg, runFail, runFail)
				if err := orchFail.TestHookStartupStabilityHealthy(context.Background()); err == nil || !strings.Contains(err.Error(), "pods in crash/image-pull failures") {
					t.Fatalf("expected failing-pod branch, got %v", err)
				}
			})
		})
	})

	// Report artifacts, scaled-workload snapshot persistence, time-sync
	// quorum/strict modes, and SSH/post-start helper branches.
	t.Run("report-scaling-timesync-and-ssh-matrix", func(t *testing.T) {
		t.Run("report-artifact-and-progress-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			orch, _ := newHookOrchestrator(t, cfg, nil, nil)

			// Notes before BeginStartupReport and empty notes must both be
			// tolerated silently.
			orch.TestHookNoteStartupAutoHeal("ignored without active report")
			orch.TestHookBeginStartupReport("drill")
			orch.TestHookNoteStartupAutoHeal("")
			orch.TestHookNoteStartupAutoHeal("restored critical workload replicas")
			orch.TestHookFinalizeStartupReport(nil)

			if err := orch.TestHookWriteRunRecordArtifact(state.RunRecord{
				ID:        "shutdown-record-1",
				Action:    "shutdown",
				Reason:    "drill",
				StartedAt: time.Now().UTC().Add(-3 * time.Second),
				EndedAt:   time.Now().UTC(),
			}); err != nil {
				t.Fatalf("expected shutdown run artifact success, got %v", err)
			}
			if _, err := os.Stat(orch.TestHookLastShutdownReportPath()); err != nil {
				t.Fatalf("expected last-shutdown report artifact, err=%v", err)
			}

			// Point state paths at a regular file so mkdir/write must fail;
			// the progress/finalize calls below must not panic on that error.
			stateFile := filepath.Join(t.TempDir(), "state-as-file")
			if err := os.WriteFile(stateFile, []byte("x"), 0o644); err != nil {
				t.Fatalf("seed state file: %v", err)
			}
			badCfg := lifecycleConfig(t)
			badCfg.State.Dir = stateFile
			badCfg.State.ReportsDir = filepath.Join(stateFile, "reports")
			badCfg.State.RunHistoryPath = filepath.Join(stateFile, "runs.json")
			bad := cluster.New(badCfg, &execx.Runner{DryRun: false}, state.New(badCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := bad.TestHookWriteStartupReportFile(filepath.Join(stateFile, "startup.json"), "running"); err == nil {
				t.Fatalf("expected startup report mkdir/write error")
			}
			bad.TestHookPersistStartupProgress("running")
			bad.TestHookFinalizeRecord("shutdown", "drill", "simulated-failure")
		})

		t.Run("scaled-snapshot-write-and-restore-errors", func(t *testing.T) {
			// Shared fake runner: one deployment to scale, no statefulsets.
			listRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					return "", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}

			// Case 1: State.Dir is a regular file, so the snapshot write fails.
			stateAsFile := filepath.Join(t.TempDir(), "state-as-file")
			if err := os.WriteFile(stateAsFile, []byte("x"), 0o644); err != nil {
				t.Fatalf("seed state file: %v", err)
			}
			cfgWriteErr := lifecycleConfig(t)
			cfgWriteErr.State.Dir = stateAsFile
			orchWriteErr := cluster.New(cfgWriteErr, &execx.Runner{DryRun: false}, state.New(cfgWriteErr.State.RunHistoryPath), log.New(io.Discard, "", 0))
			orchWriteErr.SetCommandOverrides(listRun, listRun)
			entries, err := orchWriteErr.TestHookListScalableWorkloads(context.Background())
			if err != nil {
				t.Fatalf("list entries for write-error case: %v", err)
			}
			if err := orchWriteErr.TestHookWriteScaledWorkloadSnapshot(entries); err == nil {
				t.Fatalf("expected write scaled snapshot error")
			}

			// Case 2: during restore, the scale command swaps the snapshot file
			// for a non-empty directory so os.Remove must fail afterwards.
			cfgRemoveErr := lifecycleConfig(t)
			snapshotPath := filepath.Join(cfgRemoveErr.State.Dir, "scaled-workloads.json")
			removeErrRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
					return "monitoring\tgrafana\t1\n", nil
				case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
					return "", nil
				case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
					if err := os.Remove(snapshotPath); err != nil && !os.IsNotExist(err) {
						return "", err
					}
					if err := os.MkdirAll(snapshotPath, 0o755); err != nil {
						return "", err
					}
					if err := os.WriteFile(filepath.Join(snapshotPath, "keep"), []byte("x"), 0o644); err != nil {
						return "", err
					}
					return "", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orchRemoveErr, _ := newHookOrchestrator(t, cfgRemoveErr, removeErrRun, removeErrRun)
			entries, err = orchRemoveErr.TestHookListScalableWorkloads(context.Background())
			if err != nil {
				t.Fatalf("list entries for remove-error case: %v", err)
			}
			if err := orchRemoveErr.TestHookWriteScaledWorkloadSnapshot(entries); err != nil {
				t.Fatalf("seed snapshot for remove-error case: %v", err)
			}
			if err := orchRemoveErr.TestHookRestoreScaledApps(context.Background()); err == nil || !strings.Contains(err.Error(), "remove scaled workload snapshot") {
				t.Fatalf("expected remove snapshot error, got %v", err)
			}
		})

		t.Run("timesync-quorum-and-strict", func(t *testing.T) {
			// build constructs an orchestrator whose local (sh) and per-node
			// remote (ssh) timedatectl answers are scripted; 1s wait/poll keeps
			// the timeout branches fast.
			build := func(mode string, quorum int, local string, remote map[string]string) *cluster.Orchestrator {
				cfg := lifecycleConfig(t)
				cfg.ControlPlanes = []string{"titan-db", "titan-24"}
				cfg.SSHManagedNodes = []string{"titan-db", "titan-24"}
				cfg.SSHNodeHosts["titan-24"] = "titan-24"
				cfg.Startup.TimeSyncWaitSeconds = 1
				cfg.Startup.TimeSyncPollSeconds = 1
				cfg.Startup.TimeSyncMode = mode
				cfg.Startup.TimeSyncQuorum = quorum
				run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
					command := name + " " + strings.Join(args, " ")
					switch {
					case name == "sh" && strings.Contains(command, "timedatectl show -p NTPSynchronized"):
						return local, nil
					case name == "ssh" && strings.Contains(command, "timedatectl show -p NTPSynchronized"):
						for node, out := range remote {
							if strings.Contains(command, node) {
								return out, nil
							}
						}
						return "no", nil
					default:
						return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
					}
				}
				orch, _ := newHookOrchestrator(t, cfg, run, run)
				return orch
			}

			// strict mode: every node unsynced -> timeout error.
			strict := build("strict", 0, "no", map[string]string{"titan-db": "no", "titan-24": "no"})
			if err := strict.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err == nil || !strings.Contains(err.Error(), "time sync not ready") {
				t.Fatalf("expected strict-mode timeout, got %v", err)
			}

			// quorum=1 satisfied by a single synced node.
			quorumOK := build("quorum", 1, "yes", map[string]string{"titan-db": "yes", "titan-24": "no"})
			if err := quorumOK.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err != nil {
				t.Fatalf("expected quorum-mode success, got %v", err)
			}

			// quorum=2 with only one synced node -> quorum timeout error.
			quorumFail := build("quorum", 2, "yes", map[string]string{"titan-db": "yes", "titan-24": "no"})
			if err := quorumFail.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-24"}); err == nil || !strings.Contains(err.Error(), "time sync quorum not ready") {
				t.Fatalf("expected quorum-mode timeout, got %v", err)
			}
		})

		t.Run("poststart-and-ssh-helper-branches", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Startup.PostStartProbes = []string{"https://metrics.bstein.dev/health"}
			cfg.Startup.PostStartProbeWaitSeconds = 2
			cfg.Startup.PostStartProbePollSeconds = 1
			cfg.SSHManagedNodes = []string{"titan-db"}
			// Padded paths verify that the resolvers trim whitespace.
			cfg.SSHConfigFile = " /tmp/ananke-ssh-config "
			cfg.SSHIdentityFile = " /tmp/ananke-ssh-id "

			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				if name == "curl" {
					return "500", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if !orch.TestHookSSHManaged("titan-db") || orch.TestHookSSHManaged("titan-23") {
				t.Fatalf("unexpected sshManaged evaluation")
			}
			if got := orch.TestHookResolveSSHConfigFile(); got != "/tmp/ananke-ssh-config" {
				t.Fatalf("unexpected ssh config path: %q", got)
			}
			if got := orch.TestHookResolveSSHIdentityFile(); got != "/tmp/ananke-ssh-id" {
				t.Fatalf("unexpected ssh identity path: %q", got)
			}

			// A pre-canceled context must surface context.Canceled rather than
			// looping until the probe deadline.
			ctx, cancel := context.WithCancel(context.Background())
			cancel()
			if err := orch.TestHookWaitForPostStartProbes(ctx); !errors.Is(err, context.Canceled) {
				t.Fatalf("expected canceled post-start probe wait, got %v", err)
			}

			// Dry-run mode skips post-start probing entirely.
			dryCfg := lifecycleConfig(t)
			dry := cluster.New(dryCfg, &execx.Runner{DryRun: true}, state.New(dryCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
			if err := dry.TestHookWaitForPostStartProbes(context.Background()); err != nil {
				t.Fatalf("expected dry-run post-start skip, got %v", err)
			}
		})
	})

	// Peer startup-intent coordination and node-affinity ignore helpers.
	t.Run("coordination-and-workload-ignore-helpers", func(t *testing.T) {
		t.Run("peer-guard-skip-unknown-and-block", func(t *testing.T) {
			cfg := lifecycleConfig(t)
			cfg.Coordination.PeerHosts = []string{"ghost", "titan-24"}
			cfg.Coordination.StartupGuardMaxAgeSec = 120
			cfg.SSHManagedNodes = append(cfg.SSHManagedNodes, "titan-24")
			cfg.SSHNodeHosts["titan-24"] = "titan-24"

			now := time.Now().UTC().Format(time.RFC3339)
			// Unreachable "ghost" peer must be skipped; the reachable peer
			// reporting an active startup intent must block ours.
			run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				switch {
				case name == "ssh" && strings.Contains(command, "ghost"):
					return "", errors.New("no route to host")
				case name == "ssh" && strings.Contains(command, "ananke intent --config /etc/ananke/ananke.yaml"):
					return "__ANANKE_BOOTSTRAP_ACTIVE__\nintent=startup_in_progress reason=\"normal\" source=peer updated_at=" + now + "\n", nil
				default:
					return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
				}
			}
			orch, _ := newHookOrchestrator(t, cfg, run, run)
			if err := orch.TestHookGuardPeerStartupIntents(context.Background()); err == nil || !strings.Contains(err.Error(), "startup_in_progress") {
				t.Fatalf("expected startup_in_progress block, got %v", err)
			}

			// An unrecognized intent state from a peer is ignored, not fatal.
			runUnknown := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "ssh" && strings.Contains(command, "ananke intent --config /etc/ananke/ananke.yaml") {
					return "__ANANKE_BOOTSTRAP_IDLE__\nintent=unknown_state reason=\"odd\" source=peer updated_at=" + now + "\n", nil
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
			orchUnknown, _ := newHookOrchestrator(t, cfg, runUnknown, runUnknown)
			if err := orchUnknown.TestHookGuardPeerStartupIntents(context.Background()); err != nil {
				t.Fatalf("expected unknown-state ignore, got %v", err)
			}
		})

		t.Run("workload-ignore-affinity-branch-matrix", func(t *testing.T) {
			// Exhaustive false/true branches of the affinity-based ignore check:
			// only a single required term, keyed on kubernetes.io/hostname with
			// operator In and all values ignored, should return true.
			ignored := []string{"titan-22"}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw(nil, ignored) {
				t.Fatalf("expected false when no affinity terms are present")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false when more than one term is present")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "other", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false for non-hostname key")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "NotIn", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected false for non-In operator")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: nil}},
			}, ignored) {
				t.Fatalf("expected false for empty hostname values")
			}
			if cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22", "titan-23"}}},
			}, ignored) {
				t.Fatalf("expected false when any hostname is not ignored")
			}
			if !cluster.TestHookWorkloadTargetsIgnoredNodesRaw([][]cluster.TestHookNodeSelectorExpr{
				{{Key: "kubernetes.io/hostname", Operator: "In", Values: []string{"titan-22"}}},
			}, ignored) {
				t.Fatalf("expected true when all affinity hostnames are ignored")
			}
			if cluster.TestHookPodTargetsIgnoredNode("", ignored) {
				t.Fatalf("expected false when pod has no explicit node name")
			}
		})
	})
}
// httpStatusHandler runs one orchestration or CLI step.
|
|
// Signature: httpStatusHandler(code int, body string) func(http.ResponseWriter, *http.Request).
|
|
// Why: keeps checklist/stability tests compact while still driving real HTTP probe branches.
|
|
func httpStatusHandler(code int, body string) func(http.ResponseWriter, *http.Request) {
|
|
return func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(code)
|
|
_, _ = w.Write([]byte(body))
|
|
}
|
|
}
// TestHookGapMatrixPart5IngressHostMappingRegression pins the host-parsing
// fallback paths used when mapping checklist/ingress failure messages back to
// hostnames.
// Why: ensures host parsing fallback paths stay stable for ingress/service checklist failures.
func TestHookGapMatrixPart5IngressHostMappingRegression(t *testing.T) {
	cfg := lifecycleConfig(t)
	cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
		{Name: "metrics", URL: "https://metrics.bstein.dev/api/health"},
	}
	orch, _ := newHookOrchestrator(t, cfg, nil, nil)
	// A failure message prefixed with a known checklist name maps to that
	// check's URL host.
	if got := orch.TestHookChecklistFailureHost("metrics: down"); got != "metrics.bstein.dev" {
		t.Fatalf("expected metrics host map, got %q", got)
	}
	rawURL := "https://grafana.bstein.dev/path"
	// A raw URL is not a known checklist-name prefix, so the mapper yields "".
	if got := orch.TestHookChecklistFailureHost(rawURL); got != "" {
		t.Fatalf("expected checklist host parser to treat raw URL as unknown prefix, got %q", got)
	}
	// The lower-level helper, by contrast, does extract the hostname.
	if got := cluster.TestHookHostFromURL(rawURL); got != "grafana.bstein.dev" {
		t.Fatalf("expected hostFromURL helper parse, got %q", got)
	}
	// Sanity-check that the stdlib parser agrees the URL has a hostname.
	u, _ := neturl.Parse(rawURL)
	if u == nil || u.Hostname() == "" {
		t.Fatalf("expected URL parse sanity")
	}
}
|