ananke/internal/cluster/orchestrator_workload_recovery_test.go
2026-06-19 15:43:44 -03:00

459 lines
24 KiB
Go

package cluster
import (
"context"
"errors"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/config"
)
// TestRecycleStuckControllerPodsHandlesLonghornAttachBlockedPods runs one orchestration or CLI step.
// Signature: TestRecycleStuckControllerPodsHandlesLonghornAttachBlockedPods(t *testing.T).
// Why: Pending Longhorn-backed pods on Longhorn-unready nodes should be
// rescheduled without mutating Longhorn volume, replica, or disk objects.
func TestRecycleStuckControllerPodsHandlesLonghornAttachBlockedPods(t *testing.T) {
created := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
lastSeen := time.Now().UTC().Format(time.RFC3339)
pods := `{"items":[{"metadata":{"namespace":"monitoring","name":"victoria-metrics-single-server-0","creationTimestamp":"` + created + `","ownerReferences":[{"kind":"StatefulSet","name":"victoria-metrics-single-server"}]},"spec":{"nodeName":"titan-0b"},"status":{"phase":"Pending"}}]}`
events := `{"items":[{"metadata":{"namespace":"monitoring","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"monitoring","name":"victoria-metrics-single-server-0"},"type":"Warning","reason":"FailedAttachVolume","message":"AttachVolume.Attach failed for volume \"pvc-1\" : rpc error from [http://longhorn-backend:9500/v1/volumes/pvc-1?action=attach]: unable to attach volume pvc-1 to titan-0b: node titan-0b is not ready","lastTimestamp":"` + lastSeen + `"}]}`
deleted := false
orch := buildOrchestratorWithStubs(t, config.Config{
Startup: config.Startup{StuckPodGraceSeconds: 180},
}, []commandStub{
{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-0b\tFalse\n"},
{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events},
{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-0b"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "-n", "monitoring", "delete", "pod", "victoria-metrics-single-server-0", "--wait=false")(name, args) {
return false
}
deleted = true
return true
},
},
})
if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
t.Fatalf("recycleStuckControllerPods failed: %v", err)
}
if !deleted {
t.Fatalf("expected longhorn attach-blocked pending pod to be recycled")
}
}
// TestRecycleStuckControllerPodsRepairsEncryptedVolumeCryptsetup runs one orchestration or CLI step.
// Signature: TestRecycleStuckControllerPodsRepairsEncryptedVolumeCryptsetup(t *testing.T).
// Why: encrypted Longhorn PVC recovery should repair missing host cryptsetup and
// then recycle the blocked pod without touching Longhorn data-plane objects.
func TestRecycleStuckControllerPodsRepairsEncryptedVolumeCryptsetup(t *testing.T) {
created := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
lastSeen := time.Now().UTC().Format(time.RFC3339)
pods := `{"items":[{"metadata":{"namespace":"finance","name":"actual-budget-abc","creationTimestamp":"` + created + `","ownerReferences":[{"kind":"ReplicaSet","name":"actual-budget"}]},"spec":{"nodeName":"titan-19"},"status":{"phase":"Pending"}}]}`
events := `{"items":[{"metadata":{"namespace":"finance","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"finance","name":"actual-budget-abc"},"type":"Warning","reason":"FailedMount","message":"MountVolume.MountDevice failed for volume \"pvc-1\" : nsenter: failed to execute cryptsetup: No such file or directory","lastTimestamp":"` + lastSeen + `"}]}`
installed := false
deleted := false
orch := buildOrchestratorWithStubs(t, config.Config{
Startup: config.Startup{StuckPodGraceSeconds: 180},
}, []commandStub{
{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-19\tTrue\n"},
{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events},
{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`},
{
match: func(name string, args []string) bool {
if name != "ssh" || !strings.Contains(strings.Join(args, " "), "apt-get install -y --no-install-recommends cryptsetup-bin") {
return false
}
installed = true
return true
},
out: "__ANANKE_CRYPTSETUP_INSTALLED__",
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "-n", "finance", "delete", "pod", "actual-budget-abc", "--wait=false")(name, args) {
return false
}
deleted = true
return true
},
},
})
if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
t.Fatalf("recycleStuckControllerPods failed: %v", err)
}
if !installed {
t.Fatalf("expected missing host cryptsetup to be installed")
}
if !deleted {
t.Fatalf("expected encrypted-volume blocked pod to be recycled")
}
}
// TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails runs one orchestration or CLI step.
// Signature: TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails(t *testing.T).
// Why: when host package repair is blocked by sudo policy, Ananke should avoid
// the bad node and retry the controller-owned pod elsewhere.
func TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails(t *testing.T) {
created := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
lastSeen := time.Now().UTC().Format(time.RFC3339)
pods := `{"items":[{"metadata":{"namespace":"finance","name":"actual-budget-abc","creationTimestamp":"` + created + `","ownerReferences":[{"kind":"ReplicaSet","name":"actual-budget"}]},"spec":{"nodeName":"titan-19"},"status":{"phase":"Pending"}}]}`
events := `{"items":[{"metadata":{"namespace":"finance","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"finance","name":"actual-budget-abc"},"type":"Warning","reason":"FailedMount","message":"MountVolume.MountDevice failed for volume \"pvc-1\" : nsenter: failed to execute cryptsetup: No such file or directory","lastTimestamp":"` + lastSeen + `"}]}`
cordoned := false
deleted := false
orch := buildOrchestratorWithStubs(t, config.Config{
Startup: config.Startup{StuckPodGraceSeconds: 180},
}, []commandStub{
{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-19\tTrue\n"},
{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events},
{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`},
{
match: matchContains("ssh", "apt-get install -y --no-install-recommends cryptsetup-bin"),
err: errors.New("sudo: a password is required"),
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "cordon", "titan-19")(name, args) {
return false
}
cordoned = true
return true
},
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "-n", "finance", "delete", "pod", "actual-budget-abc", "--wait=false")(name, args) {
return false
}
deleted = true
return true
},
},
})
if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
t.Fatalf("recycleStuckControllerPods failed: %v", err)
}
if !cordoned {
t.Fatalf("expected cryptsetup-missing node to be cordoned")
}
if !deleted {
t.Fatalf("expected encrypted-volume blocked pod to be recycled")
}
}
// TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes verifies which stale
// pods are deleted, in what order, and which of them are force deleted.
// Signature: TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T).
// Why: post-outage controller pods can remain Unknown or Failed after their
// node recovers; deletion clears stale status while force deletion stays away
// from PVC-backed storage.
func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
	// "old" is past the 180s grace configured below; "recent" is still inside it.
	old := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
	recent := time.Now().Add(-30 * time.Second).UTC().Format(time.RFC3339)
	// Fixture pods, in order:
	//   1. old Unknown pod on titan-12              -> expected plain delete
	//   2. old Failed pod already terminating       -> expected force delete
	//   3. old Running pod already terminating      -> expected force delete
	//   4. old Failed terminating pod with a PVC    -> expected delete without --force
	//   5. recent Unknown pod                       -> not in the expected list
	//   6. old Unknown pod on titan-22 (Ready=False) -> not in the expected list
	//   7. old Unknown pod without ownerReferences  -> not in the expected list
	pods := `{"items":[` +
		`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-old","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
		`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"secret"}]},"status":{"phase":"Failed"}},` +
		`{"metadata":{"namespace":"logging","name":"oauth2-proxy-terminating","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"oauth2-proxy-logs"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"secret"}]},"status":{"phase":"Running"}},` +
		`{"metadata":{"namespace":"longhorn-system","name":"pvc-backed-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"pvc-backed"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"data"}}]},"status":{"phase":"Failed"}},` +
		`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-fresh","creationTimestamp":"` + recent + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
		`{"metadata":{"namespace":"maintenance","name":"stale-on-bad-node","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"maintenance"}]},"spec":{"nodeName":"titan-22"},"status":{"phase":"Unknown"}},` +
		`{"metadata":{"namespace":"default","name":"bare-pod","creationTimestamp":"` + old + `"},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}}]}`
	// Pod names are appended in the order their delete commands are matched.
	deleted := []string{}
	orch := buildOrchestratorWithStubs(t, config.Config{
		Startup: config.Startup{StuckPodGraceSeconds: 180},
	}, []commandStub{
		{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
		{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-12\tTrue\ntitan-22\tTrue\n"},
		{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: `{"items":[]}`},
		// titan-12 is Ready, titan-22 is not; titan-18 is absent from the node list.
		{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-12"},"status":{"conditions":[{"type":"Ready","status":"True"}]}},{"metadata":{"name":"titan-22"},"status":{"conditions":[{"type":"Ready","status":"False"}]}}]}`},
		{
			match: func(name string, args []string) bool {
				if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-old", "--wait=false")(name, args) {
					return false
				}
				deleted = append(deleted, "longhorn-vault-sync-old")
				return true
			},
		},
		{
			match: func(name string, args []string) bool {
				if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-failed", "--wait=false", "--grace-period=0", "--force")(name, args) {
					return false
				}
				deleted = append(deleted, "longhorn-vault-sync-failed")
				return true
			},
		},
		{
			match: func(name string, args []string) bool {
				if !matchContains("kubectl", "-n", "logging", "delete", "pod", "oauth2-proxy-terminating", "--wait=false", "--grace-period=0", "--force")(name, args) {
					return false
				}
				deleted = append(deleted, "oauth2-proxy-terminating")
				return true
			},
		},
		{
			match: func(name string, args []string) bool {
				if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "pvc-backed-failed", "--wait=false")(name, args) {
					return false
				}
				// Errorf rather than Fatalf: this callback is invoked by the stubbed
				// command runner, and FailNow is only valid on the goroutine running
				// the test function. Errorf still fails the test from any goroutine.
				if strings.Contains(strings.Join(args, " "), "--force") {
					t.Errorf("pvc-backed stale pod must not be force deleted")
				}
				deleted = append(deleted, "pvc-backed-failed")
				return true
			},
		},
	})
	if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
		t.Fatalf("recycleStuckControllerPods failed: %v", err)
	}
	if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed,oauth2-proxy-terminating,pvc-backed-failed" {
		t.Fatalf("expected only stale controller pods on Ready node to be recycled, got %#v", deleted)
	}
}
// TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode verifies that
// only titan-18 is cordoned and that all four CreateContainerError pods are
// deleted without --force.
// Signature: TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode(t *testing.T).
// Why: a Ready node with a wedged container runtime can trap replacement pods
// indefinitely; startup should cordon that scheduler target without draining it
// or touching Longhorn data-plane objects.
func TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode(t *testing.T) {
	old := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
	lastSeen := time.Now().UTC().Format(time.RFC3339)
	// Three Pending pods waiting in CreateContainerError on titan-18 and a single
	// one on titan-19; the test expects only titan-18 to be cordoned.
	pods := `{"items":[` +
		`{"metadata":{"namespace":"logging","name":"oauth2-proxy-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"oauth2-proxy"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"oauth2-proxy","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
		`{"metadata":{"namespace":"monitoring","name":"suite-probe-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"Job","name":"suite-probe"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"probe","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
		`{"metadata":{"namespace":"sso","name":"secret-ensure-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"Job","name":"secret-ensure"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","initContainerStatuses":[{"name":"init","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
		`{"metadata":{"namespace":"finance","name":"single-node-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"single"}]},"spec":{"nodeName":"titan-19","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"app","state":{"waiting":{"reason":"CreateContainerError"}}}]}}]}`
	// One Warning/Failed event per pod, each carrying a container-creation error message.
	events := `{"items":[` +
		`{"metadata":{"namespace":"logging","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"logging","name":"oauth2-proxy-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{oauth2-proxy}: Error: failed to reserve container name oauth2-proxy_logging","lastTimestamp":"` + lastSeen + `"},` +
		`{"metadata":{"namespace":"monitoring","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"monitoring","name":"suite-probe-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{probe}: Error: context deadline exceeded","lastTimestamp":"` + lastSeen + `"},` +
		`{"metadata":{"namespace":"sso","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"sso","name":"secret-ensure-bad"},"type":"Warning","reason":"Failed","message":"spec.initContainers{init}: Error: failed to reserve container name init_sso","lastTimestamp":"` + lastSeen + `"},` +
		`{"metadata":{"namespace":"finance","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"finance","name":"single-node-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{app}: Error: failed to reserve container name app_finance","lastTimestamp":"` + lastSeen + `"}]}`
	cordoned := []string{}
	deleted := []string{}
	orch := buildOrchestratorWithStubs(t, config.Config{
		Startup: config.Startup{StuckPodGraceSeconds: 180},
	}, []commandStub{
		{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
		// The Longhorn node listing is deliberately empty in this scenario.
		{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: ""},
		{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events},
		{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-18"},"status":{"conditions":[{"type":"Ready","status":"True"}]}},{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`},
		{
			// Records the final argument of every `kubectl cordon ...` call.
			match: func(name string, args []string) bool {
				if name != "kubectl" || len(args) == 0 || args[0] != "cordon" {
					return false
				}
				cordoned = append(cordoned, args[len(args)-1])
				return true
			},
		},
		{
			match: func(name string, args []string) bool {
				if !matchContains("kubectl", "delete", "pod", "--wait=false")(name, args) {
					return false
				}
				joined := strings.Join(args, " ")
				// Errorf rather than Fatalf: this callback is invoked by the stubbed
				// command runner, and FailNow is only valid on the goroutine running
				// the test function. Errorf still fails the test from any goroutine.
				if strings.Contains(joined, "--force") {
					t.Errorf("container-runtime wedge recycle must not force-delete fresh pods")
				}
				// Assumes the argv layout `-n <ns> delete pod <name> ...`, which puts
				// the pod name at index 4 — TODO confirm against the orchestrator.
				if len(args) >= 5 {
					deleted = append(deleted, args[4])
				}
				return true
			},
		},
	})
	if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
		t.Fatalf("recycleStuckControllerPods failed: %v", err)
	}
	if strings.Join(cordoned, ",") != "titan-18" {
		t.Fatalf("expected only titan-18 to be cordoned, got %#v", cordoned)
	}
	if strings.Join(deleted, ",") != "oauth2-proxy-bad,suite-probe-bad,secret-ensure-bad,single-node-bad" {
		t.Fatalf("expected runtime-wedged pods to be recycled, got %#v", deleted)
	}
}
// TestEffectiveWorkersFiltersIgnoredUnavailableNodes runs one orchestration or CLI step.
// Signature: TestEffectiveWorkersFiltersIgnoredUnavailableNodes(t *testing.T).
// Why: ignored unavailable nodes should be excluded before startup tries SSH,
// k3s-agent start, or uncordon operations against intentionally absent hosts.
func TestEffectiveWorkersFiltersIgnoredUnavailableNodes(t *testing.T) {
cfg := config.Config{
Workers: []string{" titan-08 ", "titan-09", "titan-10", "titan-11"},
Startup: config.Startup{
IgnoreUnavailableNodes: []string{"titan-09", "titan-10"},
},
}
orch := buildOrchestratorWithStubs(t, cfg, nil)
got, err := orch.effectiveWorkers(context.Background())
if err != nil {
t.Fatalf("effectiveWorkers failed: %v", err)
}
want := []string{"titan-08", "titan-11"}
if strings.Join(got, ",") != strings.Join(want, ",") {
t.Fatalf("effectiveWorkers mismatch got=%v want=%v", got, want)
}
}
// TestEnsureLonghornEncryptedHostPrereqsFiltersUnsafeWorkers runs one orchestration or CLI step.
// Signature: TestEnsureLonghornEncryptedHostPrereqsFiltersUnsafeWorkers(t *testing.T).
// Why: startup must not uncordon Longhorn workers that cannot mount encrypted
// PVCs; cordoning those nodes is safe and avoids repeating the post-outage
// mount deadlock.
func TestEnsureLonghornEncryptedHostPrereqsFiltersUnsafeWorkers(t *testing.T) {
cordoned := []string{}
orch := buildOrchestratorWithStubs(t, config.Config{
SSHManagedNodes: []string{"titan-04", "titan-19"},
}, []commandStub{
{match: matchContains("kubectl", "get", "nodes", "-l", "longhorn-host=true"), out: "titan-04\ntitan-19\ntitan-23\n"},
{
match: matchContains("ssh", "titan-04", "command -v cryptsetup"),
out: "__ANANKE_CRYPTSETUP_PRESENT__",
},
{
match: matchContains("ssh", "titan-19", "apt-get install -y --no-install-recommends cryptsetup-bin"),
err: errors.New("sudo: a password is required"),
},
{
match: func(name string, args []string) bool {
if name != "kubectl" || len(args) == 0 || args[0] != "cordon" {
return false
}
if len(args) > 1 {
cordoned = append(cordoned, args[len(args)-1])
}
return true
},
},
})
got, err := orch.ensureLonghornEncryptedHostPrereqs(context.Background(), []string{"titan-04", "titan-19", "titan-20"})
if err != nil {
t.Fatalf("ensureLonghornEncryptedHostPrereqs failed: %v", err)
}
want := []string{"titan-04", "titan-20"}
if strings.Join(got, ",") != strings.Join(want, ",") {
t.Fatalf("guarded workers mismatch got=%v want=%v", got, want)
}
if strings.Join(cordoned, ",") != "titan-19,titan-23" {
t.Fatalf("expected unsafe longhorn hosts to be cordoned, got %v", cordoned)
}
}
// TestLonghornCryptsetupExemptNodesAreNotQuarantined runs one orchestration or CLI step.
// Signature: TestLonghornCryptsetupExemptNodesAreNotQuarantined(t *testing.T).
// Why: Veles/Oceanus uses titan-23 as a Longhorn host for unencrypted local
// volumes; startup should uncordon that policy-exempt node without requiring
// host SSH or weakening encrypted-volume safety on other workers.
func TestLonghornCryptsetupExemptNodesAreNotQuarantined(t *testing.T) {
cordoned := []string{}
uncordoned := []string{}
sshTitan23 := false
orch := buildOrchestratorWithStubs(t, config.Config{
SSHManagedNodes: []string{"titan-04"},
Startup: config.Startup{
LonghornCryptsetupExemptNodes: []string{"titan-23"},
},
}, []commandStub{
{match: matchContains("kubectl", "get", "nodes", "-l", "longhorn-host=true"), out: "titan-04\ntitan-23\n"},
{
match: matchContains("ssh", "titan-04", "command -v cryptsetup"),
out: "__ANANKE_CRYPTSETUP_PRESENT__",
},
{
match: func(name string, args []string) bool {
if name == "ssh" && strings.Contains(strings.Join(args, " "), "titan-23") {
sshTitan23 = true
return true
}
return false
},
},
{
match: func(name string, args []string) bool {
if name != "kubectl" || len(args) == 0 || args[0] != "cordon" {
return false
}
if len(args) > 1 {
cordoned = append(cordoned, args[len(args)-1])
}
return true
},
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "uncordon")(name, args) {
return false
}
if len(args) > 1 {
uncordoned = append(uncordoned, args[len(args)-1])
}
return true
},
},
})
got, err := orch.ensureLonghornEncryptedHostPrereqs(context.Background(), []string{"titan-04"})
if err != nil {
t.Fatalf("ensureLonghornEncryptedHostPrereqs failed: %v", err)
}
if strings.Join(got, ",") != "titan-04" {
t.Fatalf("guarded workers mismatch got=%v", got)
}
if err := orch.uncordonLonghornCryptsetupExemptNodes(context.Background()); err != nil {
t.Fatalf("uncordonLonghornCryptsetupExemptNodes failed: %v", err)
}
if sshTitan23 {
t.Fatalf("did not expect cryptsetup SSH check for exempt titan-23")
}
if len(cordoned) != 0 {
t.Fatalf("did not expect exempt node to be cordoned, got %v", cordoned)
}
if strings.Join(uncordoned, ",") != "titan-23" {
t.Fatalf("expected exempt titan-23 to be uncordoned, got %v", uncordoned)
}
}
// TestLonghornHostNodesFallsBackToConfiguredLabels runs one orchestration or CLI step.
// Signature: TestLonghornHostNodesFallsBackToConfiguredLabels(t *testing.T).
// Why: bootstrap caches or minimal test clusters can lack live labels; the
// static startup inventory should still protect configured storage workers.
func TestLonghornHostNodesFallsBackToConfiguredLabels(t *testing.T) {
orch := buildOrchestratorWithStubs(t, config.Config{
Startup: config.Startup{
RequiredNodeLabels: map[string]map[string]string{
"titan-04": {"longhorn-host": "true"},
"titan-20": {"node-role.kubernetes.io/worker": "true"},
},
},
}, []commandStub{
{match: matchContains("kubectl", "get", "nodes", "-l", "longhorn-host=true"), out: ""},
})
got, err := orch.longhornHostNodes(context.Background())
if err != nil {
t.Fatalf("longhornHostNodes failed: %v", err)
}
if _, ok := got["titan-04"]; !ok || len(got) != 1 {
t.Fatalf("expected configured longhorn host fallback, got %v", got)
}
}