// metis/pkg/service/node_recovery_test.go
//
// 255 lines
// 9.9 KiB
// Go

package service
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"reflect"
"strings"
"testing"
"time"
"metis/pkg/sentinel"
)
// TestStageDesiredNodeMetadataMergesInventoryAndLiveCluster drives
// stageDesiredNodeMetadata for titan-15 against a fake Kubernetes API that
// answers GET /api/v1/nodes with a single node.
//
// The assertions pin the staged result: the live "hardware" value (rpi5) is
// expected rather than the inventory one (rpi4), the annotation already held in
// app.desiredMetadata is expected next to the live one, the kubernetes.io/arch
// label and the volumes.kubernetes.io annotation must be absent, the only
// expected taint is dedicated=recovery:NoSchedule, and the file at
// settings.DesiredMetadataPath must mention the node.
func TestStageDesiredNodeMetadataMergesInventoryAndLiveCluster(t *testing.T) {
	// Live-cluster view of titan-15 as the fake API reports it.
	liveNode := map[string]any{
		"metadata": map[string]any{
			"name": "titan-15",
			"labels": map[string]string{
				"hardware":                       "rpi5",
				"rack":                           "a1",
				"maintenance.bstein.dev/color":   "blue",
				"kubernetes.io/arch":             "arm64",
				"node-role.kubernetes.io/worker": "true",
			},
			"annotations": map[string]string{
				"maintenance.bstein.dev/owner":                           "atlas",
				"volumes.kubernetes.io/controller-managed-attach-detach": "true",
			},
		},
		"spec": map[string]any{
			"unschedulable": true,
			"taints": []any{
				map[string]any{"key": "dedicated", "value": "recovery", "effect": "NoSchedule"},
				map[string]any{"key": "node.kubernetes.io/unreachable", "effect": "NoExecute"},
			},
		},
	}
	serveNodes := func(w http.ResponseWriter, r *http.Request) {
		// Anything other than the node list is a 404.
		if r.Method != http.MethodGet || r.URL.Path != "/api/v1/nodes" {
			http.NotFound(w, r)
			return
		}
		_ = json.NewEncoder(w).Encode(map[string]any{"items": []any{liveNode}})
	}
	kube := httptest.NewServer(http.HandlerFunc(serveNodes))
	defer kube.Close()
	installKubeFactory(t, kube)

	app := newTestApp(t)
	// NOTE(review): assumes the first inventory node from newTestApp is titan-15 — confirm.
	app.inventory.Nodes[0].Labels = map[string]string{"hardware": "rpi4", "rack": "a1"}
	app.inventory.Nodes[0].Taints = []string{"flash=true:NoSchedule"}
	// Previously stored desired state carrying an annotation the live node lacks.
	app.desiredMetadata["titan-15"] = DesiredNodeMetadata{
		Node:        "titan-15",
		Annotations: map[string]string{"maintenance.bstein.dev/legacy": "keep"},
	}

	staged, err := app.stageDesiredNodeMetadata("titan-15")
	if err != nil {
		t.Fatalf("stageDesiredNodeMetadata: %v", err)
	}
	if !(staged.Hostname == "titan-15" && staged.Unschedulable) {
		t.Fatalf("unexpected desired metadata header: %#v", staged)
	}

	wantLabels := map[string]string{
		"hardware":                     "rpi5",
		"rack":                         "a1",
		"maintenance.bstein.dev/color": "blue",
	}
	for name, want := range wantLabels {
		if staged.Labels[name] != want {
			t.Fatalf("unexpected desired labels: %#v", staged.Labels)
		}
	}
	if _, persisted := staged.Labels["kubernetes.io/arch"]; persisted {
		t.Fatalf("system labels should not be persisted: %#v", staged.Labels)
	}

	wantAnnotations := map[string]string{
		"maintenance.bstein.dev/owner":  "atlas",
		"maintenance.bstein.dev/legacy": "keep",
	}
	for name, want := range wantAnnotations {
		if staged.Annotations[name] != want {
			t.Fatalf("unexpected desired annotations: %#v", staged.Annotations)
		}
	}
	if _, persisted := staged.Annotations["volumes.kubernetes.io/controller-managed-attach-detach"]; persisted {
		t.Fatalf("controller annotations should not be persisted: %#v", staged.Annotations)
	}

	wantTaints := []string{"dedicated=recovery:NoSchedule"}
	if !reflect.DeepEqual(staged.Taints, wantTaints) {
		t.Fatalf("unexpected desired taints: %#v", staged.Taints)
	}

	// Staging is also expected to have written the desired metadata file.
	raw, readErr := os.ReadFile(app.settings.DesiredMetadataPath)
	if readErr != nil {
		t.Fatalf("read desired metadata file: %v", readErr)
	}
	if contents := string(raw); !strings.Contains(contents, "titan-15") {
		t.Fatalf("desired metadata file missing titan-15: %s", contents)
	}
}
// TestStoreSnapshotRestoresDesiredNodeMetadata verifies that StoreSnapshot
// sends a PATCH to /api/v1/nodes/titan-15 carrying the desired metadata held in
// app.desiredMetadata.
//
// The fake API reports titan-15 with stale labels, annotations and taints. The
// captured patch is expected to set hardware=rpi5, leave
// maintenance.bstein.dev/old absent or null, set the mode annotation to
// "recovery", set unschedulable to false, and carry exactly two taints: the
// desired dedicated=recovery:NoSchedule and the live
// node.kubernetes.io/unreachable:NoExecute.
func TestStoreSnapshotRestoresDesiredNodeMetadata(t *testing.T) {
	var patchBody map[string]any
	kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// This handler runs on the server's goroutine, so only t.Errorf is used
		// here: FailNow/Fatalf must be called from the test goroutine.
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
			nodes := map[string]any{
				"items": []any{
					map[string]any{
						"metadata": map[string]any{
							"name": "titan-15",
							"labels": map[string]string{
								"hardware":                   "rpi4",
								"maintenance.bstein.dev/old": "1",
							},
							"annotations": map[string]string{
								"maintenance.bstein.dev/mode": "old",
							},
						},
						"spec": map[string]any{
							"unschedulable": true,
							"taints": []any{
								map[string]any{"key": "dedicated", "value": "old", "effect": "NoSchedule"},
								map[string]any{"key": "node.kubernetes.io/unreachable", "effect": "NoExecute"},
							},
						},
					},
				},
			}
			if err := json.NewEncoder(w).Encode(nodes); err != nil {
				t.Errorf("encode node list: %v", err)
			}
		case r.Method == http.MethodPatch && r.URL.Path == "/api/v1/nodes/titan-15":
			if err := json.NewDecoder(r.Body).Decode(&patchBody); err != nil {
				t.Errorf("decode patch: %v", err)
				http.Error(w, "invalid patch body", http.StatusBadRequest)
				return
			}
			if err := json.NewEncoder(w).Encode(map[string]any{"status": "ok"}); err != nil {
				t.Errorf("encode patch response: %v", err)
			}
		default:
			http.NotFound(w, r)
		}
	}))
	defer kube.Close()
	installKubeFactory(t, kube)

	app := newTestApp(t)
	app.desiredMetadata["titan-15"] = DesiredNodeMetadata{
		Node:          "titan-15",
		Hostname:      "titan-15",
		Labels:        map[string]string{"hardware": "rpi5"},
		Annotations:   map[string]string{"maintenance.bstein.dev/mode": "recovery"},
		Taints:        []string{"dedicated=recovery:NoSchedule"},
		Unschedulable: false,
	}
	if err := app.StoreSnapshot(SnapshotRecord{
		Node:        "titan-15",
		CollectedAt: time.Date(2026, 4, 24, 6, 0, 0, 0, time.UTC),
		Snapshot:    sentinel.Snapshot{Hostname: "titan-15"},
	}); err != nil {
		t.Fatalf("StoreSnapshot: %v", err)
	}
	if patchBody == nil {
		t.Fatal("expected desired metadata patch")
	}

	// object narrows a decoded JSON value to an object and fails the test with a
	// readable message, instead of panicking, when the patch has another shape.
	object := func(field string, value any) map[string]any {
		t.Helper()
		obj, ok := value.(map[string]any)
		if !ok {
			t.Fatalf("patch field %q: want JSON object, got %#v", field, value)
		}
		return obj
	}

	metadata := object("metadata", patchBody["metadata"])
	labels := object("metadata.labels", metadata["labels"])
	// A removed label may be either omitted or sent as JSON null.
	if labels["hardware"] != "rpi5" || labels["maintenance.bstein.dev/old"] != nil {
		t.Fatalf("unexpected label patch: %#v", labels)
	}
	annotations := object("metadata.annotations", metadata["annotations"])
	if annotations["maintenance.bstein.dev/mode"] != "recovery" {
		t.Fatalf("unexpected annotation patch: %#v", annotations)
	}

	spec := object("spec", patchBody["spec"])
	if spec["unschedulable"] != false {
		t.Fatalf("unexpected spec patch: %#v", spec)
	}
	taints, ok := spec["taints"].([]any)
	if !ok || len(taints) != 2 {
		t.Fatalf("unexpected taint payload: %#v", spec["taints"])
	}
	// Index taints by key so the assertions do not depend on payload order.
	entries := make(map[string]map[string]any, len(taints))
	for _, raw := range taints {
		entry := object("spec.taints[]", raw)
		key, ok := entry["key"].(string)
		if !ok {
			t.Fatalf("taint entry has no string key: %#v", entry)
		}
		entries[key] = entry
	}
	if entries["dedicated"]["value"] != "recovery" || entries["dedicated"]["effect"] != "NoSchedule" {
		t.Fatalf("missing desired taint replacement: %#v", entries)
	}
	if entries["node.kubernetes.io/unreachable"]["effect"] != "NoExecute" {
		t.Fatalf("system taint should be preserved: %#v", entries)
	}
}
// TestDesiredNodeMetadataHelpers walks through the small helpers behind desired
// node metadata handling and pins one or two representative results for each:
// lookups for unknown nodes, the label/annotation/taint restorability filters,
// taint parsing, patch construction, taint merging, deep cloning, the in-sync
// patch path, and the sync event.
func TestDesiredNodeMetadataHelpers(t *testing.T) {
	app := newTestApp(t)

	// Unknown or empty node names: lookups miss and sync is expected to return nil.
	if _, found := app.desiredMetadataForNode("missing"); found {
		t.Fatal("expected no desired metadata for missing node")
	}
	unknown := SnapshotRecord{Node: "missing"}
	if syncErr := app.syncDesiredNodeMetadata(unknown); syncErr != nil {
		t.Fatalf("syncDesiredNodeMetadata missing should noop: %v", syncErr)
	}
	if _, found := liveClusterNode(""); found {
		t.Fatal("empty liveClusterNode lookup should fail")
	}

	// Restorability filters: each accepts the first sample and rejects the second.
	if !(isRestorableLabel("maintenance.bstein.dev/role") && !isRestorableLabel("kubernetes.io/arch")) {
		t.Fatal("unexpected label restoration filter")
	}
	if !(isRestorableAnnotation("maintenance.bstein.dev/state") && !isRestorableAnnotation("volumes.kubernetes.io/foo")) {
		t.Fatal("unexpected annotation restoration filter")
	}
	if !(isRestorableTaint("dedicated=recovery:NoSchedule") && !isRestorableTaint("node.kubernetes.io/not-ready:NoExecute")) {
		t.Fatal("unexpected taint restoration filter")
	}

	// Taint parsing: a full key=value:effect spec, then a bare key.
	k, v, e := splitTaint("dedicated=recovery:NoSchedule")
	if !(k == "dedicated" && v == "recovery" && e == "NoSchedule") {
		t.Fatalf("splitTaint mismatch: %q %q %q", k, v, e)
	}
	k, v, e = splitTaint("just-a-key")
	if !(k == "just-a-key" && v == "" && e == "") {
		t.Fatalf("splitTaint key-only mismatch: %q %q %q", k, v, e)
	}

	// Map filters keep only the entry the assertions treat as restorable.
	keptLabels := filteredRestorableLabels(map[string]string{"hardware": "rpi5", "kubernetes.io/arch": "arm64"})
	wantLabels := map[string]string{"hardware": "rpi5"}
	if !reflect.DeepEqual(keptLabels, wantLabels) {
		t.Fatalf("filteredRestorableLabels = %#v", keptLabels)
	}
	keptAnnotations := filteredRestorableAnnotations(map[string]string{"maintenance.bstein.dev/state": "ok", "volumes.kubernetes.io/foo": "bar"})
	wantAnnotations := map[string]string{"maintenance.bstein.dev/state": "ok"}
	if !reflect.DeepEqual(keptAnnotations, wantAnnotations) {
		t.Fatalf("filteredRestorableAnnotations = %#v", keptAnnotations)
	}

	// Patch construction: the changed key carries the new value; the key missing
	// from the desired side must come back as nil when indexed.
	liveLabels := map[string]string{"hardware": "rpi4", "maintenance.bstein.dev/old": "1"}
	desiredLabels := map[string]string{"hardware": "rpi5"}
	patch := metadataStringPatch(liveLabels, desiredLabels, isRestorableLabel)
	if !(patch["hardware"] == "rpi5" && patch["maintenance.bstein.dev/old"] == nil) {
		t.Fatalf("metadataStringPatch = %#v", patch)
	}

	// Taint merge: expected output is the desired taint once, then the live
	// node.kubernetes.io taint; the old dedicated value is gone.
	liveTaints := []string{"node.kubernetes.io/unreachable:NoExecute", "dedicated=old:NoSchedule"}
	desiredTaints := []string{"dedicated=new:NoSchedule", "dedicated=new:NoSchedule"}
	merged := mergeLiveAndDesiredTaints(liveTaints, desiredTaints)
	wantMerged := []string{"dedicated=new:NoSchedule", "node.kubernetes.io/unreachable:NoExecute"}
	if !reflect.DeepEqual(merged, wantMerged) {
		t.Fatalf("mergeLiveAndDesiredTaints = %#v", merged)
	}

	payload := taintPatchPayload([]string{"dedicated=new:NoSchedule"})
	if len(payload) != 1 {
		t.Fatalf("taintPatchPayload = %#v", payload)
	}
	if first := payload[0]; first["key"] != "dedicated" || first["value"] != "new" || first["effect"] != "NoSchedule" {
		t.Fatalf("taintPatchPayload = %#v", payload)
	}

	// Cloning must not share the map or slice with the source value.
	source := DesiredNodeMetadata{Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}}
	copied := cloneDesiredNodeMetadata(source)
	copied.Labels["hardware"] = "mutated"
	copied.Taints[0] = "changed"
	if !(source.Labels["hardware"] == "rpi5" && source.Taints[0] == "dedicated=new:NoSchedule") {
		t.Fatalf("cloneDesiredNodeMetadata should deep-copy slices/maps: %#v %#v", source, copied)
	}

	// Live and desired state already agree, so patching is expected to return nil.
	inSyncLive := clusterNode{Name: "titan-15", Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}}
	inSyncDesired := DesiredNodeMetadata{Node: "titan-15", Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}}
	if patchErr := patchDesiredNodeMetadata(inSyncLive, inSyncDesired); patchErr != nil {
		t.Fatalf("patchDesiredNodeMetadata should noop when already in sync: %v", patchErr)
	}

	event := desiredNodeMetadataSyncEvent("titan-15", os.ErrPermission)
	if !(event.Kind == "sentinel.node-metadata" && event.Details["node"] == "titan-15") {
		t.Fatalf("desiredNodeMetadataSyncEvent = %#v", event)
	}
}