package service import ( "encoding/json" "net/http" "net/http/httptest" "os" "path/filepath" "strings" "testing" "time" ) func TestRemoteWorkflowErrorBranches(t *testing.T) { kube := fakeKubeServer(t) installKubeFactory(t, kube) app := newTestApp(t) app.settings.Namespace = "maintenance" app.settings.RunnerImageARM64 = "" if _, err := app.RefreshDevices("titan-22"); err == nil { t.Fatal("expected RefreshDevices to fail without runner image") } job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError { t.Fatalf("expected build job error, got %#v", got) } job = app.newJob("flash", "titan-15", "titan-22", "/dev/sdz") if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil { t.Fatal("expected flashArtifact error") } app.setJob(job.ID, func(j *Job) { j.Status = JobRunning j.Stage = "build" j.StageStartedAt = time.Now().Add(-30 * time.Second) }) app.heartbeatRemoteJob(job.ID) if got := app.job(job.ID); got == nil || got.ProgressPct == 0 { t.Fatalf("expected heartbeat progress, got %#v", got) } } func TestRemoteWorkflowMissingRunnerImageBranch(t *testing.T) { kube := fakeKubeServer(t) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := newTestApp(t) app.settings.Namespace = "maintenance" app.settings.RunnerImageARM64 = "" app.settings.HarborAPIBase = harbor.URL + "/api/v2.0" app.settings.HarborUsername = "admin" app.settings.HarborPassword = "pw" app.settings.HarborProject = "metis" app.settings.HarborRegistry = "registry.example" app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError { t.Fatalf("expected build job error, got %#v", got) } } func TestRefreshDevicesDefaultSortAndErrorBranches(t *testing.T) { t.Run("default host and deterministic sorting", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{ deviceMessage: `{"devices":[{"name":"sdc","path":"/dev/sdc","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":64000000000},{"name":"sdb","path":"/dev/sdb","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000},{"name":"sda","path":"/dev/sda","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000}]}`, }) installKubeFactory(t, kube) app := remoteTestApp(t, nil) devices, err := app.RefreshDevices("") if err != nil { t.Fatalf("RefreshDevices: %v", err) } if len(devices) != 3 || devices[0].Path != "/dev/sda" || devices[1].Path != "/dev/sdb" { t.Fatalf("unexpected sorted devices: %#v", devices) } }) t.Run("remote pod failure records device error", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{devicePhase: "Failed", deviceMessage: "device scan failed"}) installKubeFactory(t, kube) app := remoteTestApp(t, nil) if _, err := app.RefreshDevices("titan-22"); err == nil || !strings.Contains(err.Error(), "device scan failed") { t.Fatalf("expected device scan failure, got %v", err) } if _, err := app.cachedDevices("titan-22"); err == nil || !strings.Contains(err.Error(), "device scan failed") { t.Fatalf("expected cached device error, got %v", err) } }) t.Run("malformed device payload records decode error", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{deviceMessage: "{"}) installKubeFactory(t, kube) app := remoteTestApp(t, nil) if _, err := app.RefreshDevices("titan-22"); err == nil || !strings.Contains(err.Error(), "decode remote devices") { t.Fatalf("expected device decode failure, got %v", err) } }) } func TestRunBuildAdditionalRemoteBranches(t *testing.T) { t.Run("missing inventory node", func(t *testing.T) { app := remoteTestApp(t, nil) job := app.newJob("build", "missing-node", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError { t.Fatalf("expected missing-node job error, got %#v", got) } }) t.Run("no eligible builder", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{nodes: []map[string]any{}}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError || !strings.Contains(got.Error, "no build host") { t.Fatalf("expected builder selection error, got %#v", got) } }) t.Run("build pod failure", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{buildPhase: "Failed", buildMessage: "build crashed"}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError || !strings.Contains(got.Error, "build crashed") { t.Fatalf("expected build pod error, got %#v", got) } }) t.Run("build output decode failure", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{buildMessage: "{"}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError || !strings.Contains(got.Error, "decode remote build output") { t.Fatalf("expected build decode error, got %#v", got) } }) t.Run("artifact persistence failure", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) app.settings.ArtifactStatePath = t.TempDir() job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) if got := app.job(job.ID); got == nil || got.Status != JobError { t.Fatalf("expected artifact persist error, got %#v", got) } }) t.Run("prune warning still completes build", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{}) harbor := harborPruneFailureServer(t) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("build", "titan-15", "", "") app.runBuild(job, false) got := app.job(job.ID) if got == nil || got.Status != JobDone { t.Fatalf("expected build to finish despite prune warning, got %#v", got) } if events := app.recentEvents(5); len(events) == 0 || events[0].Kind != "image.build" { t.Fatalf("expected image build event, got %#v", events) } }) t.Run("flash preflight rejects stale device", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("replace", "titan-15", "titan-22", "/dev/sda") app.runBuild(job, true) if got := app.job(job.ID); got == nil || got.Status != JobError || !strings.Contains(got.Error, "not a current flash candidate") { t.Fatalf("expected stale device error, got %#v", got) } }) t.Run("flash pod failure", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{flashPhase: "Failed", flashMessage: "flash failed"}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("replace", "titan-15", "titan-22", "/dev/sdz") app.runBuild(job, true) if got := app.job(job.ID); got == nil || got.Status != JobError || !strings.Contains(got.Error, "flash failed") { t.Fatalf("expected flash pod error, got %#v", got) } }) t.Run("host tmp flash completion message", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) app := remoteTestApp(t, harbor) job := app.newJob("replace", "titan-15", "titan-22", hostTmpDevicePath) app.runBuild(job, true) if got := app.job(job.ID); got == nil || got.Status != JobDone || !strings.Contains(got.Message, "host /tmp") { t.Fatalf("expected hosttmp completion, got %#v", got) } }) t.Run("node delete warning still flashes", func(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{deleteNodeStatus: http.StatusInternalServerError}) harbor := fakeHarborServer(t, true) installKubeFactory(t, kube) tmp := t.TempDir() kubectl := filepath.Join(tmp, "kubectl") if err := os.WriteFile(kubectl, []byte("#!/usr/bin/env sh\nprintf 'delete denied' >&2\nexit 1\n"), 0o755); err != nil { t.Fatal(err) } t.Setenv("PATH", tmp+string(os.PathListSeparator)+os.Getenv("PATH")) app := remoteTestApp(t, harbor) job := app.newJob("replace", "titan-15", "titan-22", "/dev/sdz") app.runBuild(job, true) if got := app.job(job.ID); got == nil || got.Status != JobDone { t.Fatalf("expected flash success despite delete warning, got %#v", got) } found := false for _, event := range app.recentEvents(10) { if event.Kind == "node.delete.warning" { found = true } } if !found { t.Fatalf("expected node.delete.warning event, got %#v", app.recentEvents(10)) } }) } func TestFlashArtifactAndHeartbeatBranches(t *testing.T) { kube := remoteWorkflowKubeServer(t, remoteKubeOptions{}) installKubeFactory(t, kube) app := remoteTestApp(t, nil) job := app.newJob("replace", "titan-15", "missing-host", "/dev/sdz") if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil || !strings.Contains(err.Error(), "not a current cluster node") { t.Fatalf("expected missing host flashArtifact error, got %v", err) } app.heartbeatRemoteJob("") app.heartbeatRemoteJob(job.ID) if got := app.job(job.ID); got == nil || got.ProgressPct != 0 { t.Fatalf("queued heartbeat should be a no-op, got %#v", got) } app.setJob(job.ID, func(j *Job) { j.Status = JobRunning j.Stage = "preflight" j.Device = "/dev/sdz" j.Host = "titan-22" j.ProgressPct = 10 }) app.heartbeatRemoteJob(job.ID) if got := app.job(job.ID); got == nil || got.ProgressPct != 80 || !strings.Contains(got.Message, "Validating") { t.Fatalf("preflight heartbeat = %#v", got) } app.setJob(job.ID, func(j *Job) { j.Stage = "flash" j.ProgressPct = 80 j.Written = 120 j.Total = 100 }) app.heartbeatRemoteJob(job.ID) if got := app.job(job.ID); got == nil || got.ProgressPct != 98 || !strings.Contains(got.Message, "Writing") { t.Fatalf("flash byte heartbeat = %#v", got) } app.setJob(job.ID, func(j *Job) { j.Stage = "flash" j.StageStartedAt = time.Time{} j.StartedAt = time.Now().Add(-20 * time.Second) j.ProgressPct = 80 j.Written = 0 j.Total = 0 }) app.heartbeatRemoteJob(job.ID) if got := app.job(job.ID); got == nil || got.ProgressPct <= 80 || !strings.Contains(got.Message, "Writing") { t.Fatalf("flash elapsed heartbeat = %#v", got) } } type remoteKubeOptions struct { nodes []map[string]any devicePhase string deviceMessage string buildPhase string buildMessage string flashPhase string flashMessage string deleteNodeStatus int } func remoteTestApp(t *testing.T, harbor *httptest.Server) *App { t.Helper() app := newTestApp(t) app.settings.Namespace = "maintenance" app.settings.RunnerImageARM64 = "runner:arm64" app.settings.HarborProject = "metis" app.settings.HarborRegistry = "registry.example" app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") if harbor != nil { app.settings.HarborAPIBase = harbor.URL + "/api/v2.0" app.settings.HarborUsername = "admin" app.settings.HarborPassword = "pw" } return app } func remoteWorkflowKubeServer(t *testing.T, opts remoteKubeOptions) *httptest.Server { t.Helper() devicePhase := defaultString(opts.devicePhase, "Succeeded") deviceMessage := defaultString(opts.deviceMessage, `{"devices":[{"name":"sdz","path":"/dev/sdz","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000},{"name":"tmp","path":"hosttmp:///tmp","model":"Host /tmp","transport":"test","type":"file","note":"Test-only host write target under /tmp","size_bytes":1}]}`) buildPhase := defaultString(opts.buildPhase, "Succeeded") buildMessage := defaultString(opts.buildMessage, `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`) flashPhase := defaultString(opts.flashPhase, "Succeeded") flashMessage := defaultString(opts.flashMessage, `{"dest_path":"/tmp/metis-flash-test/titan-15.img"}`) nodes := opts.nodes if nodes == nil { nodes = []map[string]any{ { "metadata": map[string]any{ "name": "titan-22", "labels": map[string]string{ "kubernetes.io/arch": "arm64", "hardware": "rpi5", "node-role.kubernetes.io/worker": "true", }, }, "spec": map[string]any{"unschedulable": false}, }, } } deleteNodeStatus := opts.deleteNodeStatus if deleteNodeStatus == 0 { deleteNodeStatus = http.StatusOK } return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch { case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes": _ = json.NewEncoder(w).Encode(map[string]any{"items": nodes}) case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/maintenance/pods": _ = json.NewEncoder(w).Encode(map[string]any{"items": []any{}}) case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"): w.WriteHeader(http.StatusCreated) case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/nodes/"): w.WriteHeader(deleteNodeStatus) case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/pods/"): w.WriteHeader(http.StatusOK) case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/") && strings.HasSuffix(r.URL.Path, "/log"): _, _ = w.Write([]byte("remote logs")) case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"): podName := filepath.Base(r.URL.Path) phase, message := "Succeeded", "{}" switch { case strings.Contains(podName, "devices"): phase, message = devicePhase, deviceMessage case strings.Contains(podName, "build"): phase, message = buildPhase, buildMessage case strings.Contains(podName, "flash"): phase, message = flashPhase, flashMessage } _ = json.NewEncoder(w).Encode(map[string]any{ "metadata": map[string]any{"name": podName}, "status": map[string]any{ "phase": phase, "reason": "Completed", "message": message, }, }) default: http.NotFound(w, r) } })) } func harborPruneFailureServer(t *testing.T) *httptest.Server { t.Helper() return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch { case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"): _ = json.NewEncoder(w).Encode([]map[string]string{{"name": "metis"}}) case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"): http.Error(w, "artifact list failed", http.StatusInternalServerError) default: http.NotFound(w, r) } })) } func defaultString(value, fallback string) string { if strings.TrimSpace(value) == "" { return fallback } return value }