2026-04-11 00:17:10 -03:00
package service
import (
2026-04-21 05:54:36 -03:00
"encoding/json"
"net/http"
"net/http/httptest"
"os"
2026-04-11 00:17:10 -03:00
"path/filepath"
2026-04-21 05:54:36 -03:00
"strings"
2026-04-11 00:17:10 -03:00
"testing"
"time"
)
func TestRemoteWorkflowErrorBranches ( t * testing . T ) {
kube := fakeKubeServer ( t )
installKubeFactory ( t , kube )
app := newTestApp ( t )
app . settings . Namespace = "maintenance"
app . settings . RunnerImageARM64 = ""
if _ , err := app . RefreshDevices ( "titan-22" ) ; err == nil {
t . Fatal ( "expected RefreshDevices to fail without runner image" )
}
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError {
t . Fatalf ( "expected build job error, got %#v" , got )
}
job = app . newJob ( "flash" , "titan-15" , "titan-22" , "/dev/sdz" )
2026-04-24 12:09:53 -03:00
if _ , err := app . flashArtifact ( job . ID , "registry.example/metis/titan-15" ) ; err == nil {
2026-04-11 00:17:10 -03:00
t . Fatal ( "expected flashArtifact error" )
}
app . setJob ( job . ID , func ( j * Job ) {
j . Status = JobRunning
j . Stage = "build"
j . StageStartedAt = time . Now ( ) . Add ( - 30 * time . Second )
} )
app . heartbeatRemoteJob ( job . ID )
if got := app . job ( job . ID ) ; got == nil || got . ProgressPct == 0 {
t . Fatalf ( "expected heartbeat progress, got %#v" , got )
}
}
func TestRemoteWorkflowMissingRunnerImageBranch ( t * testing . T ) {
kube := fakeKubeServer ( t )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := newTestApp ( t )
app . settings . Namespace = "maintenance"
app . settings . RunnerImageARM64 = ""
app . settings . HarborAPIBase = harbor . URL + "/api/v2.0"
app . settings . HarborUsername = "admin"
app . settings . HarborPassword = "pw"
app . settings . HarborProject = "metis"
app . settings . HarborRegistry = "registry.example"
app . settings . ArtifactStatePath = filepath . Join ( t . TempDir ( ) , "artifacts.json" )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError {
t . Fatalf ( "expected build job error, got %#v" , got )
}
}
2026-04-21 05:54:36 -03:00
func TestRefreshDevicesDefaultSortAndErrorBranches ( t * testing . T ) {
t . Run ( "default host and deterministic sorting" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions {
deviceMessage : ` { "devices":[ { "name":"sdc","path":"/dev/sdc","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":64000000000}, { "name":"sdb","path":"/dev/sdb","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000}, { "name":"sda","path":"/dev/sda","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000}]} ` ,
} )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , nil )
devices , err := app . RefreshDevices ( "" )
if err != nil {
t . Fatalf ( "RefreshDevices: %v" , err )
}
if len ( devices ) != 3 || devices [ 0 ] . Path != "/dev/sda" || devices [ 1 ] . Path != "/dev/sdb" {
t . Fatalf ( "unexpected sorted devices: %#v" , devices )
}
} )
t . Run ( "remote pod failure records device error" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { devicePhase : "Failed" , deviceMessage : "device scan failed" } )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , nil )
if _ , err := app . RefreshDevices ( "titan-22" ) ; err == nil || ! strings . Contains ( err . Error ( ) , "device scan failed" ) {
t . Fatalf ( "expected device scan failure, got %v" , err )
}
if _ , err := app . cachedDevices ( "titan-22" ) ; err == nil || ! strings . Contains ( err . Error ( ) , "device scan failed" ) {
t . Fatalf ( "expected cached device error, got %v" , err )
}
} )
t . Run ( "malformed device payload records decode error" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { deviceMessage : "{" } )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , nil )
if _ , err := app . RefreshDevices ( "titan-22" ) ; err == nil || ! strings . Contains ( err . Error ( ) , "decode remote devices" ) {
t . Fatalf ( "expected device decode failure, got %v" , err )
}
} )
}
func TestRunBuildAdditionalRemoteBranches ( t * testing . T ) {
t . Run ( "missing inventory node" , func ( t * testing . T ) {
app := remoteTestApp ( t , nil )
job := app . newJob ( "build" , "missing-node" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError {
t . Fatalf ( "expected missing-node job error, got %#v" , got )
}
} )
t . Run ( "no eligible builder" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { nodes : [ ] map [ string ] any { } } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError || ! strings . Contains ( got . Error , "no build host" ) {
t . Fatalf ( "expected builder selection error, got %#v" , got )
}
} )
t . Run ( "build pod failure" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { buildPhase : "Failed" , buildMessage : "build crashed" } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError || ! strings . Contains ( got . Error , "build crashed" ) {
t . Fatalf ( "expected build pod error, got %#v" , got )
}
} )
t . Run ( "build output decode failure" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { buildMessage : "{" } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError || ! strings . Contains ( got . Error , "decode remote build output" ) {
t . Fatalf ( "expected build decode error, got %#v" , got )
}
} )
t . Run ( "artifact persistence failure" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
app . settings . ArtifactStatePath = t . TempDir ( )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError {
t . Fatalf ( "expected artifact persist error, got %#v" , got )
}
} )
t . Run ( "prune warning still completes build" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { } )
harbor := harborPruneFailureServer ( t )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "build" , "titan-15" , "" , "" )
app . runBuild ( job , false )
got := app . job ( job . ID )
if got == nil || got . Status != JobDone {
t . Fatalf ( "expected build to finish despite prune warning, got %#v" , got )
}
if events := app . recentEvents ( 5 ) ; len ( events ) == 0 || events [ 0 ] . Kind != "image.build" {
t . Fatalf ( "expected image build event, got %#v" , events )
}
} )
t . Run ( "flash preflight rejects stale device" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "replace" , "titan-15" , "titan-22" , "/dev/sda" )
app . runBuild ( job , true )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError || ! strings . Contains ( got . Error , "not a current flash candidate" ) {
t . Fatalf ( "expected stale device error, got %#v" , got )
}
} )
t . Run ( "flash pod failure" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { flashPhase : "Failed" , flashMessage : "flash failed" } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "replace" , "titan-15" , "titan-22" , "/dev/sdz" )
app . runBuild ( job , true )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobError || ! strings . Contains ( got . Error , "flash failed" ) {
t . Fatalf ( "expected flash pod error, got %#v" , got )
}
} )
t . Run ( "host tmp flash completion message" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "replace" , "titan-15" , "titan-22" , hostTmpDevicePath )
app . runBuild ( job , true )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobDone || ! strings . Contains ( got . Message , "host /tmp" ) {
t . Fatalf ( "expected hosttmp completion, got %#v" , got )
}
} )
t . Run ( "node delete warning still flashes" , func ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { deleteNodeStatus : http . StatusInternalServerError } )
harbor := fakeHarborServer ( t , true )
installKubeFactory ( t , kube )
tmp := t . TempDir ( )
kubectl := filepath . Join ( tmp , "kubectl" )
if err := os . WriteFile ( kubectl , [ ] byte ( "#!/usr/bin/env sh\nprintf 'delete denied' >&2\nexit 1\n" ) , 0 o755 ) ; err != nil {
t . Fatal ( err )
}
t . Setenv ( "PATH" , tmp + string ( os . PathListSeparator ) + os . Getenv ( "PATH" ) )
app := remoteTestApp ( t , harbor )
job := app . newJob ( "replace" , "titan-15" , "titan-22" , "/dev/sdz" )
app . runBuild ( job , true )
if got := app . job ( job . ID ) ; got == nil || got . Status != JobDone {
t . Fatalf ( "expected flash success despite delete warning, got %#v" , got )
}
found := false
for _ , event := range app . recentEvents ( 10 ) {
if event . Kind == "node.delete.warning" {
found = true
}
}
if ! found {
t . Fatalf ( "expected node.delete.warning event, got %#v" , app . recentEvents ( 10 ) )
}
} )
}
func TestFlashArtifactAndHeartbeatBranches ( t * testing . T ) {
kube := remoteWorkflowKubeServer ( t , remoteKubeOptions { } )
installKubeFactory ( t , kube )
app := remoteTestApp ( t , nil )
job := app . newJob ( "replace" , "titan-15" , "missing-host" , "/dev/sdz" )
2026-04-24 12:09:53 -03:00
if _ , err := app . flashArtifact ( job . ID , "registry.example/metis/titan-15" ) ; err == nil || ! strings . Contains ( err . Error ( ) , "not a current cluster node" ) {
2026-04-21 05:54:36 -03:00
t . Fatalf ( "expected missing host flashArtifact error, got %v" , err )
}
app . heartbeatRemoteJob ( "" )
app . heartbeatRemoteJob ( job . ID )
if got := app . job ( job . ID ) ; got == nil || got . ProgressPct != 0 {
t . Fatalf ( "queued heartbeat should be a no-op, got %#v" , got )
}
app . setJob ( job . ID , func ( j * Job ) {
j . Status = JobRunning
j . Stage = "preflight"
j . Device = "/dev/sdz"
j . Host = "titan-22"
j . ProgressPct = 10
} )
app . heartbeatRemoteJob ( job . ID )
if got := app . job ( job . ID ) ; got == nil || got . ProgressPct != 80 || ! strings . Contains ( got . Message , "Validating" ) {
t . Fatalf ( "preflight heartbeat = %#v" , got )
}
app . setJob ( job . ID , func ( j * Job ) {
2026-04-24 12:09:53 -03:00
j . Stage = "flash_write"
2026-04-21 05:54:36 -03:00
j . ProgressPct = 80
j . Written = 120
j . Total = 100
} )
app . heartbeatRemoteJob ( job . ID )
if got := app . job ( job . ID ) ; got == nil || got . ProgressPct != 98 || ! strings . Contains ( got . Message , "Writing" ) {
t . Fatalf ( "flash byte heartbeat = %#v" , got )
}
app . setJob ( job . ID , func ( j * Job ) {
j . Stage = "flash"
j . StageStartedAt = time . Time { }
j . StartedAt = time . Now ( ) . Add ( - 20 * time . Second )
j . ProgressPct = 80
j . Written = 0
j . Total = 0
} )
app . heartbeatRemoteJob ( job . ID )
if got := app . job ( job . ID ) ; got == nil || got . ProgressPct <= 80 || ! strings . Contains ( got . Message , "Writing" ) {
t . Fatalf ( "flash elapsed heartbeat = %#v" , got )
}
}
type remoteKubeOptions struct {
nodes [ ] map [ string ] any
devicePhase string
deviceMessage string
buildPhase string
buildMessage string
flashPhase string
flashMessage string
deleteNodeStatus int
}
func remoteTestApp ( t * testing . T , harbor * httptest . Server ) * App {
t . Helper ( )
app := newTestApp ( t )
app . settings . Namespace = "maintenance"
app . settings . RunnerImageARM64 = "runner:arm64"
app . settings . HarborProject = "metis"
app . settings . HarborRegistry = "registry.example"
app . settings . ArtifactStatePath = filepath . Join ( t . TempDir ( ) , "artifacts.json" )
if harbor != nil {
app . settings . HarborAPIBase = harbor . URL + "/api/v2.0"
app . settings . HarborUsername = "admin"
app . settings . HarborPassword = "pw"
}
return app
}
func remoteWorkflowKubeServer ( t * testing . T , opts remoteKubeOptions ) * httptest . Server {
t . Helper ( )
devicePhase := defaultString ( opts . devicePhase , "Succeeded" )
2026-04-23 23:36:42 -03:00
deviceMessage := defaultString ( opts . deviceMessage , ` { "devices":[ { "name":"sdz","path":"/dev/sdz","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000}, { "name":"tmp","path":"hosttmp:///var/tmp/metis-flash-test","model":"Host scratch","transport":"test","type":"file","note":"Test-only host write target under /var/tmp/metis-flash-test","size_bytes":1}]} ` )
2026-04-21 05:54:36 -03:00
buildPhase := defaultString ( opts . buildPhase , "Succeeded" )
buildMessage := defaultString ( opts . buildMessage , ` { "local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"} ` )
flashPhase := defaultString ( opts . flashPhase , "Succeeded" )
2026-04-24 12:09:53 -03:00
flashMessage := defaultString ( opts . flashMessage , ` { "dest_path":"/var/tmp/metis-flash-test/titan-15.img","verified":true,"verification_kind":"image-file","verification_summary":"Verified image layout at /var/tmp/metis-flash-test/titan-15.img; boot and writable partitions are present."} ` )
2026-04-21 05:54:36 -03:00
nodes := opts . nodes
if nodes == nil {
nodes = [ ] map [ string ] any {
{
"metadata" : map [ string ] any {
"name" : "titan-22" ,
"labels" : map [ string ] string {
"kubernetes.io/arch" : "arm64" ,
"hardware" : "rpi5" ,
"node-role.kubernetes.io/worker" : "true" ,
} ,
} ,
"spec" : map [ string ] any { "unschedulable" : false } ,
} ,
}
}
deleteNodeStatus := opts . deleteNodeStatus
if deleteNodeStatus == 0 {
deleteNodeStatus = http . StatusOK
}
return httptest . NewServer ( http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
switch {
case r . Method == http . MethodGet && r . URL . Path == "/api/v1/nodes" :
_ = json . NewEncoder ( w ) . Encode ( map [ string ] any { "items" : nodes } )
case r . Method == http . MethodGet && r . URL . Path == "/api/v1/namespaces/maintenance/pods" :
_ = json . NewEncoder ( w ) . Encode ( map [ string ] any { "items" : [ ] any { } } )
case r . Method == http . MethodPost && strings . Contains ( r . URL . Path , "/pods" ) :
w . WriteHeader ( http . StatusCreated )
case r . Method == http . MethodDelete && strings . Contains ( r . URL . Path , "/nodes/" ) :
w . WriteHeader ( deleteNodeStatus )
case r . Method == http . MethodDelete && strings . Contains ( r . URL . Path , "/pods/" ) :
w . WriteHeader ( http . StatusOK )
case r . Method == http . MethodGet && strings . Contains ( r . URL . Path , "/pods/" ) && strings . HasSuffix ( r . URL . Path , "/log" ) :
_ , _ = w . Write ( [ ] byte ( "remote logs" ) )
case r . Method == http . MethodGet && strings . Contains ( r . URL . Path , "/pods/" ) :
podName := filepath . Base ( r . URL . Path )
phase , message := "Succeeded" , "{}"
switch {
case strings . Contains ( podName , "devices" ) :
phase , message = devicePhase , deviceMessage
case strings . Contains ( podName , "build" ) :
phase , message = buildPhase , buildMessage
case strings . Contains ( podName , "flash" ) :
phase , message = flashPhase , flashMessage
}
_ = json . NewEncoder ( w ) . Encode ( map [ string ] any {
"metadata" : map [ string ] any { "name" : podName } ,
"status" : map [ string ] any {
"phase" : phase ,
"reason" : "Completed" ,
"message" : message ,
} ,
} )
default :
http . NotFound ( w , r )
}
} ) )
}
func harborPruneFailureServer ( t * testing . T ) * httptest . Server {
t . Helper ( )
return httptest . NewServer ( http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
switch {
case r . Method == http . MethodGet && strings . HasPrefix ( r . URL . Path , "/api/v2.0/projects" ) :
_ = json . NewEncoder ( w ) . Encode ( [ ] map [ string ] string { { "name" : "metis" } } )
case r . Method == http . MethodGet && strings . Contains ( r . URL . Path , "/artifacts" ) :
http . Error ( w , "artifact list failed" , http . StatusInternalServerError )
default :
http . NotFound ( w , r )
}
} ) )
}
func defaultString ( value , fallback string ) string {
if strings . TrimSpace ( value ) == "" {
return fallback
}
return value
}