metis(ui): split build and flash workflows
This commit is contained in:
parent
bf9c514fd0
commit
56d31fa613
@ -163,76 +163,6 @@ func remoteBuildCmd(args []string) {
|
|||||||
writeStructuredResult(summary)
|
writeStructuredResult(summary)
|
||||||
}
|
}
|
||||||
|
|
||||||
func remoteFlashCmd(args []string) {
|
|
||||||
fs := flag.NewFlagSet("remote-flash", flag.ExitOnError)
|
|
||||||
node := fs.String("node", "", "target node")
|
|
||||||
device := fs.String("device", "", "target device path or test sink")
|
|
||||||
artifactRef := fs.String("artifact-ref", "", "harbor artifact ref without tag")
|
|
||||||
workDir := fs.String("work-dir", filepath.Join(os.TempDir(), "metis-flash"), "working directory")
|
|
||||||
harborRegistry := fs.String("harbor-registry", getenvOr("METIS_HARBOR_REGISTRY", "registry.bstein.dev"), "harbor registry host")
|
|
||||||
harborUsername := fs.String("harbor-username", getenvOr("METIS_HARBOR_USERNAME", ""), "harbor username")
|
|
||||||
harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
|
|
||||||
hostTmpDir := fs.String("host-tmp-dir", "/host-tmp", "mounted host tmp dir for test writes")
|
|
||||||
fs.Parse(args)
|
|
||||||
if *node == "" || *device == "" || *artifactRef == "" {
|
|
||||||
fatalf("--node, --device, and --artifact-ref are required")
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.MkdirAll(*workDir, 0o755); err != nil {
|
|
||||||
fatalf("mkdir workdir: %v", err)
|
|
||||||
}
|
|
||||||
emitStageProgress("flash", 84, fmt.Sprintf("Pulling the latest Harbor artifact for %s", *node))
|
|
||||||
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
|
|
||||||
fatalf("oras login: %v", err)
|
|
||||||
}
|
|
||||||
if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
|
|
||||||
fatalf("oras pull: %v", err)
|
|
||||||
}
|
|
||||||
emitStageProgress("flash", 88, fmt.Sprintf("Preparing the downloaded image for %s", *node))
|
|
||||||
imagePath, compressed, err := resolvePulledArtifact(*workDir)
|
|
||||||
if err != nil {
|
|
||||||
fatalf("resolve artifact: %v", err)
|
|
||||||
}
|
|
||||||
rawImage := imagePath
|
|
||||||
if compressed {
|
|
||||||
emitStageProgress("flash", 90, fmt.Sprintf("Decompressing the image for %s before writing", *node))
|
|
||||||
rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
|
|
||||||
cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage))
|
|
||||||
if out, err := cmd.CombinedOutput(); err != nil {
|
|
||||||
fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
destPath := *device
|
|
||||||
if strings.HasPrefix(destPath, "hosttmp://") {
|
|
||||||
if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil {
|
|
||||||
fatalf("mkdir host tmp dir: %v", err)
|
|
||||||
}
|
|
||||||
destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
|
|
||||||
}
|
|
||||||
emitStageProgress("flash", 92, fmt.Sprintf("Writing the latest image for %s to %s", *node, destPath))
|
|
||||||
writeEmitter := newProgressEmitter("flash", 92, 98, fmt.Sprintf("Writing the latest image for %s", *node), true)
|
|
||||||
if err := writer.WriteImageWithProgress(context.Background(), rawImage, destPath, writeEmitter); err != nil {
|
|
||||||
fatalf("write image: %v", err)
|
|
||||||
}
|
|
||||||
emitStageProgress("flash", 99, fmt.Sprintf("Flushing the finished image for %s", *node))
|
|
||||||
_ = exec.Command("sync").Run()
|
|
||||||
if strings.HasPrefix(destPath, "/dev/") {
|
|
||||||
_ = exec.Command("blockdev", "--flushbufs", destPath).Run()
|
|
||||||
}
|
|
||||||
|
|
||||||
info, err := os.Stat(destPath)
|
|
||||||
if err != nil {
|
|
||||||
fatalf("stat destination: %v", err)
|
|
||||||
}
|
|
||||||
writeStructuredResult(map[string]any{
|
|
||||||
"node": *node,
|
|
||||||
"device": *device,
|
|
||||||
"dest_path": destPath,
|
|
||||||
"size_bytes": info.Size(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func writeStructuredResult(payload any) {
|
func writeStructuredResult(payload any) {
|
||||||
data, err := json.Marshal(payload)
|
data, err := json.Marshal(payload)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
304
cmd/metis/remote_flash.go
Normal file
304
cmd/metis/remote_flash.go
Normal file
@ -0,0 +1,304 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/service"
|
||||||
|
"metis/pkg/writer"
|
||||||
|
)
|
||||||
|
|
||||||
|
type flashPartitionTable struct {
|
||||||
|
PartitionTable struct {
|
||||||
|
Partitions []flashTablePartition `json:"partitions"`
|
||||||
|
} `json:"partitiontable"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type flashTablePartition struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type flashBlockDevicePayload struct {
|
||||||
|
Blockdevices []flashBlockDevice `json:"blockdevices"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type flashBlockDevice struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
FSType string `json:"fstype"`
|
||||||
|
Label string `json:"label"`
|
||||||
|
Children []flashBlockDevice `json:"children"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var requiredBootFiles = []string{"config.txt", "cmdline.txt", "boot.scr"}
|
||||||
|
|
||||||
|
func remoteFlashCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("remote-flash", flag.ExitOnError)
|
||||||
|
node := fs.String("node", "", "target node")
|
||||||
|
device := fs.String("device", "", "target device path or test sink")
|
||||||
|
artifactRef := fs.String("artifact-ref", "", "harbor artifact ref without tag")
|
||||||
|
workDir := fs.String("work-dir", filepath.Join(os.TempDir(), "metis-flash"), "working directory")
|
||||||
|
harborRegistry := fs.String("harbor-registry", getenvOr("METIS_HARBOR_REGISTRY", "registry.bstein.dev"), "harbor registry host")
|
||||||
|
harborUsername := fs.String("harbor-username", getenvOr("METIS_HARBOR_USERNAME", ""), "harbor username")
|
||||||
|
harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
|
||||||
|
hostTmpDir := fs.String("host-tmp-dir", "/host-tmp", "mounted host tmp dir for test writes")
|
||||||
|
fs.Parse(args)
|
||||||
|
if *node == "" || *device == "" || *artifactRef == "" {
|
||||||
|
fatalf("--node, --device, and --artifact-ref are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(*workDir, 0o755); err != nil {
|
||||||
|
fatalf("mkdir workdir: %v", err)
|
||||||
|
}
|
||||||
|
emitStageProgress("flash_pull", 84, fmt.Sprintf("Pulling the latest Harbor artifact for %s", *node))
|
||||||
|
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
|
||||||
|
fatalf("oras login: %v", err)
|
||||||
|
}
|
||||||
|
if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
|
||||||
|
fatalf("oras pull: %v", err)
|
||||||
|
}
|
||||||
|
emitStageProgress("flash_prepare", 88, fmt.Sprintf("Preparing the downloaded image for %s", *node))
|
||||||
|
imagePath, compressed, err := resolvePulledArtifact(*workDir)
|
||||||
|
if err != nil {
|
||||||
|
fatalf("resolve artifact: %v", err)
|
||||||
|
}
|
||||||
|
rawImage := imagePath
|
||||||
|
if compressed {
|
||||||
|
emitStageProgress("flash_unpack", 90, fmt.Sprintf("Decompressing the image for %s before writing", *node))
|
||||||
|
rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
|
||||||
|
cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage))
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
destPath := *device
|
||||||
|
if strings.HasPrefix(destPath, "hosttmp://") {
|
||||||
|
if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil {
|
||||||
|
fatalf("mkdir host tmp dir: %v", err)
|
||||||
|
}
|
||||||
|
destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
|
||||||
|
}
|
||||||
|
info, err := os.Stat(rawImage)
|
||||||
|
if err != nil {
|
||||||
|
fatalf("stat raw image: %v", err)
|
||||||
|
}
|
||||||
|
imageSize := info.Size()
|
||||||
|
emitStageProgress("flash_write", 92, fmt.Sprintf("Writing the latest image for %s to %s", *node, destPath))
|
||||||
|
writeEmitter := newProgressEmitter("flash_write", 92, 98, fmt.Sprintf("Writing the latest image for %s", *node), true)
|
||||||
|
if err := writer.WriteImageWithProgress(context.Background(), rawImage, destPath, writeEmitter); err != nil {
|
||||||
|
fatalf("write image: %v", err)
|
||||||
|
}
|
||||||
|
flushFlashTarget(destPath, *node)
|
||||||
|
result, err := verifyFlashDestination(destPath)
|
||||||
|
if err != nil {
|
||||||
|
fatalf("verify flash output: %v", err)
|
||||||
|
}
|
||||||
|
result.Node = *node
|
||||||
|
result.Device = *device
|
||||||
|
result.DestPath = destPath
|
||||||
|
result.SizeBytes = imageSize
|
||||||
|
writeStructuredResult(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func flushFlashTarget(destPath, node string) {
|
||||||
|
emitStageProgress("flash_flush", 98.5, fmt.Sprintf("Flushing the finished image for %s", node))
|
||||||
|
_ = exec.Command("sync").Run()
|
||||||
|
if !strings.HasPrefix(destPath, "/dev/") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = exec.Command("blockdev", "--flushbufs", destPath).Run()
|
||||||
|
_ = exec.Command("blockdev", "--rereadpt", destPath).Run()
|
||||||
|
_ = exec.Command("partprobe", destPath).Run()
|
||||||
|
_ = exec.Command("udevadm", "settle", "--timeout=10").Run()
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyFlashDestination(destPath string) (service.RemoteFlashResult, error) {
|
||||||
|
emitStageProgress("flash_verify", 99.2, fmt.Sprintf("Verifying the flashed recovery media at %s", destPath))
|
||||||
|
if strings.HasPrefix(destPath, "/dev/") {
|
||||||
|
return verifyBlockDeviceFlash(destPath)
|
||||||
|
}
|
||||||
|
return verifyImageFileFlash(destPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyImageFileFlash(destPath string) (service.RemoteFlashResult, error) {
|
||||||
|
table, err := readFlashPartitionTable(destPath)
|
||||||
|
if err != nil {
|
||||||
|
return service.RemoteFlashResult{}, err
|
||||||
|
}
|
||||||
|
hasBoot := false
|
||||||
|
hasRoot := false
|
||||||
|
for _, part := range table.PartitionTable.Partitions {
|
||||||
|
if isBootPartitionType(part.Type) {
|
||||||
|
hasBoot = true
|
||||||
|
}
|
||||||
|
if isLinuxPartitionType(part.Type) {
|
||||||
|
hasRoot = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasBoot || !hasRoot {
|
||||||
|
return service.RemoteFlashResult{}, fmt.Errorf("image %s does not expose the expected boot and writable partitions", destPath)
|
||||||
|
}
|
||||||
|
return service.RemoteFlashResult{
|
||||||
|
Verified: true,
|
||||||
|
VerificationKind: "image-file",
|
||||||
|
VerificationSummary: fmt.Sprintf("Verified image layout at %s; boot and writable partitions are present.", destPath),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyBlockDeviceFlash(destPath string) (service.RemoteFlashResult, error) {
|
||||||
|
deadline := time.Now().Add(15 * time.Second)
|
||||||
|
var lastErr error
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
parts, err := readBlockDevicePartitions(destPath)
|
||||||
|
if err == nil {
|
||||||
|
boot, root, classifyErr := classifyFlashPartitions(parts)
|
||||||
|
if classifyErr == nil {
|
||||||
|
checkedFiles, verifyErr := verifyBootPartitionFiles(boot.Path)
|
||||||
|
if verifyErr == nil {
|
||||||
|
bootLabel := firstNonEmpty(boot.Label, filepath.Base(boot.Path))
|
||||||
|
rootLabel := firstNonEmpty(root.Label, filepath.Base(root.Path))
|
||||||
|
return service.RemoteFlashResult{
|
||||||
|
Verified: true,
|
||||||
|
VerificationKind: "block-device",
|
||||||
|
VerificationSummary: fmt.Sprintf("Verified %s; %s and %s are present and the boot files look correct.", destPath, bootLabel, rootLabel),
|
||||||
|
BootPartition: boot.Path,
|
||||||
|
RootPartition: root.Path,
|
||||||
|
BootLabel: boot.Label,
|
||||||
|
RootLabel: root.Label,
|
||||||
|
BootFSType: boot.FSType,
|
||||||
|
RootFSType: root.FSType,
|
||||||
|
CheckedFiles: checkedFiles,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
lastErr = verifyErr
|
||||||
|
} else {
|
||||||
|
lastErr = classifyErr
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
lastErr = err
|
||||||
|
}
|
||||||
|
_ = exec.Command("blockdev", "--rereadpt", destPath).Run()
|
||||||
|
_ = exec.Command("partprobe", destPath).Run()
|
||||||
|
_ = exec.Command("udevadm", "settle", "--timeout=10").Run()
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
}
|
||||||
|
if lastErr == nil {
|
||||||
|
lastErr = fmt.Errorf("timed out waiting for the flashed partitions on %s", destPath)
|
||||||
|
}
|
||||||
|
return service.RemoteFlashResult{}, lastErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func readFlashPartitionTable(destPath string) (flashPartitionTable, error) {
|
||||||
|
out, err := exec.Command("sfdisk", "-J", destPath).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return flashPartitionTable{}, fmt.Errorf("sfdisk -J %s: %v: %s", destPath, err, strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
var table flashPartitionTable
|
||||||
|
if err := json.Unmarshal(out, &table); err != nil {
|
||||||
|
return flashPartitionTable{}, fmt.Errorf("decode partition table for %s: %w", destPath, err)
|
||||||
|
}
|
||||||
|
return table, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readBlockDevicePartitions(destPath string) ([]flashBlockDevice, error) {
|
||||||
|
out, err := exec.Command("lsblk", "-J", "-o", "PATH,TYPE,FSTYPE,LABEL", destPath).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("lsblk %s: %v: %s", destPath, err, strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
var payload flashBlockDevicePayload
|
||||||
|
if err := json.Unmarshal(out, &payload); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode lsblk output for %s: %w", destPath, err)
|
||||||
|
}
|
||||||
|
for _, device := range payload.Blockdevices {
|
||||||
|
if device.Path == destPath {
|
||||||
|
return device.Children, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(payload.Blockdevices) == 1 {
|
||||||
|
return payload.Blockdevices[0].Children, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("lsblk did not report partitions for %s", destPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func classifyFlashPartitions(parts []flashBlockDevice) (flashBlockDevice, flashBlockDevice, error) {
|
||||||
|
var boot flashBlockDevice
|
||||||
|
var root flashBlockDevice
|
||||||
|
for _, part := range parts {
|
||||||
|
if part.Type != "part" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
normalizedLabel := strings.ToLower(strings.TrimSpace(part.Label))
|
||||||
|
normalizedFS := strings.ToLower(strings.TrimSpace(part.FSType))
|
||||||
|
if boot.Path == "" && (normalizedLabel == "system-boot" || normalizedFS == "vfat" || normalizedFS == "fat" || normalizedFS == "fat32") {
|
||||||
|
boot = part
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if root.Path == "" && (normalizedLabel == "writable" || normalizedFS == "ext4") {
|
||||||
|
root = part
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if boot.Path == "" {
|
||||||
|
return flashBlockDevice{}, flashBlockDevice{}, fmt.Errorf("could not find a boot partition with a FAT filesystem")
|
||||||
|
}
|
||||||
|
if root.Path == "" {
|
||||||
|
return flashBlockDevice{}, flashBlockDevice{}, fmt.Errorf("could not find a writable ext4 partition")
|
||||||
|
}
|
||||||
|
return boot, root, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyBootPartitionFiles(partitionPath string) ([]string, error) {
|
||||||
|
mountDir, err := os.MkdirTemp("", "metis-boot-")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(mountDir)
|
||||||
|
cmd := exec.Command("mount", "-o", "ro", partitionPath, mountDir)
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
return nil, fmt.Errorf("mount %s read-only: %v: %s", partitionPath, err, strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
defer exec.Command("umount", mountDir).Run()
|
||||||
|
|
||||||
|
checked := make([]string, 0, len(requiredBootFiles))
|
||||||
|
for _, name := range requiredBootFiles {
|
||||||
|
if _, err := os.Stat(filepath.Join(mountDir, name)); err != nil {
|
||||||
|
return nil, fmt.Errorf("boot partition %s is missing %s", partitionPath, name)
|
||||||
|
}
|
||||||
|
checked = append(checked, name)
|
||||||
|
}
|
||||||
|
return checked, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func isBootPartitionType(partType string) bool {
|
||||||
|
normalized := strings.ToLower(strings.TrimSpace(partType))
|
||||||
|
switch normalized {
|
||||||
|
case "b", "c", "0b", "0c", "ef", "ef00":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return normalized == "c12a7328-f81f-11d2-ba4b-00a0c93ec93b"
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLinuxPartitionType(partType string) bool {
|
||||||
|
normalized := strings.ToLower(strings.TrimSpace(partType))
|
||||||
|
switch normalized {
|
||||||
|
case "83", "8300":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return normalized == "0fc63daf-8483-4772-8e79-3d69d8477de4"
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstNonEmpty(values ...string) string {
|
||||||
|
for _, value := range values {
|
||||||
|
if trimmed := strings.TrimSpace(value); trimmed != "" {
|
||||||
|
return trimmed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
246
cmd/metis/remote_flash_test.go
Normal file
246
cmd/metis/remote_flash_test.go
Normal file
@ -0,0 +1,246 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVerifyFlashDestinationForImageFilesAndBlockDevices(t *testing.T) {
|
||||||
|
t.Run("image file success and helpers", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"sfdisk": `cat <<'JSON'
|
||||||
|
{"partitiontable":{"partitions":[{"type":"ef00"},{"type":"8300"}]}}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
result, err := verifyFlashDestination(filepath.Join(t.TempDir(), "titan-15.img"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("verifyFlashDestination(image): %v", err)
|
||||||
|
}
|
||||||
|
if !result.Verified || result.VerificationKind != "image-file" {
|
||||||
|
t.Fatalf("unexpected image verification result: %#v", result)
|
||||||
|
}
|
||||||
|
if !isBootPartitionType("ef00") || !isLinuxPartitionType("8300") {
|
||||||
|
t.Fatal("expected helper partition type recognizers to accept GPT aliases")
|
||||||
|
}
|
||||||
|
if got := firstNonEmpty("", " writable ", "ignored"); got != "writable" {
|
||||||
|
t.Fatalf("firstNonEmpty = %q", got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("image file missing writable partition", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"sfdisk": `cat <<'JSON'
|
||||||
|
{"partitiontable":{"partitions":[{"type":"ef00"}]}}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
if _, err := verifyImageFileFlash(filepath.Join(t.TempDir(), "broken.img")); err == nil || !strings.Contains(err.Error(), "boot and writable partitions") {
|
||||||
|
t.Fatalf("expected missing partition error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("image file missing boot partition", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"sfdisk": `cat <<'JSON'
|
||||||
|
{"partitiontable":{"partitions":[{"type":"8300"}]}}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
if _, err := verifyImageFileFlash(filepath.Join(t.TempDir(), "broken-boot.img")); err == nil || !strings.Contains(err.Error(), "boot and writable partitions") {
|
||||||
|
t.Fatalf("expected missing boot partition error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("block device success", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"lsblk": `cat <<'JSON'
|
||||||
|
{"blockdevices":[{"path":"/dev/sdk","children":[{"path":"/dev/sdk1","type":"part","fstype":"vfat","label":"system-boot"},{"path":"/dev/sdk2","type":"part","fstype":"ext4","label":"writable"}]}]}
|
||||||
|
JSON`,
|
||||||
|
"mount": `mount_dir="${4:-}"
|
||||||
|
mkdir -p "${mount_dir}"
|
||||||
|
printf 'ok' > "${mount_dir}/config.txt"
|
||||||
|
printf 'console=ttyAMA0' > "${mount_dir}/cmdline.txt"
|
||||||
|
printf 'boot script' > "${mount_dir}/boot.scr"`,
|
||||||
|
"umount": `exit 0`,
|
||||||
|
"blockdev": `exit 0`,
|
||||||
|
"partprobe": `exit 0`,
|
||||||
|
"udevadm": `exit 0`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
result, err := verifyFlashDestination("/dev/sdk")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("verifyFlashDestination(block): %v", err)
|
||||||
|
}
|
||||||
|
if !result.Verified || result.VerificationKind != "block-device" {
|
||||||
|
t.Fatalf("unexpected block-device verification result: %#v", result)
|
||||||
|
}
|
||||||
|
if result.BootPartition != "/dev/sdk1" || result.RootPartition != "/dev/sdk2" {
|
||||||
|
t.Fatalf("unexpected partition details: %#v", result)
|
||||||
|
}
|
||||||
|
if len(result.CheckedFiles) != 3 {
|
||||||
|
t.Fatalf("expected checked boot files, got %#v", result.CheckedFiles)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("block device missing boot file", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"lsblk": `cat <<'JSON'
|
||||||
|
{"blockdevices":[{"path":"/dev/sdk","children":[{"path":"/dev/sdk1","type":"part","fstype":"vfat","label":"system-boot"},{"path":"/dev/sdk2","type":"part","fstype":"ext4","label":"writable"}]}]}
|
||||||
|
JSON`,
|
||||||
|
"mount": `mount_dir="${4:-}"
|
||||||
|
mkdir -p "${mount_dir}"
|
||||||
|
printf 'ok' > "${mount_dir}/config.txt"
|
||||||
|
printf 'console=ttyAMA0' > "${mount_dir}/cmdline.txt"`,
|
||||||
|
"umount": `exit 0`,
|
||||||
|
"blockdev": `exit 0`,
|
||||||
|
"partprobe": `exit 0`,
|
||||||
|
"udevadm": `exit 0`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
if _, err := verifyBlockDeviceFlash("/dev/sdk"); err == nil || !strings.Contains(err.Error(), "missing boot.scr") {
|
||||||
|
t.Fatalf("expected missing boot file error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlushFlashTargetAndClassifyHelpers(t *testing.T) {
|
||||||
|
logPath := filepath.Join(t.TempDir(), "commands.log")
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"sync": `printf 'sync %s\n' "$*" >> "` + logPath + `"`,
|
||||||
|
"blockdev": `printf 'blockdev %s\n' "$*" >> "` + logPath + `"`,
|
||||||
|
"partprobe": `printf 'partprobe %s\n' "$*" >> "` + logPath + `"`,
|
||||||
|
"udevadm": `printf 'udevadm %s\n' "$*" >> "` + logPath + `"`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
flushFlashTarget("/tmp/test.img", "titan-15")
|
||||||
|
flushFlashTarget("/dev/sdk", "titan-15")
|
||||||
|
|
||||||
|
logged, err := os.ReadFile(logPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read flush log: %v", err)
|
||||||
|
}
|
||||||
|
text := string(logged)
|
||||||
|
if strings.Count(text, "sync ") != 2 {
|
||||||
|
t.Fatalf("expected sync for file and block targets, got %q", text)
|
||||||
|
}
|
||||||
|
for _, want := range []string{"blockdev --flushbufs /dev/sdk", "blockdev --rereadpt /dev/sdk", "partprobe /dev/sdk", "udevadm settle --timeout=10"} {
|
||||||
|
if !strings.Contains(text, want) {
|
||||||
|
t.Fatalf("expected %q in %q", want, text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, _, err := classifyFlashPartitions([]flashBlockDevice{{Path: "/dev/sdk1", Type: "part", FSType: "vfat", Label: "system-boot"}}); err == nil || !strings.Contains(err.Error(), "writable ext4") {
|
||||||
|
t.Fatalf("expected classify failure, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadFlashPartitionTableAndBlockPartitionErrors(t *testing.T) {
|
||||||
|
t.Run("partition table decode error", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{"sfdisk": `printf '{'`})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := readFlashPartitionTable(filepath.Join(t.TempDir(), "broken.img")); err == nil || !strings.Contains(err.Error(), "decode partition table") {
|
||||||
|
t.Fatalf("expected partition table decode error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lsblk decode error", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{"lsblk": `printf '{'`})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := readBlockDevicePartitions("/dev/sdk"); err == nil || !strings.Contains(err.Error(), "decode lsblk output") {
|
||||||
|
t.Fatalf("expected lsblk decode error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lsblk finds explicit device match", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"lsblk": `cat <<'JSON'
|
||||||
|
{"blockdevices":[{"path":"/dev/other","children":[{"path":"/dev/other1","type":"part"}]},{"path":"/dev/sdk","children":[{"path":"/dev/sdk1","type":"part","fstype":"vfat","label":"system-boot"}]}]}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
parts, err := readBlockDevicePartitions("/dev/sdk")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("readBlockDevicePartitions match: %v", err)
|
||||||
|
}
|
||||||
|
if len(parts) != 1 || parts[0].Path != "/dev/sdk1" {
|
||||||
|
t.Fatalf("unexpected matching partitions: %#v", parts)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lsblk missing device match", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"lsblk": `cat <<'JSON'
|
||||||
|
{"blockdevices":[{"path":"/dev/other","children":[]},{"path":"/dev/another","children":[]}]}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := readBlockDevicePartitions("/dev/sdk"); err == nil || !strings.Contains(err.Error(), "did not report partitions") {
|
||||||
|
t.Fatalf("expected missing block-device match error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyBootPartitionFilesMountFailure(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"mount": `printf 'mount failed' >&2; exit 9`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := verifyBootPartitionFiles("/dev/sdk1"); err == nil || !strings.Contains(err.Error(), "mount /dev/sdk1 read-only") {
|
||||||
|
t.Fatalf("expected mount failure, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRemoteFlashHelperAdditionalFailureBranches(t *testing.T) {
|
||||||
|
t.Run("partition table command failure", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{"sfdisk": `printf 'bad disk' >&2; exit 7`})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := readFlashPartitionTable(filepath.Join(t.TempDir(), "broken.img")); err == nil || !strings.Contains(err.Error(), "sfdisk -J") {
|
||||||
|
t.Fatalf("expected sfdisk failure, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("lsblk command failure", func(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{"lsblk": `printf 'lsblk failed' >&2; exit 4`})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
if _, err := readBlockDevicePartitions("/dev/sdk"); err == nil || !strings.Contains(err.Error(), "lsblk /dev/sdk") {
|
||||||
|
t.Fatalf("expected lsblk command failure, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("boot partition classification failure", func(t *testing.T) {
|
||||||
|
if _, _, err := classifyFlashPartitions([]flashBlockDevice{{Path: "/dev/sdk2", Type: "part", FSType: "ext4", Label: "writable"}}); err == nil || !strings.Contains(err.Error(), "boot partition") {
|
||||||
|
t.Fatalf("expected missing boot partition error, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("firstNonEmpty returns empty when every value is blank", func(t *testing.T) {
|
||||||
|
if got := firstNonEmpty("", " "); got != "" {
|
||||||
|
t.Fatalf("expected empty firstNonEmpty result, got %q", got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadBlockDevicePartitionsFallsBackToSingleReturnedDevice(t *testing.T) {
|
||||||
|
tools := fakeCommandDir(t, map[string]string{
|
||||||
|
"lsblk": `cat <<'JSON'
|
||||||
|
{"blockdevices":[{"path":"/dev/mmcblk0","children":[{"path":"/dev/mmcblk0p1","type":"part","fstype":"vfat","label":"system-boot"}]}]}
|
||||||
|
JSON`,
|
||||||
|
})
|
||||||
|
t.Setenv("PATH", tools+string(os.PathListSeparator)+os.Getenv("PATH"))
|
||||||
|
parts, err := readBlockDevicePartitions("/dev/sdk")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("readBlockDevicePartitions fallback: %v", err)
|
||||||
|
}
|
||||||
|
if len(parts) != 1 || parts[0].Path != "/dev/mmcblk0p1" {
|
||||||
|
t.Fatalf("unexpected fallback partitions: %#v", parts)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -221,6 +221,31 @@ func (a *App) Build(node string) (*Job, error) {
|
|||||||
return job, nil
|
return job, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flash starts a background flash-only workflow for the latest published node image.
|
||||||
|
func (a *App) Flash(node, host, device string) (*Job, error) {
|
||||||
|
if host == "" {
|
||||||
|
host = a.settings.DefaultFlashHost
|
||||||
|
}
|
||||||
|
if err := a.ensureReplacementReady(node); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if active := a.activeJobForNode(node); active != nil {
|
||||||
|
return nil, &activeNodeJobError{Node: node, Kind: active.Kind, JobID: active.ID}
|
||||||
|
}
|
||||||
|
if summary, ok := a.artifacts()[node]; !ok || strings.TrimSpace(summary.Ref) == "" {
|
||||||
|
return nil, fmt.Errorf("no published image recorded for %s yet; run a build first", node)
|
||||||
|
}
|
||||||
|
if _, err := a.ensureDevice(host, device); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
job, err := a.reserveJob("flash", node, host, device)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
go a.runFlash(job)
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Replace starts a background build+flash workflow for a node.
|
// Replace starts a background build+flash workflow for a node.
|
||||||
func (a *App) Replace(node, host, device string) (*Job, error) {
|
func (a *App) Replace(node, host, device string) (*Job, error) {
|
||||||
if host == "" {
|
if host == "" {
|
||||||
|
|||||||
@ -68,7 +68,7 @@ func (a *App) activeJobForNodeLocked(node string) *Job {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
switch job.Kind {
|
switch job.Kind {
|
||||||
case "build", "replace":
|
case "build", "flash", "replace":
|
||||||
default:
|
default:
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@ -407,7 +407,7 @@ func TestServiceClusterAndRemotePodBranches(t *testing.T) {
|
|||||||
"metadata": map[string]any{"name": filepath.Base(r.URL.Path)},
|
"metadata": map[string]any{"name": filepath.Base(r.URL.Path)},
|
||||||
"status": map[string]any{
|
"status": map[string]any{
|
||||||
"phase": "Succeeded",
|
"phase": "Succeeded",
|
||||||
"message": `{"dest_path":"/tmp/out.img"}`,
|
"message": `{"dest_path":"/tmp/out.img","verified":true,"verification_kind":"image-file","verification_summary":"Verified image layout at /tmp/out.img; boot and writable partitions are present."}`,
|
||||||
"reason": "Completed",
|
"reason": "Completed",
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|||||||
182
pkg/service/flash_workflow_test.go
Normal file
182
pkg/service/flash_workflow_test.go
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFlashOnlyWorkflowRequiresPublishedArtifact(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
|
||||||
|
if _, err := app.Flash("titan-15", "titan-22", "/dev/sdz"); err == nil || !strings.Contains(err.Error(), "run a build first") {
|
||||||
|
t.Fatalf("expected missing artifact error, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashOnlyWorkflowCompletesAndRecordsVerification(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{flashMessage: `{"dest_path":"/dev/sdz","verified":true,"verification_kind":"block-device","verification_summary":"Verified /dev/sdz; system-boot and writable are present and the boot files look correct.","boot_partition":"/dev/sdz1","root_partition":"/dev/sdz2","boot_label":"system-boot","root_label":"writable","boot_fstype":"vfat","root_fstype":"ext4","checked_files":["config.txt","cmdline.txt","boot.scr"]}`})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
|
||||||
|
job, err := app.Flash("titan-15", "titan-22", "/dev/sdz")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Flash: %v", err)
|
||||||
|
}
|
||||||
|
waitForJobState(t, app, job.ID, JobDone)
|
||||||
|
|
||||||
|
got := app.job(job.ID)
|
||||||
|
if got == nil || got.Kind != "flash" || got.Status != JobDone {
|
||||||
|
t.Fatalf("unexpected flash job: %#v", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got.Message, "Move the card into titan-15") {
|
||||||
|
t.Fatalf("unexpected completion message: %#v", got)
|
||||||
|
}
|
||||||
|
if got.ProgressPct != 100 {
|
||||||
|
t.Fatalf("expected complete flash progress, got %#v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
events := app.recentEvents(5)
|
||||||
|
if len(events) == 0 || events[0].Kind != "image.flash" {
|
||||||
|
t.Fatalf("expected flash event, got %#v", events)
|
||||||
|
}
|
||||||
|
if verified, ok := events[0].Details["verified"].(bool); !ok || !verified {
|
||||||
|
t.Fatalf("expected verified flash details, got %#v", events[0].Details)
|
||||||
|
}
|
||||||
|
if kind, ok := events[0].Details["verification_kind"].(string); !ok || kind != "block-device" {
|
||||||
|
t.Fatalf("expected block-device verification, got %#v", events[0].Details)
|
||||||
|
}
|
||||||
|
if bootPartition, ok := events[0].Details["boot_partition"].(string); !ok || bootPartition != "/dev/sdz1" {
|
||||||
|
t.Fatalf("expected boot partition details, got %#v", events[0].Details)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleFlashRouteStartsFlashOnlyJob(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
handler := app.Handler()
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/api/jobs/flash", strings.NewReader(`{"node":"titan-15","host":"titan-22","device":"/dev/sdz"}`))
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("X-Auth-Request-User", "brad")
|
||||||
|
req.Header.Set("X-Auth-Request-Groups", "admin")
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(resp, req)
|
||||||
|
if resp.Code != http.StatusAccepted {
|
||||||
|
t.Fatalf("flash route response: %d %s", resp.Code, resp.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
var job Job
|
||||||
|
if err := json.Unmarshal(resp.Body.Bytes(), &job); err != nil {
|
||||||
|
t.Fatalf("decode flash job: %v", err)
|
||||||
|
}
|
||||||
|
waitForJobState(t, app, job.ID, JobDone)
|
||||||
|
if got := app.job(job.ID); got == nil || got.Kind != "flash" {
|
||||||
|
t.Fatalf("expected flash job, got %#v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashAndReplaceDefaultHostBranches(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{})
|
||||||
|
harbor := fakeHarborServer(t, true)
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, harbor)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
|
||||||
|
flashJob, err := app.Flash("titan-15", "", hostTmpDevicePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Flash default host: %v", err)
|
||||||
|
}
|
||||||
|
waitForJobState(t, app, flashJob.ID, JobDone)
|
||||||
|
if got := app.job(flashJob.ID); got == nil || got.Host != "titan-22" || !strings.Contains(got.Message, "host /tmp") {
|
||||||
|
t.Fatalf("unexpected flash default host job: %#v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
replaceJob, err := app.Replace("titan-15", "", hostTmpDevicePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Replace default host: %v", err)
|
||||||
|
}
|
||||||
|
waitForJobState(t, app, replaceJob.ID, JobDone)
|
||||||
|
if got := app.job(replaceJob.ID); got == nil || got.Host != "titan-22" {
|
||||||
|
t.Fatalf("unexpected replace default host job: %#v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashRejectsDuplicateActiveNodeJob(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
app.newJob("flash", "titan-15", "titan-22", "/dev/sdz")
|
||||||
|
|
||||||
|
if _, err := app.Flash("titan-15", "titan-22", "/dev/sdz"); err == nil || !strings.Contains(err.Error(), "already has an active") {
|
||||||
|
t.Fatalf("expected flash conflict, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashArtifactUpdatesProgressWhenVerificationSummaryIsMissing(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{flashMessage: `{"dest_path":"/dev/sdz","size_bytes":4096,"verified":true}`})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
job := app.newJob("flash", "titan-15", "titan-22", "/dev/sdz")
|
||||||
|
|
||||||
|
result, err := app.flashArtifact(job.ID, "registry.example/metis/titan-15")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("flashArtifact: %v", err)
|
||||||
|
}
|
||||||
|
if result.SizeBytes != 4096 {
|
||||||
|
t.Fatalf("unexpected flash result: %#v", result)
|
||||||
|
}
|
||||||
|
got := app.job(job.ID)
|
||||||
|
if got == nil || got.Written != 4096 || got.Total != 4096 || !strings.Contains(got.Message, "Verified the latest image write") {
|
||||||
|
t.Fatalf("unexpected flash artifact state: %#v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashOnlyWorkflowFailsVerification(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{flashMessage: `{"dest_path":"/dev/sdz","verified":false}`})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
|
||||||
|
job, err := app.Flash("titan-15", "titan-22", "/dev/sdz")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Flash: %v", err)
|
||||||
|
}
|
||||||
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if got := app.job(job.ID); got != nil && got.Status == JobError {
|
||||||
|
if !strings.Contains(got.Error, "verification did not succeed") {
|
||||||
|
t.Fatalf("unexpected flash verification error: %#v", got)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
}
|
||||||
|
t.Fatalf("flash job %s never failed verification: %#v", job.ID, app.job(job.ID))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildFlashAndReplaceRejectInvalidInputs(t *testing.T) {
|
||||||
|
kube := remoteWorkflowKubeServer(t, remoteKubeOptions{})
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
app := remoteTestApp(t, nil)
|
||||||
|
app.artifactStore["titan-15"] = ArtifactSummary{Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}
|
||||||
|
|
||||||
|
if _, err := app.Build("missing-node"); err == nil || !strings.Contains(err.Error(), "missing-node") {
|
||||||
|
t.Fatalf("expected build inventory error, got %v", err)
|
||||||
|
}
|
||||||
|
if _, err := app.Flash("titan-15", "titan-22", "/dev/sda"); err == nil || !strings.Contains(err.Error(), "not a current flash candidate") {
|
||||||
|
t.Fatalf("expected flash device validation error, got %v", err)
|
||||||
|
}
|
||||||
|
if _, err := app.Replace("titan-15", "titan-22", "/dev/sda"); err == nil || !strings.Contains(err.Error(), "not a current flash candidate") {
|
||||||
|
t.Fatalf("expected replace device validation error, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -74,7 +74,7 @@ func (a *App) RefreshDevices(host string) ([]Device, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) runBuild(job *Job, flash bool) {
|
func (a *App) runBuild(job *Job, flash bool) {
|
||||||
nodeSpec, class, err := a.inventory.FindNode(job.Node)
|
_, class, err := a.inventory.FindNode(job.Node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.failJob(job.ID, err)
|
a.failJob(job.ID, err)
|
||||||
a.metrics.RecordBuild(job.Node, "error")
|
a.metrics.RecordBuild(job.Node, "error")
|
||||||
@ -158,18 +158,42 @@ func (a *App) runBuild(job *Job, flash bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
a.setJob(job.ID, func(j *Job) {
|
result, err := a.runFlashSequence(job, artifactRef)
|
||||||
j.Stage = "preflight"
|
if err != nil {
|
||||||
j.StageStartedAt = time.Now().UTC()
|
|
||||||
j.Message = fmt.Sprintf("Validating %s and preparing the latest Harbor artifact for %s", prettyDeviceTarget(j.Device), j.Host)
|
|
||||||
j.ProgressPct = 78
|
|
||||||
j.Artifact = artifactRef + ":latest"
|
|
||||||
})
|
|
||||||
if _, err := a.ensureDevice(job.Host, job.Device); err != nil {
|
|
||||||
a.failJob(job.ID, err)
|
a.failJob(job.ID, err)
|
||||||
a.metrics.RecordFlash(job.Node, job.Host, "error")
|
a.metrics.RecordFlash(job.Node, job.Host, "error")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
a.metrics.RecordFlash(job.Node, job.Host, "ok")
|
||||||
|
a.appendFlashEvent(job, artifactRef, result)
|
||||||
|
a.completeFlashJob(job.ID, artifactRef, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) runFlash(job *Job) {
|
||||||
|
artifactRef := a.artifactRepo(job.Node)
|
||||||
|
result, err := a.runFlashSequence(job, artifactRef)
|
||||||
|
if err != nil {
|
||||||
|
a.failJob(job.ID, err)
|
||||||
|
a.metrics.RecordFlash(job.Node, job.Host, "error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
a.metrics.RecordFlash(job.Node, job.Host, "ok")
|
||||||
|
a.appendFlashEvent(job, artifactRef, result)
|
||||||
|
a.completeFlashJob(job.ID, artifactRef, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) runFlashSequence(job *Job, artifactRef string) (RemoteFlashResult, error) {
|
||||||
|
a.setJob(job.ID, func(j *Job) {
|
||||||
|
j.Status = JobRunning
|
||||||
|
j.Stage = "preflight"
|
||||||
|
j.StageStartedAt = time.Now().UTC()
|
||||||
|
j.Message = fmt.Sprintf("Validating %s and preparing the latest Harbor artifact for %s", prettyDeviceTarget(j.Device), j.Host)
|
||||||
|
j.ProgressPct = 80
|
||||||
|
j.Artifact = artifactRef + ":latest"
|
||||||
|
})
|
||||||
|
if _, err := a.ensureDevice(job.Host, job.Device); err != nil {
|
||||||
|
return RemoteFlashResult{}, err
|
||||||
|
}
|
||||||
if !strings.HasPrefix(job.Device, "hosttmp://") {
|
if !strings.HasPrefix(job.Device, "hosttmp://") {
|
||||||
if err := deleteNodeObject(job.Node); err != nil {
|
if err := deleteNodeObject(job.Node); err != nil {
|
||||||
a.appendEvent(Event{
|
a.appendEvent(Event{
|
||||||
@ -180,66 +204,111 @@ func (a *App) runBuild(job *Job, flash bool) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := a.flashArtifact(job.ID, artifactRef); err != nil {
|
return a.flashArtifact(job.ID, artifactRef)
|
||||||
a.failJob(job.ID, err)
|
}
|
||||||
a.metrics.RecordFlash(job.Node, job.Host, "error")
|
|
||||||
return
|
func (a *App) appendFlashEvent(job *Job, artifactRef string, result RemoteFlashResult) {
|
||||||
|
summary := fmt.Sprintf("Flashed and verified %s latest image on %s", job.Node, job.Host)
|
||||||
|
if strings.HasPrefix(job.Device, "hosttmp://") {
|
||||||
|
summary = fmt.Sprintf("Verified %s latest image on %s host scratch", job.Node, job.Host)
|
||||||
|
}
|
||||||
|
details := map[string]any{
|
||||||
|
"node": job.Node,
|
||||||
|
"device": job.Device,
|
||||||
|
"host": job.Host,
|
||||||
|
"artifact": artifactRef + ":latest",
|
||||||
|
"dest_path": result.DestPath,
|
||||||
|
"verified": result.Verified,
|
||||||
|
"verification_kind": result.VerificationKind,
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.BootPartition) != "" {
|
||||||
|
details["boot_partition"] = result.BootPartition
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.RootPartition) != "" {
|
||||||
|
details["root_partition"] = result.RootPartition
|
||||||
|
}
|
||||||
|
if len(result.CheckedFiles) > 0 {
|
||||||
|
details["checked_files"] = append([]string{}, result.CheckedFiles...)
|
||||||
}
|
}
|
||||||
a.metrics.RecordFlash(job.Node, job.Host, "ok")
|
|
||||||
a.appendEvent(Event{
|
a.appendEvent(Event{
|
||||||
Time: time.Now().UTC(),
|
Time: time.Now().UTC(),
|
||||||
Kind: "image.flash",
|
Kind: "image.flash",
|
||||||
Summary: fmt.Sprintf("Flashed %s latest image on %s", job.Node, job.Host),
|
Summary: summary,
|
||||||
Details: map[string]any{"node": job.Node, "device": job.Device, "host": job.Host, "artifact": artifactRef + ":latest"},
|
Details: details,
|
||||||
})
|
})
|
||||||
a.completeJob(job.ID, func(j *Job) {
|
|
||||||
j.Stage = "complete"
|
|
||||||
if strings.HasPrefix(j.Device, "hosttmp://") {
|
|
||||||
j.Message = fmt.Sprintf("Test flash complete on %s host /tmp.", j.Host)
|
|
||||||
} else {
|
|
||||||
j.Message = fmt.Sprintf("Flash complete on %s. Move the card into %s and power-cycle it.", j.Host, j.Node)
|
|
||||||
}
|
|
||||||
j.ProgressPct = 100
|
|
||||||
j.Artifact = artifactRef + ":latest"
|
|
||||||
})
|
|
||||||
|
|
||||||
_ = nodeSpec
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) flashArtifact(jobID, artifactRef string) error {
|
func (a *App) completeFlashJob(jobID, artifactRef string, result RemoteFlashResult) {
|
||||||
|
a.completeJob(jobID, func(j *Job) {
|
||||||
|
j.Stage = "complete"
|
||||||
|
j.ProgressPct = 100
|
||||||
|
j.Artifact = artifactRef + ":latest"
|
||||||
|
if strings.HasPrefix(j.Device, "hosttmp://") {
|
||||||
|
if strings.TrimSpace(result.VerificationSummary) != "" {
|
||||||
|
j.Message = fmt.Sprintf("%s Test flash complete on %s host /tmp.", strings.TrimRight(result.VerificationSummary, "."), j.Host)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
j.Message = fmt.Sprintf("Verified test flash on %s host /tmp.", j.Host)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.VerificationSummary) != "" {
|
||||||
|
j.Message = fmt.Sprintf("%s Move the card into %s and power-cycle it.", strings.TrimRight(result.VerificationSummary, "."), j.Node)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
j.Message = fmt.Sprintf("Flash verified on %s. Move the card into %s and power-cycle it.", j.Host, j.Node)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) flashArtifact(jobID, artifactRef string) (RemoteFlashResult, error) {
|
||||||
|
job := a.job(jobID)
|
||||||
|
if job == nil {
|
||||||
|
return RemoteFlashResult{}, fmt.Errorf("job %s no longer exists", jobID)
|
||||||
|
}
|
||||||
nodes := clusterNodes()
|
nodes := clusterNodes()
|
||||||
nodeMap := map[string]clusterNode{}
|
nodeMap := map[string]clusterNode{}
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
nodeMap[node.Name] = node
|
nodeMap[node.Name] = node
|
||||||
}
|
}
|
||||||
target, ok := nodeMap[a.job(jobID).Host]
|
target, ok := nodeMap[job.Host]
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("flash host %s is not a current cluster node", a.job(jobID).Host)
|
return RemoteFlashResult{}, fmt.Errorf("flash host %s is not a current cluster node", job.Host)
|
||||||
}
|
}
|
||||||
image := a.podImageForArch(target.Arch)
|
image := a.podImageForArch(target.Arch)
|
||||||
if image == "" {
|
if image == "" {
|
||||||
return fmt.Errorf("no runner image configured for arch %s", target.Arch)
|
return RemoteFlashResult{}, fmt.Errorf("no runner image configured for arch %s", target.Arch)
|
||||||
}
|
}
|
||||||
a.setJob(jobID, func(j *Job) {
|
a.setJob(jobID, func(j *Job) {
|
||||||
j.Stage = "flash"
|
j.Stage = "flash_pull"
|
||||||
j.StageStartedAt = time.Now().UTC()
|
j.StageStartedAt = time.Now().UTC()
|
||||||
j.Message = fmt.Sprintf("Pulling %s and writing it on %s", artifactRef+":latest", j.Host)
|
j.Message = fmt.Sprintf("Pulling %s and preparing it for %s", artifactRef+":latest", prettyDeviceTarget(j.Device))
|
||||||
j.ProgressPct = 84
|
j.ProgressPct = 84
|
||||||
})
|
})
|
||||||
podName := fmt.Sprintf("metis-flash-%d", time.Now().UTC().UnixNano())
|
podName := fmt.Sprintf("metis-flash-%d", time.Now().UTC().UnixNano())
|
||||||
logs, err := a.runRemotePod(jobID, podName, a.remoteFlashPodSpec(podName, target.Name, image, a.job(jobID).Node, a.job(jobID).Device, artifactRef))
|
logs, err := a.runRemotePod(jobID, podName, a.remoteFlashPodSpec(podName, target.Name, image, job.Node, job.Device, artifactRef))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return RemoteFlashResult{}, err
|
||||||
}
|
}
|
||||||
var payload map[string]any
|
var result RemoteFlashResult
|
||||||
if err := json.Unmarshal([]byte(strings.TrimSpace(logs)), &payload); err == nil {
|
if err := json.Unmarshal([]byte(strings.TrimSpace(logs)), &result); err != nil {
|
||||||
a.setJob(jobID, func(j *Job) {
|
return RemoteFlashResult{}, fmt.Errorf("decode remote flash output: %w: %s", err, strings.TrimSpace(logs))
|
||||||
if dest, ok := payload["dest_path"].(string); ok && dest != "" {
|
|
||||||
j.Message = fmt.Sprintf("Wrote latest artifact to %s", dest)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return nil
|
if !result.Verified {
|
||||||
|
return RemoteFlashResult{}, fmt.Errorf("flash verification did not succeed for %s on %s", prettyDeviceTarget(job.Device), job.Host)
|
||||||
|
}
|
||||||
|
a.setJob(jobID, func(j *Job) {
|
||||||
|
if result.SizeBytes > 0 {
|
||||||
|
j.Written = result.SizeBytes
|
||||||
|
j.Total = result.SizeBytes
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.VerificationSummary) != "" {
|
||||||
|
j.Message = result.VerificationSummary
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.DestPath) != "" {
|
||||||
|
j.Message = fmt.Sprintf("Verified the latest image write at %s", result.DestPath)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) heartbeatRemoteJob(jobID string) {
|
func (a *App) heartbeatRemoteJob(jobID string) {
|
||||||
@ -255,8 +324,8 @@ func (a *App) heartbeatRemoteJob(jobID string) {
|
|||||||
stageStart = j.StartedAt
|
stageStart = j.StartedAt
|
||||||
}
|
}
|
||||||
elapsed := time.Since(stageStart)
|
elapsed := time.Since(stageStart)
|
||||||
switch j.Stage {
|
switch {
|
||||||
case "build":
|
case j.Stage == "build":
|
||||||
progress, message := buildStageHeartbeat(j.Node, j.Builder, elapsed)
|
progress, message := buildStageHeartbeat(j.Node, j.Builder, elapsed)
|
||||||
if progress > j.ProgressPct {
|
if progress > j.ProgressPct {
|
||||||
j.ProgressPct = progress
|
j.ProgressPct = progress
|
||||||
@ -264,14 +333,14 @@ func (a *App) heartbeatRemoteJob(jobID string) {
|
|||||||
if strings.TrimSpace(message) != "" {
|
if strings.TrimSpace(message) != "" {
|
||||||
j.Message = message
|
j.Message = message
|
||||||
}
|
}
|
||||||
case "preflight":
|
case j.Stage == "preflight":
|
||||||
if j.ProgressPct < 80 {
|
if j.ProgressPct < 80 {
|
||||||
j.ProgressPct = 80
|
j.ProgressPct = 80
|
||||||
}
|
}
|
||||||
j.Message = fmt.Sprintf("Validating %s on %s and resolving the latest Harbor artifact", prettyDeviceTarget(j.Device), j.Host)
|
j.Message = fmt.Sprintf("Validating %s on %s and resolving the latest Harbor artifact", prettyDeviceTarget(j.Device), j.Host)
|
||||||
case "flash":
|
case isFlashStage(j.Stage):
|
||||||
if j.Total > 0 && j.Written > 0 {
|
if j.Stage == "flash_write" && j.Total > 0 && j.Written > 0 {
|
||||||
actual := 88 + (float64(j.Written)/float64(j.Total))*10
|
actual := 92 + (float64(j.Written)/float64(j.Total))*6
|
||||||
if actual > 98 {
|
if actual > 98 {
|
||||||
actual = 98
|
actual = 98
|
||||||
}
|
}
|
||||||
@ -281,7 +350,7 @@ func (a *App) heartbeatRemoteJob(jobID string) {
|
|||||||
j.Message = fmt.Sprintf("Writing %s of %s on %s", humanBytes(j.Written), humanBytes(j.Total), j.Host)
|
j.Message = fmt.Sprintf("Writing %s of %s on %s", humanBytes(j.Written), humanBytes(j.Total), j.Host)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
progress, message := flashStageHeartbeat(j.Host, j.Artifact, elapsed)
|
progress, message := flashStagePhaseHeartbeat(j.Stage, j.Host, j.Artifact, elapsed)
|
||||||
if progress > j.ProgressPct {
|
if progress > j.ProgressPct {
|
||||||
j.ProgressPct = progress
|
j.ProgressPct = progress
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,7 +29,7 @@ func TestRemoteWorkflowErrorBranches(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
job = app.newJob("flash", "titan-15", "titan-22", "/dev/sdz")
|
job = app.newJob("flash", "titan-15", "titan-22", "/dev/sdz")
|
||||||
if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil {
|
if _, err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil {
|
||||||
t.Fatal("expected flashArtifact error")
|
t.Fatal("expected flashArtifact error")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,7 +260,7 @@ func TestFlashArtifactAndHeartbeatBranches(t *testing.T) {
|
|||||||
app := remoteTestApp(t, nil)
|
app := remoteTestApp(t, nil)
|
||||||
|
|
||||||
job := app.newJob("replace", "titan-15", "missing-host", "/dev/sdz")
|
job := app.newJob("replace", "titan-15", "missing-host", "/dev/sdz")
|
||||||
if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil || !strings.Contains(err.Error(), "not a current cluster node") {
|
if _, err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil || !strings.Contains(err.Error(), "not a current cluster node") {
|
||||||
t.Fatalf("expected missing host flashArtifact error, got %v", err)
|
t.Fatalf("expected missing host flashArtifact error, got %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,7 +283,7 @@ func TestFlashArtifactAndHeartbeatBranches(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
app.setJob(job.ID, func(j *Job) {
|
app.setJob(job.ID, func(j *Job) {
|
||||||
j.Stage = "flash"
|
j.Stage = "flash_write"
|
||||||
j.ProgressPct = 80
|
j.ProgressPct = 80
|
||||||
j.Written = 120
|
j.Written = 120
|
||||||
j.Total = 100
|
j.Total = 100
|
||||||
@ -341,7 +341,7 @@ func remoteWorkflowKubeServer(t *testing.T, opts remoteKubeOptions) *httptest.Se
|
|||||||
buildPhase := defaultString(opts.buildPhase, "Succeeded")
|
buildPhase := defaultString(opts.buildPhase, "Succeeded")
|
||||||
buildMessage := defaultString(opts.buildMessage, `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`)
|
buildMessage := defaultString(opts.buildMessage, `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`)
|
||||||
flashPhase := defaultString(opts.flashPhase, "Succeeded")
|
flashPhase := defaultString(opts.flashPhase, "Succeeded")
|
||||||
flashMessage := defaultString(opts.flashMessage, `{"dest_path":"/var/tmp/metis-flash-test/titan-15.img"}`)
|
flashMessage := defaultString(opts.flashMessage, `{"dest_path":"/var/tmp/metis-flash-test/titan-15.img","verified":true,"verification_kind":"image-file","verification_summary":"Verified image layout at /var/tmp/metis-flash-test/titan-15.img; boot and writable partitions are present."}`)
|
||||||
nodes := opts.nodes
|
nodes := opts.nodes
|
||||||
if nodes == nil {
|
if nodes == nil {
|
||||||
nodes = []map[string]any{
|
nodes = []map[string]any{
|
||||||
|
|||||||
@ -28,17 +28,36 @@ func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
|
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
|
||||||
|
return flashStagePhaseHeartbeat("flash", host, artifact, elapsed)
|
||||||
|
}
|
||||||
|
|
||||||
|
func flashStagePhaseHeartbeat(stage, host, artifact string, elapsed time.Duration) (float64, string) {
|
||||||
seconds := elapsed.Seconds()
|
seconds := elapsed.Seconds()
|
||||||
switch {
|
switch strings.TrimSpace(stage) {
|
||||||
case seconds < 10:
|
case "flash_pull":
|
||||||
return ramp(seconds, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
|
return math.Min(88, ramp(seconds, 0, 12, 84, 88)), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
|
||||||
case seconds < 45:
|
case "flash_prepare", "flash_unpack":
|
||||||
return ramp(seconds, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host)
|
return math.Min(92, ramp(seconds, 0, 30, 88, 92)), fmt.Sprintf("Preparing the latest %s artifact for writing on %s", artifact, host)
|
||||||
|
case "flash_flush":
|
||||||
|
return math.Min(99, ramp(seconds, 0, 30, 98, 99)), fmt.Sprintf("Flushing the finished image write on %s", host)
|
||||||
|
case "flash_verify":
|
||||||
|
return math.Min(99.8, ramp(seconds, 0, 30, 99, 99.8)), fmt.Sprintf("Verifying the flashed recovery media on %s", host)
|
||||||
default:
|
default:
|
||||||
return math.Min(98, ramp(seconds, 45, 120, 96, 98)), fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
|
switch {
|
||||||
|
case seconds < 10:
|
||||||
|
return ramp(seconds, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
|
||||||
|
case seconds < 45:
|
||||||
|
return ramp(seconds, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host)
|
||||||
|
default:
|
||||||
|
return math.Min(98, ramp(seconds, 45, 120, 96, 98)), fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isFlashStage(stage string) bool {
|
||||||
|
return strings.HasPrefix(strings.TrimSpace(stage), "flash")
|
||||||
|
}
|
||||||
|
|
||||||
func prettyDeviceTarget(path string) string {
|
func prettyDeviceTarget(path string) string {
|
||||||
switch {
|
switch {
|
||||||
case strings.HasPrefix(path, "hosttmp://"):
|
case strings.HasPrefix(path, "hosttmp://"):
|
||||||
|
|||||||
@ -384,3 +384,31 @@ func TestSelectBuilderHostTieBreaksByName(t *testing.T) {
|
|||||||
t.Fatalf("expected alphabetical tie-breaker, got %s", node.Name)
|
t.Fatalf("expected alphabetical tie-breaker, got %s", node.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFlashPhaseHeartbeatAndManagedPathHelpers(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
stage string
|
||||||
|
minimum float64
|
||||||
|
phrase string
|
||||||
|
}{
|
||||||
|
{stage: "flash_pull", minimum: 84, phrase: "Pulling"},
|
||||||
|
{stage: "flash_prepare", minimum: 88, phrase: "Preparing"},
|
||||||
|
{stage: "flash_unpack", minimum: 90, phrase: "Preparing"},
|
||||||
|
{stage: "flash_flush", minimum: 98, phrase: "Flushing"},
|
||||||
|
{stage: "flash_verify", minimum: 99, phrase: "Verifying"},
|
||||||
|
{stage: "flash", minimum: 90, phrase: "Writing"},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
got, msg := flashStagePhaseHeartbeat(tc.stage, "titan-22", "registry.example/metis/titan-15:latest", 20*time.Second)
|
||||||
|
if got < tc.minimum || !strings.Contains(msg, tc.phrase) {
|
||||||
|
t.Fatalf("flashStagePhaseHeartbeat(%s) = %v %q", tc.stage, got, msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !managedPathsContain("/var/log/pods_/var/tmp", "/var/tmp") {
|
||||||
|
t.Fatal("expected managedPathsContain to match child paths")
|
||||||
|
}
|
||||||
|
if managedPathsContain("/var/log/pods", "/home/atlas") {
|
||||||
|
t.Fatal("managedPathsContain should reject unrelated paths")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -19,6 +19,24 @@ type RemoteProgressUpdate struct {
|
|||||||
TotalBytes int64 `json:"total_bytes,omitempty"`
|
TotalBytes int64 `json:"total_bytes,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoteFlashResult captures the verified outcome of a remote flash worker run.
|
||||||
|
type RemoteFlashResult struct {
|
||||||
|
Node string `json:"node,omitempty"`
|
||||||
|
Device string `json:"device,omitempty"`
|
||||||
|
DestPath string `json:"dest_path,omitempty"`
|
||||||
|
SizeBytes int64 `json:"size_bytes,omitempty"`
|
||||||
|
Verified bool `json:"verified,omitempty"`
|
||||||
|
VerificationKind string `json:"verification_kind,omitempty"`
|
||||||
|
VerificationSummary string `json:"verification_summary,omitempty"`
|
||||||
|
BootPartition string `json:"boot_partition,omitempty"`
|
||||||
|
RootPartition string `json:"root_partition,omitempty"`
|
||||||
|
BootLabel string `json:"boot_label,omitempty"`
|
||||||
|
RootLabel string `json:"root_label,omitempty"`
|
||||||
|
BootFSType string `json:"boot_fstype,omitempty"`
|
||||||
|
RootFSType string `json:"root_fstype,omitempty"`
|
||||||
|
CheckedFiles []string `json:"checked_files,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// ProgressLogLine formats a progress update for remote worker stdout.
|
// ProgressLogLine formats a progress update for remote worker stdout.
|
||||||
func ProgressLogLine(update RemoteProgressUpdate) string {
|
func ProgressLogLine(update RemoteProgressUpdate) string {
|
||||||
data, err := json.Marshal(update)
|
data, err := json.Marshal(update)
|
||||||
|
|||||||
@ -30,6 +30,7 @@ func (a *App) Handler() http.Handler {
|
|||||||
mux.HandleFunc("/api/state", a.withUIAuth(a.handleState))
|
mux.HandleFunc("/api/state", a.withUIAuth(a.handleState))
|
||||||
mux.HandleFunc("/api/devices", a.withUIAuth(a.handleDevices))
|
mux.HandleFunc("/api/devices", a.withUIAuth(a.handleDevices))
|
||||||
mux.HandleFunc("/api/jobs/build", a.withUIAuth(a.handleBuild))
|
mux.HandleFunc("/api/jobs/build", a.withUIAuth(a.handleBuild))
|
||||||
|
mux.HandleFunc("/api/jobs/flash", a.withUIAuth(a.handleFlash))
|
||||||
mux.HandleFunc("/api/jobs/replace", a.withUIAuth(a.handleReplace))
|
mux.HandleFunc("/api/jobs/replace", a.withUIAuth(a.handleReplace))
|
||||||
mux.HandleFunc("/api/sentinel/watch", a.withUIAuth(a.handleWatch))
|
mux.HandleFunc("/api/sentinel/watch", a.withUIAuth(a.handleWatch))
|
||||||
mux.HandleFunc("/", a.withUIAuth(a.handleIndex))
|
mux.HandleFunc("/", a.withUIAuth(a.handleIndex))
|
||||||
@ -106,6 +107,23 @@ func (a *App) handleBuild(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJSON(w, http.StatusAccepted, job)
|
writeJSON(w, http.StatusAccepted, job)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) handleFlash(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodPost {
|
||||||
|
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
values := requestValues(r)
|
||||||
|
node := values["node"]
|
||||||
|
host := values["host"]
|
||||||
|
device := values["device"]
|
||||||
|
job, err := a.Flash(node, host, device)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), statusForJobError(err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusAccepted, job)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) handleReplace(w http.ResponseWriter, r *http.Request) {
|
func (a *App) handleReplace(w http.ResponseWriter, r *http.Request) {
|
||||||
if r.Method != http.MethodPost {
|
if r.Method != http.MethodPost {
|
||||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
|||||||
@ -174,7 +174,7 @@
|
|||||||
}
|
}
|
||||||
.actions{
|
.actions{
|
||||||
display:grid;
|
display:grid;
|
||||||
grid-template-columns:repeat(3,minmax(0,1fr));
|
grid-template-columns:repeat(2,minmax(0,1fr));
|
||||||
gap:.7rem;
|
gap:.7rem;
|
||||||
margin-top:.9rem;
|
margin-top:.9rem;
|
||||||
}
|
}
|
||||||
@ -277,7 +277,7 @@
|
|||||||
<div class="stack">
|
<div class="stack">
|
||||||
<article class="card">
|
<article class="card">
|
||||||
<h2>Replacement Run</h2>
|
<h2>Replacement Run</h2>
|
||||||
<p class="hint">This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.</p>
|
<p class="hint">Use the mode that matches the recovery step you are in: build a fresh image, flash the latest published image, or run the full build-and-flash path in one shot.</p>
|
||||||
<div class="form-grid">
|
<div class="form-grid">
|
||||||
<label>Target node
|
<label>Target node
|
||||||
<select id="node-select"></select>
|
<select id="node-select"></select>
|
||||||
@ -296,6 +296,7 @@
|
|||||||
<div class="actions">
|
<div class="actions">
|
||||||
<button class="secondary" id="refresh-devices">Refresh media</button>
|
<button class="secondary" id="refresh-devices">Refresh media</button>
|
||||||
<button class="secondary" id="build-only">Build image only</button>
|
<button class="secondary" id="build-only">Build image only</button>
|
||||||
|
<button class="secondary" id="flash-only">Flash latest image</button>
|
||||||
<button id="replace-run">Build and flash</button>
|
<button id="replace-run">Build and flash</button>
|
||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
@ -395,6 +396,22 @@
|
|||||||
return seconds + 's';
|
return seconds + 's';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function prettyLabel(value){
|
||||||
|
return String(value || '')
|
||||||
|
.replaceAll('_', ' ')
|
||||||
|
.replace(/\b\w/g, (match)=>match.toUpperCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
function prettyJobKind(kind){
|
||||||
|
switch(kind){
|
||||||
|
case 'build': return 'Build';
|
||||||
|
case 'flash': return 'Flash Latest';
|
||||||
|
case 'replace': return 'Build + Flash';
|
||||||
|
case 'idle': return 'Idle';
|
||||||
|
default: return prettyLabel(kind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function banner(kind, title, text){
|
function banner(kind, title, text){
|
||||||
bannerEl.className = 'banner ' + kind;
|
bannerEl.className = 'banner ' + kind;
|
||||||
bannerTitleEl.textContent = title;
|
bannerTitleEl.textContent = title;
|
||||||
@ -445,10 +462,10 @@
|
|||||||
const wrap = document.createElement('div');
|
const wrap = document.createElement('div');
|
||||||
wrap.className = 'item';
|
wrap.className = 'item';
|
||||||
const statusClass = job.status === 'error' ? 'error' : (job.status === 'done' ? 'done' : (job.status === 'running' ? 'running' : ''));
|
const statusClass = job.status === 'error' ? 'error' : (job.status === 'done' ? 'done' : (job.status === 'running' ? 'running' : ''));
|
||||||
const title = job.kind.toUpperCase() + (job.node ? ' · ' + job.node : '');
|
const title = prettyJobKind(job.kind) + (job.node ? ' · ' + job.node : '');
|
||||||
const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
|
const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
|
||||||
const timingBits = [];
|
const timingBits = [];
|
||||||
if(job.stage){ timingBits.push('stage: ' + job.stage); }
|
if(job.stage){ timingBits.push('stage: ' + prettyLabel(job.stage)); }
|
||||||
const stageDuration = fmtDuration(job.stage_started_at || job.started_at, job.finished_at);
|
const stageDuration = fmtDuration(job.stage_started_at || job.started_at, job.finished_at);
|
||||||
if(stageDuration){
|
if(stageDuration){
|
||||||
timingBits.push((job.status === 'running' ? 'stage elapsed ' : 'stage duration ') + stageDuration);
|
timingBits.push((job.status === 'running' ? 'stage elapsed ' : 'stage duration ') + stageDuration);
|
||||||
@ -538,7 +555,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
const selectedHost = hostSelect.value || state.default_flash_host;
|
const selectedHost = hostSelect.value || state.default_flash_host;
|
||||||
hostNoteEl.textContent = 'Metis will inspect media and run the flash writer on ' + selectedHost + ' through a short-lived in-cluster worker. ' + state.default_flash_host + ' remains the default flash host.';
|
hostNoteEl.textContent = 'Metis will inspect media and run the flash writer/verifier on ' + selectedHost + ' through a short-lived in-cluster worker. ' + state.default_flash_host + ' remains the default flash host.';
|
||||||
|
|
||||||
if(state.device_error){
|
if(state.device_error){
|
||||||
deviceNoteEl.textContent = state.device_error;
|
deviceNoteEl.textContent = state.device_error;
|
||||||
@ -549,11 +566,13 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
const artifact = (state.artifacts || {})[nodeSelect.value];
|
const artifact = (state.artifacts || {})[nodeSelect.value];
|
||||||
artifactNoteEl.textContent = artifact && artifact.ref
|
const hasArtifact = !!(artifact && artifact.ref);
|
||||||
? 'Latest published image: ' + artifact.ref + ' (Metis keeps the newest 3 builds in Harbor).'
|
artifactNoteEl.textContent = hasArtifact
|
||||||
: 'Successful build-only runs publish <node>:latest into Harbor and keep the newest 3 builds per node.';
|
? 'Latest published image: ' + artifact.ref + (artifact.build_tag ? ' · build ' + artifact.build_tag : '') + '. Flash latest writes this image directly; build and flash rebuilds first.'
|
||||||
|
: 'Build image only publishes <node>:latest into Harbor. Flash latest stays disabled until Metis has a recorded latest image.';
|
||||||
|
|
||||||
document.getElementById('build-only').disabled = busy || !nodeSelect.value;
|
document.getElementById('build-only').disabled = busy || !nodeSelect.value;
|
||||||
|
document.getElementById('flash-only').disabled = busy || !nodeSelect.value || !deviceSelect.value || !!state.device_error || !hasArtifact;
|
||||||
document.getElementById('refresh-devices').disabled = busy;
|
document.getElementById('refresh-devices').disabled = busy;
|
||||||
document.getElementById('replace-run').disabled = busy || !nodeSelect.value || !deviceSelect.value || !!state.device_error;
|
document.getElementById('replace-run').disabled = busy || !nodeSelect.value || !deviceSelect.value || !!state.device_error;
|
||||||
document.getElementById('sentinel-watch').disabled = busy;
|
document.getElementById('sentinel-watch').disabled = busy;
|
||||||
@ -654,6 +673,24 @@
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
document.getElementById('flash-only').addEventListener('click', async ()=>{
|
||||||
|
if(!requireValue(nodeSelect.value, 'Choose the target node whose latest published image should be flashed.')){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(!requireValue(deviceSelect.value, 'Choose removable media before starting a flash-only run.')){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(state.device_error){
|
||||||
|
banner('error', 'Flash host unavailable', state.device_error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
await runAction('Starting flash latest', 'Queueing a flash-only run from the latest published Harbor image.', async ()=>{
|
||||||
|
await post('/api/jobs/flash', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
|
||||||
|
await refreshState({silent:true});
|
||||||
|
banner('success', 'Flash-only workflow queued', 'Metis is flashing the latest published image for ' + nodeSelect.value + ' onto ' + deviceSelect.value + ' and will verify it before finishing.');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
document.getElementById('replace-run').addEventListener('click', async ()=>{
|
document.getElementById('replace-run').addEventListener('click', async ()=>{
|
||||||
if(!requireValue(nodeSelect.value, 'Choose the target node whose SD card image should be built and flashed.')){
|
if(!requireValue(nodeSelect.value, 'Choose the target node whose SD card image should be built and flashed.')){
|
||||||
return;
|
return;
|
||||||
@ -668,7 +705,7 @@
|
|||||||
await runAction('Starting build and flash', 'Queueing the full replacement workflow now.', async ()=>{
|
await runAction('Starting build and flash', 'Queueing the full replacement workflow now.', async ()=>{
|
||||||
await post('/api/jobs/replace', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
|
await post('/api/jobs/replace', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
|
||||||
await refreshState({silent:true});
|
await refreshState({silent:true});
|
||||||
banner('success', 'Replacement workflow queued', 'Metis is building the image for ' + nodeSelect.value + ' and will flash ' + deviceSelect.value + '.');
|
banner('success', 'Replacement workflow queued', 'Metis is rebuilding the image for ' + nodeSelect.value + ', flashing ' + deviceSelect.value + ', and verifying the media before finishing.');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -185,7 +185,7 @@ func fakeKubeServer(t *testing.T) *httptest.Server {
|
|||||||
case strings.Contains(podName, "build"):
|
case strings.Contains(podName, "build"):
|
||||||
message = `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`
|
message = `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`
|
||||||
case strings.Contains(podName, "flash"):
|
case strings.Contains(podName, "flash"):
|
||||||
message = `{"dest_path":"/var/tmp/metis-flash-test/titan-15.img"}`
|
message = `{"dest_path":"/var/tmp/metis-flash-test/titan-15.img","verified":true,"verification_kind":"image-file","verification_summary":"Verified image layout at /var/tmp/metis-flash-test/titan-15.img; boot and writable partitions are present."}`
|
||||||
}
|
}
|
||||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
"metadata": map[string]any{"name": podName},
|
"metadata": map[string]any{"name": podName},
|
||||||
|
|||||||
@ -57,4 +57,4 @@ if [ "${test_rc}" -ne 0 ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
cd "${repo_root}/testing"
|
cd "${repo_root}/testing"
|
||||||
METIS_USE_EXISTING_COVERAGE=1 go test -v ./...
|
METIS_USE_EXISTING_COVERAGE=1 go test -count=1 -v ./...
|
||||||
|
|||||||
@ -8,6 +8,7 @@
|
|||||||
"metis/cmd/metis/inject_cmd.go": 95,
|
"metis/cmd/metis/inject_cmd.go": 95,
|
||||||
"metis/cmd/metis/main.go": 95,
|
"metis/cmd/metis/main.go": 95,
|
||||||
"metis/cmd/metis/remote_cmd.go": 95,
|
"metis/cmd/metis/remote_cmd.go": 95,
|
||||||
|
"metis/cmd/metis/remote_flash.go": 95,
|
||||||
"metis/cmd/metis/serve_cmd.go": 95,
|
"metis/cmd/metis/serve_cmd.go": 95,
|
||||||
"metis/pkg/config/config.go": 95,
|
"metis/pkg/config/config.go": 95,
|
||||||
"metis/pkg/facts/aggregate.go": 95,
|
"metis/pkg/facts/aggregate.go": 95,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user