metis/cmd/metis/remote_flash.go

305 lines
10 KiB
Go

package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"metis/pkg/service"
"metis/pkg/writer"
)
// flashPartitionTable is the subset of `sfdisk -J` output that
// readFlashPartitionTable decodes: the partition list nested under the
// top-level "partitiontable" object.
type flashPartitionTable struct {
PartitionTable struct {
// Partitions is scanned by verifyImageFileFlash for boot and Linux entries.
Partitions []flashTablePartition `json:"partitions"`
} `json:"partitiontable"`
}
// flashTablePartition is one entry of the decoded partition table. Only the
// "type" field is kept; it is the value passed to isBootPartitionType and
// isLinuxPartitionType.
type flashTablePartition struct {
Type string `json:"type"`
}
// flashBlockDevicePayload is the top-level JSON document that
// readBlockDevicePartitions decodes from `lsblk -J`.
type flashBlockDevicePayload struct {
Blockdevices []flashBlockDevice `json:"blockdevices"`
}
// flashBlockDevice is one device node from `lsblk -J` output. Its string
// fields match the PATH, TYPE, FSTYPE and LABEL columns requested by
// readBlockDevicePartitions.
type flashBlockDevice struct {
Path string `json:"path"`
// Type is compared against "part" by classifyFlashPartitions.
Type string `json:"type"`
FSType string `json:"fstype"`
Label string `json:"label"`
// Children holds the nested nodes; readBlockDevicePartitions returns the
// Children of the target device as its partitions.
Children []flashBlockDevice `json:"children"`
}
// requiredBootFiles lists the file names that verifyBootPartitionFiles expects
// to find at the top level of the mounted boot partition.
var requiredBootFiles = []string{"config.txt", "cmdline.txt", "boot.scr"}
// remoteFlashCmd implements the "remote-flash" subcommand. It pulls the
// ":latest" tag of --artifact-ref with oras into --work-dir, decompresses the
// image when resolvePulledArtifact reports it as compressed, writes it to the
// destination, flushes, verifies the result, and emits it through
// writeStructuredResult.
//
// A --device value starting with "hosttmp://" is not used as a path; the image
// is written to <host-tmp-dir>/<node>.img instead.
//
// Every failure is reported through fatalf.
// NOTE(review): the code following each fatalf call assumes fatalf does not
// return (e.g. info.Size() after a failed os.Stat) — confirm it exits.
func remoteFlashCmd(args []string) {
	fs := flag.NewFlagSet("remote-flash", flag.ExitOnError)
	node := fs.String("node", "", "target node")
	device := fs.String("device", "", "target device path or test sink")
	artifactRef := fs.String("artifact-ref", "", "harbor artifact ref without tag")
	workDir := fs.String("work-dir", filepath.Join(os.TempDir(), "metis-flash"), "working directory")
	harborRegistry := fs.String("harbor-registry", getenvOr("METIS_HARBOR_REGISTRY", "registry.bstein.dev"), "harbor registry host")
	harborUsername := fs.String("harbor-username", getenvOr("METIS_HARBOR_USERNAME", ""), "harbor username")
	harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
	hostTmpDir := fs.String("host-tmp-dir", "/host-tmp", "mounted host tmp dir for test writes")
	// With flag.ExitOnError, Parse terminates the process itself on bad input,
	// so the returned error carries no additional information.
	_ = fs.Parse(args)
	if *node == "" || *device == "" || *artifactRef == "" {
		fatalf("--node, --device, and --artifact-ref are required")
	}
	if err := os.MkdirAll(*workDir, 0o755); err != nil {
		fatalf("mkdir workdir: %v", err)
	}

	emitStageProgress("flash_pull", 84, fmt.Sprintf("Pulling the latest Harbor artifact for %s", *node))
	if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
		fatalf("oras login: %v", err)
	}
	if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
		fatalf("oras pull: %v", err)
	}

	emitStageProgress("flash_prepare", 88, fmt.Sprintf("Preparing the downloaded image for %s", *node))
	imagePath, compressed, err := resolvePulledArtifact(*workDir)
	if err != nil {
		fatalf("resolve artifact: %v", err)
	}

	rawImage := imagePath
	if compressed {
		emitStageProgress("flash_unpack", 90, fmt.Sprintf("Decompressing the image for %s before writing", *node))
		rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))

		// Run xz directly and redirect its stdout to a file we open ourselves.
		// Building a shell command line from the paths would let a quote
		// character in --node or in the artifact file name break out of the
		// quoting and be interpreted by the shell.
		outFile, createErr := os.Create(rawImage)
		if createErr != nil {
			fatalf("create raw image: %v", createErr)
		}
		var xzStderr strings.Builder
		xz := exec.Command("xz", "-dc", "--", imagePath)
		xz.Stdout = outFile
		xz.Stderr = &xzStderr
		runErr := xz.Run()
		// Close before reporting so a failed decompression does not leak the
		// descriptor and a failed close (short write) is not silently lost.
		closeErr := outFile.Close()
		if runErr != nil {
			fatalf("xz stream decompress: %v: %s", runErr, strings.TrimSpace(xzStderr.String()))
		}
		if closeErr != nil {
			fatalf("close raw image: %v", closeErr)
		}
	}

	destPath := *device
	if strings.HasPrefix(destPath, "hosttmp://") {
		if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil {
			fatalf("mkdir host tmp dir: %v", err)
		}
		destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
	}

	info, err := os.Stat(rawImage)
	if err != nil {
		fatalf("stat raw image: %v", err)
	}
	imageSize := info.Size()

	emitStageProgress("flash_write", 92, fmt.Sprintf("Writing the latest image for %s to %s", *node, destPath))
	writeEmitter := newProgressEmitter("flash_write", 92, 98, fmt.Sprintf("Writing the latest image for %s", *node), true)
	if err := writer.WriteImageWithProgress(context.Background(), rawImage, destPath, writeEmitter); err != nil {
		fatalf("write image: %v", err)
	}

	flushFlashTarget(destPath, *node)
	result, err := verifyFlashDestination(destPath)
	if err != nil {
		fatalf("verify flash output: %v", err)
	}

	// Device keeps the value the caller passed; DestPath is where the image
	// actually went (they differ for hosttmp:// sinks).
	result.Node = *node
	result.Device = *device
	result.DestPath = destPath
	result.SizeBytes = imageSize
	writeStructuredResult(result)
}
// flushFlashTarget reports the flush stage and runs `sync`. When destPath is
// under /dev/ it additionally flushes the device buffers and asks for the
// partition table to be re-read (blockdev, partprobe, udevadm settle).
//
// Every command is best-effort: exit statuses are deliberately discarded.
func flushFlashTarget(destPath, node string) {
	emitStageProgress("flash_flush", 98.5, fmt.Sprintf("Flushing the finished image for %s", node))
	_ = exec.Command("sync").Run()

	if strings.HasPrefix(destPath, "/dev/") {
		// Executed strictly in this order.
		deviceCommands := [][]string{
			{"blockdev", "--flushbufs", destPath},
			{"blockdev", "--rereadpt", destPath},
			{"partprobe", destPath},
			{"udevadm", "settle", "--timeout=10"},
		}
		for _, argv := range deviceCommands {
			_ = exec.Command(argv[0], argv[1:]...).Run()
		}
	}
}
// verifyFlashDestination reports the verify stage and then delegates to
// verifyBlockDeviceFlash for paths under /dev/ or to verifyImageFileFlash for
// everything else, returning whatever the chosen verifier returns.
func verifyFlashDestination(destPath string) (service.RemoteFlashResult, error) {
	emitStageProgress("flash_verify", 99.2, fmt.Sprintf("Verifying the flashed recovery media at %s", destPath))

	verify := verifyImageFileFlash
	if strings.HasPrefix(destPath, "/dev/") {
		verify = verifyBlockDeviceFlash
	}
	return verify(destPath)
}
// verifyImageFileFlash checks an image file by reading its partition table and
// requiring at least one entry accepted by isBootPartitionType and at least
// one accepted by isLinuxPartitionType. No partition contents are inspected.
//
// It returns a result with Verified set and VerificationKind "image-file", or
// an error when the table cannot be read or either kind of partition is absent.
func verifyImageFileFlash(destPath string) (service.RemoteFlashResult, error) {
	var none service.RemoteFlashResult

	layout, err := readFlashPartitionTable(destPath)
	if err != nil {
		return none, err
	}

	var bootSeen, rootSeen bool
	for _, entry := range layout.PartitionTable.Partitions {
		bootSeen = bootSeen || isBootPartitionType(entry.Type)
		rootSeen = rootSeen || isLinuxPartitionType(entry.Type)
	}
	if !(bootSeen && rootSeen) {
		return none, fmt.Errorf("image %s does not expose the expected boot and writable partitions", destPath)
	}

	summary := fmt.Sprintf("Verified image layout at %s; boot and writable partitions are present.", destPath)
	return service.RemoteFlashResult{
		Verified:            true,
		VerificationKind:    "image-file",
		VerificationSummary: summary,
	}, nil
}
// verifyBlockDeviceFlash verifies a flashed block device. Each attempt lists
// the device's partitions, classifies a boot and a root partition, and checks
// the required files on the boot partition. A failed attempt is followed by a
// partition-table rescan and a one-second sleep; new attempts start until 15
// seconds have passed since the call began.
//
// On success it returns a result with VerificationKind "block-device" and the
// partition details filled in. Otherwise it returns the error from the most
// recent attempt.
func verifyBlockDeviceFlash(destPath string) (service.RemoteFlashResult, error) {
	// attempt runs one complete verification pass and stops at the first
	// step that fails.
	attempt := func() (service.RemoteFlashResult, error) {
		parts, err := readBlockDevicePartitions(destPath)
		if err != nil {
			return service.RemoteFlashResult{}, err
		}
		boot, root, err := classifyFlashPartitions(parts)
		if err != nil {
			return service.RemoteFlashResult{}, err
		}
		checkedFiles, err := verifyBootPartitionFiles(boot.Path)
		if err != nil {
			return service.RemoteFlashResult{}, err
		}

		// Fall back to the device node name when a partition has no label.
		bootLabel := firstNonEmpty(boot.Label, filepath.Base(boot.Path))
		rootLabel := firstNonEmpty(root.Label, filepath.Base(root.Path))
		return service.RemoteFlashResult{
			Verified:            true,
			VerificationKind:    "block-device",
			VerificationSummary: fmt.Sprintf("Verified %s; %s and %s are present and the boot files look correct.", destPath, bootLabel, rootLabel),
			BootPartition:       boot.Path,
			RootPartition:       root.Path,
			BootLabel:           boot.Label,
			RootLabel:           root.Label,
			BootFSType:          boot.FSType,
			RootFSType:          root.FSType,
			CheckedFiles:        checkedFiles,
		}, nil
	}

	var lastErr error
	deadline := time.Now().Add(15 * time.Second)
	for time.Now().Before(deadline) {
		result, err := attempt()
		if err == nil {
			return result, nil
		}
		lastErr = err

		// Best-effort rescan before the next attempt; exit statuses are ignored.
		_ = exec.Command("blockdev", "--rereadpt", destPath).Run()
		_ = exec.Command("partprobe", destPath).Run()
		_ = exec.Command("udevadm", "settle", "--timeout=10").Run()
		time.Sleep(time.Second)
	}

	if lastErr == nil {
		lastErr = fmt.Errorf("timed out waiting for the flashed partitions on %s", destPath)
	}
	return service.RemoteFlashResult{}, lastErr
}
// readFlashPartitionTable runs `sfdisk -J destPath` and decodes the JSON it
// prints on stdout into a flashPartitionTable.
//
// Stdout and stderr are captured separately. Only stdout is handed to the JSON
// decoder, so diagnostics that sfdisk writes to stderr while still exiting
// successfully cannot corrupt the payload. Stderr is used solely to enrich the
// error message when the command fails.
//
// It returns an error when the command fails or its stdout is not valid JSON
// for the expected shape.
func readFlashPartitionTable(destPath string) (flashPartitionTable, error) {
	var stderr strings.Builder
	cmd := exec.Command("sfdisk", "-J", destPath)
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		return flashPartitionTable{}, fmt.Errorf("sfdisk -J %s: %w: %s", destPath, err, strings.TrimSpace(stderr.String()))
	}

	var table flashPartitionTable
	if err := json.Unmarshal(out, &table); err != nil {
		return flashPartitionTable{}, fmt.Errorf("decode partition table for %s: %w", destPath, err)
	}
	return table, nil
}
// readBlockDevicePartitions runs `lsblk -J -o PATH,TYPE,FSTYPE,LABEL destPath`
// and returns the child nodes of the reported device.
//
// The device whose path equals destPath is preferred. If none matches but
// lsblk reported exactly one top-level device, that device's children are
// returned instead. Otherwise an error is returned.
//
// Stdout and stderr are captured separately. Only stdout is decoded as JSON,
// so diagnostics on stderr cannot corrupt the payload. Stderr is used solely
// in the error message when the command fails.
func readBlockDevicePartitions(destPath string) ([]flashBlockDevice, error) {
	var stderr strings.Builder
	cmd := exec.Command("lsblk", "-J", "-o", "PATH,TYPE,FSTYPE,LABEL", destPath)
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("lsblk %s: %w: %s", destPath, err, strings.TrimSpace(stderr.String()))
	}

	var payload flashBlockDevicePayload
	if err := json.Unmarshal(out, &payload); err != nil {
		return nil, fmt.Errorf("decode lsblk output for %s: %w", destPath, err)
	}

	for _, device := range payload.Blockdevices {
		if device.Path == destPath {
			return device.Children, nil
		}
	}
	// No exact path match: accept a lone top-level device.
	if len(payload.Blockdevices) == 1 {
		return payload.Blockdevices[0].Children, nil
	}
	return nil, fmt.Errorf("lsblk did not report partitions for %s", destPath)
}
// classifyFlashPartitions picks the boot and root partitions out of parts.
//
// Only entries whose Type is "part" are considered. Labels and filesystem
// types are compared after trimming whitespace and lower-casing. The first
// entry labelled "system-boot" or formatted vfat/fat/fat32 becomes boot; the
// first remaining entry labelled "writable" or formatted ext4 becomes root.
// An entry chosen as boot is never also considered for root.
//
// It returns an error if either partition cannot be found; a missing boot
// partition is reported first.
func classifyFlashPartitions(parts []flashBlockDevice) (flashBlockDevice, flashBlockDevice, error) {
	var boot, root flashBlockDevice

	for i := range parts {
		candidate := parts[i]
		if candidate.Type != "part" {
			continue
		}

		label := strings.ToLower(strings.TrimSpace(candidate.Label))
		fsType := strings.ToLower(strings.TrimSpace(candidate.FSType))

		looksLikeBoot := label == "system-boot"
		switch fsType {
		case "vfat", "fat", "fat32":
			looksLikeBoot = true
		}
		looksLikeRoot := label == "writable" || fsType == "ext4"

		// Boot takes precedence; a slot that is already filled is not replaced.
		switch {
		case boot.Path == "" && looksLikeBoot:
			boot = candidate
		case root.Path == "" && looksLikeRoot:
			root = candidate
		}
	}

	switch {
	case boot.Path == "":
		return flashBlockDevice{}, flashBlockDevice{}, fmt.Errorf("could not find a boot partition with a FAT filesystem")
	case root.Path == "":
		return flashBlockDevice{}, flashBlockDevice{}, fmt.Errorf("could not find a writable ext4 partition")
	}
	return boot, root, nil
}
// verifyBootPartitionFiles mounts partitionPath read-only on a fresh temporary
// directory and checks that every name in requiredBootFiles exists at its top
// level. The partition is unmounted and the directory removed before
// returning.
//
// It returns the names that were checked, or an error when the temporary
// directory cannot be created, the mount fails, a file is absent, or a file
// cannot be inspected.
func verifyBootPartitionFiles(partitionPath string) ([]string, error) {
	mountDir, err := os.MkdirTemp("", "metis-boot-")
	if err != nil {
		return nil, err
	}
	// os.Remove rather than os.RemoveAll: if the unmount below fails, the
	// directory is still a mount point and a recursive delete would walk into
	// the flashed partition. Remove only ever deletes an empty directory.
	defer func() { _ = os.Remove(mountDir) }()

	mount := exec.Command("mount", "-o", "ro", partitionPath, mountDir)
	if out, err := mount.CombinedOutput(); err != nil {
		return nil, fmt.Errorf("mount %s read-only: %v: %s", partitionPath, err, strings.TrimSpace(string(out)))
	}
	// Best-effort unmount. Registered after the mount succeeded, so it runs
	// before the directory removal deferred above.
	defer func() { _ = exec.Command("umount", mountDir).Run() }()

	checked := make([]string, 0, len(requiredBootFiles))
	for _, name := range requiredBootFiles {
		_, statErr := os.Stat(filepath.Join(mountDir, name))
		switch {
		case statErr == nil:
			checked = append(checked, name)
		case os.IsNotExist(statErr):
			return nil, fmt.Errorf("boot partition %s is missing %s", partitionPath, name)
		default:
			// I/O or permission problems are not the same as an absent file;
			// report them as what they are.
			return nil, fmt.Errorf("boot partition %s: checking %s: %w", partitionPath, name, statErr)
		}
	}
	return checked, nil
}
// isBootPartitionType reports whether partType, after trimming whitespace and
// lower-casing, is one of the partition type identifiers this tool accepts for
// a boot partition: a set of short hex codes or one GUID-formatted value.
//
// NOTE(review): these look like the MBR FAT32/EFI codes and the GPT EFI System
// Partition GUID — confirm against sfdisk's type list.
func isBootPartitionType(partType string) bool {
	switch strings.ToLower(strings.TrimSpace(partType)) {
	case "b", "c", "0b", "0c", "ef", "ef00",
		"c12a7328-f81f-11d2-ba4b-00a0c93ec93b":
		return true
	default:
		return false
	}
}
// isLinuxPartitionType reports whether partType, after trimming whitespace and
// lower-casing, is one of the partition type identifiers this tool accepts for
// the root partition: "83", "8300", or one GUID-formatted value.
//
// NOTE(review): these look like the MBR Linux code and the GPT Linux
// filesystem GUID — confirm against sfdisk's type list.
func isLinuxPartitionType(partType string) bool {
	switch strings.ToLower(strings.TrimSpace(partType)) {
	case "83", "8300",
		"0fc63daf-8483-4772-8e79-3d69d8477de4":
		return true
	default:
		return false
	}
}
// firstNonEmpty returns the first argument that is not blank, with surrounding
// whitespace trimmed. It returns "" when every argument is blank or none are
// given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		candidate := strings.TrimSpace(values[i])
		if candidate == "" {
			continue
		}
		return candidate
	}
	return ""
}