service: run remote build and flash workflows

Brad Stein 2026-03-31 20:42:35 -03:00
parent a6ef5a0ff6
commit a148e77335
13 changed files with 1443 additions and 405 deletions

Dockerfile

@@ -5,7 +5,7 @@ ARG TARGETPLATFORM
 ARG TARGETOS
 ARG TARGETARCH

-FROM --platform=$BUILDPLATFORM golang:1.22-bookworm AS build
+FROM --platform=$BUILDPLATFORM golang:1.23-bookworm AS build
 ARG TARGETOS
 ARG TARGETARCH
@@ -17,7 +17,8 @@ COPY . .
 RUN --mount=type=cache,target=/root/.cache/go-build \
     --mount=type=cache,target=/go/pkg/mod \
     CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/metis ./cmd/metis && \
-    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/metis-sentinel ./cmd/metis-sentinel
+    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/metis-sentinel ./cmd/metis-sentinel && \
+    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/oras oras.land/oras/cmd/oras

 FROM debian:bookworm-slim AS runtime-base
@@ -28,6 +29,7 @@ RUN apt-get update \
 WORKDIR /app
 COPY --from=build /out/metis /usr/local/bin/metis
 COPY --from=build /out/metis-sentinel /usr/local/bin/metis-sentinel
+COPY --from=build /out/oras /usr/local/bin/oras
 COPY inventory.example.yaml /app/inventory.example.yaml
 COPY inventory.titan-rpi4.yaml /app/inventory.titan-rpi4.yaml
 COPY overlays /app/overlays

Jenkinsfile (vendored, 2 changed lines)

@@ -49,7 +49,7 @@ spec:
 - name: harbor-config
   mountPath: /docker-config
 - name: tester
-  image: golang:1.22-bookworm
+  image: golang:1.23-bookworm
   command: ["cat"]
   tty: true
   volumeMounts:

cmd/metis/main.go

@@ -32,6 +32,12 @@ func main() {
 		configCmd(os.Args[2:])
 	case "facts":
 		factsCmd(os.Args[2:])
+	case "remote-devices":
+		remoteDevicesCmd(os.Args[2:])
+	case "remote-build":
+		remoteBuildCmd(os.Args[2:])
+	case "remote-flash":
+		remoteFlashCmd(os.Args[2:])
 	default:
 		usage()
 		os.Exit(1)
@@ -39,7 +45,7 @@ func main() {
 }

 func usage() {
-	fmt.Fprintf(os.Stderr, "Usage: metis <plan|burn|image|serve|inject|config|facts> [options]\n")
+	fmt.Fprintf(os.Stderr, "Usage: metis <plan|burn|image|serve|inject|config|facts|remote-devices|remote-build|remote-flash> [options]\n")
 }

 func loadInventory(path string) *inventory.Inventory {

cmd/metis/remote_cmd.go (new file, 349 lines)

@@ -0,0 +1,349 @@
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
"metis/pkg/plan"
"metis/pkg/service"
"metis/pkg/writer"
)
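// remoteDevicesCmd enumerates flash-candidate block devices on the local
// host, appends the hosttmp:// test sink, and prints the sorted list as
// indented JSON on stdout for the service to parse.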
func remoteDevicesCmd(args []string) {
fs := flag.NewFlagSet("remote-devices", flag.ExitOnError)
maxBytes := fs.Int64("max-device-bytes", 300000000000, "max real removable device size")
hostTmpDir := fs.String("host-tmp-dir", "/tmp/metis-flash-test", "host tmp dir for test writes")
fs.Parse(args)
devices, err := localFlashDevices(*maxBytes, *hostTmpDir)
if err != nil {
log.Fatalf("remote devices: %v", err)
}
sort.Slice(devices, func(i, j int) bool {
left := localDeviceScore(devices[i])
right := localDeviceScore(devices[j])
if left != right {
return left > right
}
if devices[i].SizeBytes != devices[j].SizeBytes {
return devices[i].SizeBytes < devices[j].SizeBytes
}
return devices[i].Path < devices[j].Path
})
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
_ = enc.Encode(map[string]any{"devices": devices})
}
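// remoteBuildCmd builds the node image locally, xz-compresses it, and uses
// the bundled oras CLI to push the image plus a metadata document to
// Harbor, tagging the build tag and retagging it as latest. On success it
// prints an ArtifactSummary as JSON.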
func remoteBuildCmd(args []string) {
fs := flag.NewFlagSet("remote-build", flag.ExitOnError)
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
node := fs.String("node", "", "target node")
cacheDir := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
workDir := fs.String("work-dir", filepath.Join(os.TempDir(), "metis-work"), "working directory")
artifactRef := fs.String("artifact-ref", "", "harbor artifact ref without tag")
buildTag := fs.String("build-tag", "", "artifact build tag")
harborRegistry := fs.String("harbor-registry", getenvOr("METIS_HARBOR_REGISTRY", "registry.bstein.dev"), "harbor registry host")
harborUsername := fs.String("harbor-username", getenvOr("METIS_HARBOR_USERNAME", ""), "harbor username")
harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
fs.Parse(args)
if *node == "" || *artifactRef == "" || *buildTag == "" {
log.Fatalf("--node, --artifact-ref, and --build-tag are required")
}
if err := os.MkdirAll(*workDir, 0o755); err != nil {
log.Fatalf("mkdir workdir: %v", err)
}
output := filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
inv := loadInventory(*invPath)
if err := plan.BuildImageFile(context.Background(), inv, *node, *cacheDir, output); err != nil {
log.Fatalf("build image: %v", err)
}
if err := exec.Command("xz", "-T0", "-z", "-f", output).Run(); err != nil {
log.Fatalf("xz compress: %v", err)
}
compressedPath := output + ".xz"
info, err := os.Stat(compressedPath)
if err != nil {
log.Fatalf("stat compressed image: %v", err)
}
metadataPath := filepath.Join(*workDir, "metadata.json")
builtAt := time.Now().UTC()
meta := map[string]any{
"node": *node,
"artifact_ref": *artifactRef,
"build_tag": *buildTag,
"built_at": builtAt.Format(time.RFC3339),
"size_bytes": info.Size(),
"compressed": true,
}
metaBytes, err := json.MarshalIndent(meta, "", " ")
if err != nil {
log.Fatalf("encode metadata: %v", err)
}
if err := os.WriteFile(metadataPath, metaBytes, 0o644); err != nil {
log.Fatalf("write metadata: %v", err)
}
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
log.Fatalf("oras login: %v", err)
}
taggedRef := fmt.Sprintf("%s:%s", *artifactRef, *buildTag)
if err := orasPush(taggedRef, compressedPath, metadataPath); err != nil {
log.Fatalf("oras push: %v", err)
}
if err := orasTag(taggedRef, "latest"); err != nil {
log.Fatalf("oras tag latest: %v", err)
}
summary := service.ArtifactSummary{
Node: *node,
Ref: fmt.Sprintf("%s:latest", *artifactRef),
BuildTag: *buildTag,
LocalPath: compressedPath,
Compressed: true,
UpdatedAt: builtAt,
SizeBytes: info.Size(),
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
_ = enc.Encode(summary)
}
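// remoteFlashCmd pulls the latest artifact for the node from Harbor,
// decompresses it when needed, and writes it to the target device. A
// device of the form hosttmp://... redirects the write to a file under
// --host-tmp-dir so test runs never touch real media.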
func remoteFlashCmd(args []string) {
fs := flag.NewFlagSet("remote-flash", flag.ExitOnError)
node := fs.String("node", "", "target node")
device := fs.String("device", "", "target device path or test sink")
artifactRef := fs.String("artifact-ref", "", "harbor artifact ref without tag")
workDir := fs.String("work-dir", filepath.Join(os.TempDir(), "metis-flash"), "working directory")
harborRegistry := fs.String("harbor-registry", getenvOr("METIS_HARBOR_REGISTRY", "registry.bstein.dev"), "harbor registry host")
harborUsername := fs.String("harbor-username", getenvOr("METIS_HARBOR_USERNAME", ""), "harbor username")
harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
hostTmpDir := fs.String("host-tmp-dir", "/host-tmp/metis-flash-test", "mounted host tmp dir for test writes")
fs.Parse(args)
if *node == "" || *device == "" || *artifactRef == "" {
log.Fatalf("--node, --device, and --artifact-ref are required")
}
if err := os.MkdirAll(*workDir, 0o755); err != nil {
log.Fatalf("mkdir workdir: %v", err)
}
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
log.Fatalf("oras login: %v", err)
}
if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
log.Fatalf("oras pull: %v", err)
}
imagePath, compressed, err := resolvePulledArtifact(*workDir)
if err != nil {
log.Fatalf("resolve artifact: %v", err)
}
rawImage := imagePath
if compressed {
rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage))
if out, err := cmd.CombinedOutput(); err != nil {
log.Fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out)))
}
}
destPath := *device
if strings.HasPrefix(destPath, "hosttmp://") {
if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil {
log.Fatalf("mkdir host tmp dir: %v", err)
}
destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
}
if err := writer.WriteImage(context.Background(), rawImage, destPath); err != nil {
log.Fatalf("write image: %v", err)
}
_ = exec.Command("sync").Run()
if strings.HasPrefix(destPath, "/dev/") {
_ = exec.Command("blockdev", "--flushbufs", destPath).Run()
}
info, err := os.Stat(destPath)
if err != nil {
log.Fatalf("stat destination: %v", err)
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
_ = enc.Encode(map[string]any{
"node": *node,
"device": *device,
"dest_path": destPath,
"size_bytes": info.Size(),
})
}
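// localFlashDevices shells out to lsblk and keeps only whole disks that
// look like removable media (USB transport, or flagged removable/hotplug)
// and are no larger than maxBytes; it always appends the hosttmp:// sink.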
func localFlashDevices(maxBytes int64, hostTmpDir string) ([]service.Device, error) {
cmd := exec.Command("lsblk", "-J", "-b", "-o", "NAME,PATH,RM,HOTPLUG,SIZE,MODEL,TRAN,TYPE")
out, err := cmd.Output()
if err != nil {
return nil, err
}
var payload struct {
Blockdevices []struct {
Name string `json:"name"`
Path string `json:"path"`
RM bool `json:"rm"`
Hotplug bool `json:"hotplug"`
Size any `json:"size"`
Model string `json:"model"`
Tran string `json:"tran"`
Type string `json:"type"`
} `json:"blockdevices"`
}
if err := json.Unmarshal(out, &payload); err != nil {
return nil, err
}
devices := make([]service.Device, 0, len(payload.Blockdevices)+1)
for _, dev := range payload.Blockdevices {
if dev.Type != "disk" {
continue
}
size := int64(0)
switch value := dev.Size.(type) {
case string:
size, _ = strconv.ParseInt(value, 10, 64)
case float64:
size = int64(value)
}
if size <= 0 || size > maxBytes {
continue
}
if dev.Tran != "usb" && !dev.RM && !dev.Hotplug {
continue
}
devices = append(devices, service.Device{
Name: dev.Name,
Path: dev.Path,
Model: strings.TrimSpace(dev.Model),
Transport: dev.Tran,
Type: dev.Type,
Removable: dev.RM,
Hotplug: dev.Hotplug,
SizeBytes: size,
})
}
devices = append(devices, service.Device{
Name: "host-tmp",
Path: "hosttmp:///tmp",
Model: "Host /tmp",
Transport: "test",
Type: "file",
Note: fmt.Sprintf("Test-only host write target under %s", hostTmpDir),
Removable: false,
Hotplug: false,
SizeBytes: 1,
})
return devices, nil
}
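// localDeviceScore ranks candidates so USB, removable, and hotplug media
// sort first and the hosttmp:// test sink always sorts last.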
func localDeviceScore(device service.Device) int {
score := 0
if strings.HasPrefix(device.Path, "hosttmp://") {
return -100
}
if device.Transport == "usb" {
score += 50
}
if device.Removable {
score += 30
}
if device.Hotplug {
score += 20
}
if strings.Contains(strings.ToLower(device.Model), "sd") {
score += 10
}
return score
}
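// The oras* helpers wrap the bundled oras CLI; on failure they return the
// trimmed combined output so pod logs carry the registry's response.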
func orasLogin(registry, username, password string) error {
if strings.TrimSpace(username) == "" || strings.TrimSpace(password) == "" {
return fmt.Errorf("harbor credentials missing")
}
cmd := exec.Command("oras", "login", registry, "-u", username, "-p", password)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out)))
}
return nil
}
func orasPush(ref, imagePath, metadataPath string) error {
cmd := exec.Command("oras", "push", ref,
fmt.Sprintf("%s:application/x-raw-disk-image", imagePath),
fmt.Sprintf("%s:application/json", metadataPath),
)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out)))
}
return nil
}
func orasTag(ref string, tags ...string) error {
args := append([]string{"tag", ref}, tags...)
cmd := exec.Command("oras", args...)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out)))
}
return nil
}
func orasPull(ref, outDir string) error {
cmd := exec.Command("oras", "pull", ref, "-o", outDir)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out)))
}
return nil
}
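// resolvePulledArtifact walks the pull directory, preferring a compressed
// .img.xz over a raw .img, and reports which form it found.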
func resolvePulledArtifact(dir string) (string, bool, error) {
var rawPath string
var compressedPath string
err := filepath.WalkDir(dir, func(path string, d os.DirEntry, walkErr error) error {
if walkErr != nil {
return walkErr
}
if d.IsDir() {
return nil
}
switch {
case strings.HasSuffix(path, ".img.xz"):
compressedPath = path
case strings.HasSuffix(path, ".img"):
rawPath = path
}
return nil
})
if err != nil {
return "", false, err
}
if compressedPath != "" {
return compressedPath, true, nil
}
if rawPath != "" {
return rawPath, false, nil
}
return "", false, fmt.Errorf("no .img or .img.xz artifact found in %s", dir)
}
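// getenvOr returns the trimmed value of key, or fallback when it is unset
// or blank.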
func getenvOr(key, fallback string) string {
value := strings.TrimSpace(os.Getenv(key))
if value == "" {
return fallback
}
return value
}
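
Taken together, the subcommands are meant to run in sequence: remote-build publishes <artifact-ref>:<build-tag> and retags it as latest, and remote-flash later pulls <artifact-ref>:latest on the flash host. A minimal sketch of driving them directly (the node name and Harbor project below are invented placeholders; credentials come from METIS_HARBOR_USERNAME and METIS_HARBOR_PASSWORD):

// Sketch only: assumes metis, xz, and oras are on PATH.
package main

import (
	"fmt"
	"os/exec"
	"time"
)

func main() {
	node := "rpi-worker-01"                            // invented node name
	ref := "registry.bstein.dev/metis-images/" + node  // invented Harbor project
	tag := time.Now().UTC().Format("20060102t150405z") // the service's tag layout

	steps := [][]string{
		{"remote-build", "--node", node, "--artifact-ref", ref, "--build-tag", tag},
		{"remote-flash", "--node", node, "--device", "hosttmp:///tmp", "--artifact-ref", ref},
	}
	for _, args := range steps {
		out, err := exec.Command("metis", args...).CombinedOutput()
		if err != nil {
			fmt.Printf("%s failed: %v\n%s\n", args[0], err, out)
			return
		}
		fmt.Printf("%s:\n%s\n", args[0], out)
	}
}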

go.mod (29 changed lines)

@@ -1,5 +1,32 @@
 module metis

-go 1.22.0
+go 1.23.1

 require gopkg.in/yaml.v3 v3.0.1
+
+require (
+	dario.cat/mergo v1.0.1 // indirect
+	github.com/Masterminds/goutils v1.1.1 // indirect
+	github.com/Masterminds/semver/v3 v3.3.0 // indirect
+	github.com/Masterminds/sprig/v3 v3.3.0 // indirect
+	github.com/containerd/console v1.0.4 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/huandu/xstrings v1.5.0 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/mitchellh/copystructure v1.2.0 // indirect
+	github.com/mitchellh/reflectwalk v1.0.2 // indirect
+	github.com/morikuni/aec v1.0.0 // indirect
+	github.com/opencontainers/go-digest v1.0.0 // indirect
+	github.com/opencontainers/image-spec v1.1.0 // indirect
+	github.com/shopspring/decimal v1.4.0 // indirect
+	github.com/sirupsen/logrus v1.9.3 // indirect
+	github.com/spf13/cast v1.7.0 // indirect
+	github.com/spf13/cobra v1.8.1 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	golang.org/x/crypto v0.31.0 // indirect
+	golang.org/x/sync v0.10.0 // indirect
+	golang.org/x/sys v0.28.0 // indirect
+	golang.org/x/term v0.27.0 // indirect
+	oras.land/oras v1.2.2 // indirect
+	oras.land/oras-go/v2 v2.5.0 // indirect
+)

go.sum (58 changed lines)

@@ -1,4 +1,62 @@
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn4ro=
github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
oras.land/oras v1.2.2 h1:TyeLkSI1D4RltfA0alTkBNa5ttMwmpUJMGc97QIdmuw=
oras.land/oras v1.2.2/go.mod h1:qtLROGNZulPzlI/pAr9s6j41IeVYQF1VAm+KRU+vkB4=
oras.land/oras-go/v2 v2.5.0 h1:o8Me9kLY74Vp5uw07QXPiitjsw7qNXi8Twd+19Zf02c=
oras.land/oras-go/v2 v2.5.0/go.mod h1:z4eisnLP530vwIOUOJeBIj0aGI0L1C3d53atvCBqZHg=

pkg/service (modified)

@@ -2,29 +2,20 @@ package service

 import (
 	"bufio"
-	"context"
-	"crypto/tls"
-	"crypto/x509"
 	"encoding/json"
 	"errors"
 	"fmt"
-	"io"
-	"net/http"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"

 	"metis/pkg/facts"
-	"metis/pkg/image"
 	"metis/pkg/inventory"
-	"metis/pkg/plan"
 	"metis/pkg/sentinel"
-	"metis/pkg/writer"
 )
 type JobStatus string
@@ -43,6 +34,7 @@ type Device struct {
 	Model     string `json:"model,omitempty"`
 	Transport string `json:"transport,omitempty"`
 	Type      string `json:"type,omitempty"`
+	Note      string `json:"note,omitempty"`
 	Removable bool   `json:"removable"`
 	Hotplug   bool   `json:"hotplug"`
 	SizeBytes int64  `json:"size_bytes"`
@@ -54,6 +46,7 @@ type Job struct {
 	Kind    string    `json:"kind"`
 	Node    string    `json:"node,omitempty"`
 	Host    string    `json:"host,omitempty"`
+	Builder string    `json:"builder,omitempty"`
 	Device  string    `json:"device,omitempty"`
 	Status  JobStatus `json:"status"`
 	Stage   string    `json:"stage,omitempty"`
@@ -101,9 +94,15 @@ type PageState struct {

 // ArtifactSummary describes the latest built image for a node.
 type ArtifactSummary struct {
-	Path      string    `json:"path"`
-	UpdatedAt time.Time `json:"updated_at"`
-	SizeBytes int64     `json:"size_bytes"`
+	Node        string    `json:"node,omitempty"`
+	Ref         string    `json:"ref,omitempty"`
+	BuildTag    string    `json:"build_tag,omitempty"`
+	LocalPath   string    `json:"local_path,omitempty"`
+	HostPath    string    `json:"host_path,omitempty"`
+	BuilderHost string    `json:"builder_host,omitempty"`
+	Compressed  bool      `json:"compressed,omitempty"`
+	UpdatedAt   time.Time `json:"updated_at"`
+	SizeBytes   int64     `json:"size_bytes"`
 }
 // App coordinates builds, flashes, sentinel snapshots, and the web UI state.
@@ -112,10 +111,11 @@ type App struct {
 	inventory *inventory.Inventory
 	metrics   *Metrics
 	mu        sync.RWMutex
-	jobs      map[string]*Job
-	snapshots map[string]SnapshotRecord
-	targets   map[string]facts.Targets
+	jobs          map[string]*Job
+	snapshots     map[string]SnapshotRecord
+	targets       map[string]facts.Targets
+	artifactStore map[string]ArtifactSummary
 }
 // NewApp creates a Metis service app instance.
@@ -134,15 +134,17 @@ func NewApp(settings Settings) (*App, error) {
 		return nil, err
 	}
 	app := &App{
-		settings:  settings,
-		inventory: inv,
-		metrics:   NewMetrics(),
-		jobs:      map[string]*Job{},
-		snapshots: map[string]SnapshotRecord{},
-		targets:   map[string]facts.Targets{},
+		settings:      settings,
+		inventory:     inv,
+		metrics:       NewMetrics(),
+		jobs:          map[string]*Job{},
+		snapshots:     map[string]SnapshotRecord{},
+		targets:       map[string]facts.Targets{},
+		artifactStore: map[string]ArtifactSummary{},
 	}
 	_ = app.loadSnapshots()
 	_ = app.loadTargets()
+	_ = app.loadArtifacts()
 	return app, nil
 }
@@ -302,231 +304,6 @@ func (a *App) WatchSentinel() (*Event, error) {
 	return event, nil
 }
// ListDevices returns locally attached removable media that are safe candidates for flashing.
func (a *App) ListDevices(host string) ([]Device, error) {
if host == "" {
host = a.settings.DefaultFlashHost
}
if !a.supportsLocalMedia(host) {
return nil, fmt.Errorf("flash host %s is listed for planning, but this Metis instance only has direct removable-media access on %s", host, a.settings.LocalHost)
}
cmd := exec.Command("lsblk", "-J", "-b", "-o", "NAME,PATH,RM,HOTPLUG,SIZE,MODEL,TRAN,TYPE")
out, err := cmd.Output()
if err != nil {
return nil, err
}
var payload struct {
Blockdevices []struct {
Name string `json:"name"`
Path string `json:"path"`
RM bool `json:"rm"`
Hotplug bool `json:"hotplug"`
Size any `json:"size"`
Model string `json:"model"`
Tran string `json:"tran"`
Type string `json:"type"`
} `json:"blockdevices"`
}
if err := json.Unmarshal(out, &payload); err != nil {
return nil, err
}
devices := make([]Device, 0)
for _, dev := range payload.Blockdevices {
if dev.Type != "disk" {
continue
}
size := int64(0)
switch value := dev.Size.(type) {
case string:
size, _ = strconv.ParseInt(value, 10, 64)
case float64:
size = int64(value)
}
if size <= 0 || size > a.settings.MaxDeviceBytes {
continue
}
if dev.Tran != "usb" && !dev.RM && !dev.Hotplug {
continue
}
devices = append(devices, Device{
Name: dev.Name,
Path: dev.Path,
Model: strings.TrimSpace(dev.Model),
Transport: dev.Tran,
Type: dev.Type,
Removable: dev.RM,
Hotplug: dev.Hotplug,
SizeBytes: size,
})
}
sort.Slice(devices, func(i, j int) bool {
left := deviceScore(devices[i])
right := deviceScore(devices[j])
if left != right {
return left > right
}
if devices[i].SizeBytes != devices[j].SizeBytes {
return devices[i].SizeBytes < devices[j].SizeBytes
}
return devices[i].Path < devices[j].Path
})
return devices, nil
}
func (a *App) runBuild(job *Job, flash bool) {
a.setJob(job.ID, func(j *Job) {
j.Status = JobRunning
j.Stage = "download"
j.Message = "Fetching and verifying base image"
j.ProgressPct = 5
})
output := a.artifactPath(job.Node)
cacheDir := a.settings.CacheDir
planData, err := plan.Build(a.inventory, job.Node, output, cacheDir)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
_, class, err := a.inventory.FindNode(job.Node)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
cacheImage := filepath.Join(cacheDir, cachedImageName(planData.Image))
cacheImage, err = image.DownloadAndVerify(planData.Image, cacheImage, class.Checksum)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
a.setJob(job.ID, func(j *Job) {
j.Stage = "copy"
j.Message = "Copying base image into artifact"
j.ProgressPct = 24
})
if err := writer.WriteImage(context.Background(), cacheImage, output); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
files, err := plan.Files(a.inventory, job.Node)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
a.setJob(job.ID, func(j *Job) {
j.Stage = "inject"
j.Message = "Injecting node-specific rootfs config"
j.ProgressPct = 70
})
if err := image.InjectRootFS(output, files); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
a.metrics.RecordBuild(job.Node, "ok")
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "image.build",
Summary: fmt.Sprintf("Built replacement image for %s", job.Node),
Details: map[string]any{"node": job.Node, "artifact": output},
})
if !flash {
a.completeJob(job.ID, func(j *Job) {
j.Stage = "complete"
j.Message = "Image build complete"
j.ProgressPct = 100
j.Artifact = output
})
return
}
a.setJob(job.ID, func(j *Job) {
j.Stage = "preflight"
j.Message = "Validating device and deleting stale node object"
j.ProgressPct = 78
j.Artifact = output
})
if _, err := a.ensureDevice(job.Host, job.Device); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordFlash(job.Node, job.Host, "error")
return
}
if err := deleteNodeObject(job.Node); err != nil {
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "node.delete.warning",
Summary: fmt.Sprintf("Could not delete stale Kubernetes node object for %s", job.Node),
Details: map[string]any{"node": job.Node, "error": err.Error()},
})
}
if err := a.flashArtifact(job.ID, output); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordFlash(job.Node, job.Host, "error")
return
}
a.metrics.RecordFlash(job.Node, job.Host, "ok")
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "image.flash",
Summary: fmt.Sprintf("Flashed %s image to %s on %s", job.Node, job.Device, job.Host),
Details: map[string]any{"node": job.Node, "device": job.Device, "host": job.Host},
})
a.completeJob(job.ID, func(j *Job) {
j.Stage = "complete"
j.Message = fmt.Sprintf("Flash complete. Move the card into %s and power-cycle it.", j.Node)
j.ProgressPct = 100
j.Artifact = output
})
}
func (a *App) flashArtifact(jobID, artifact string) error {
info, err := os.Stat(artifact)
if err != nil {
return err
}
a.setJob(jobID, func(j *Job) {
j.Stage = "flash"
j.Message = "Writing image to removable media"
j.ProgressPct = 82
j.Total = info.Size()
})
err = writer.WriteImageWithProgress(context.Background(), artifact, a.job(jobID).Device, func(written, total int64) {
pct := 82.0
if total > 0 {
pct = 82.0 + (float64(written)/float64(total))*17.0
}
a.setJob(jobID, func(j *Job) {
j.Written = written
j.Total = total
j.ProgressPct = pct
j.Message = fmt.Sprintf("Flashing %s of %s", humanBytes(written), humanBytes(total))
})
})
return err
}
func (a *App) ensureDevice(host, path string) (*Device, error) {
if strings.TrimSpace(path) == "" {
return nil, fmt.Errorf("select removable media before starting a flash run")
}
devices, err := a.ListDevices(host)
if err != nil {
return nil, err
}
for _, device := range devices {
if device.Path == path {
return &device, nil
}
}
return nil, fmt.Errorf("device %s is not a current removable flash candidate", path)
}
 func (a *App) newJob(kind, node, host, device string) *Job {
 	job := &Job{
 		ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()),
@@ -619,27 +396,6 @@ func (a *App) recentEvents(limit int) []Event {
 	return events
 }
func (a *App) artifacts() map[string]ArtifactSummary {
result := map[string]ArtifactSummary{}
for _, node := range a.inventory.Nodes {
path := a.artifactPath(node.Name)
info, err := os.Stat(path)
if err != nil {
continue
}
result[node.Name] = ArtifactSummary{
Path: path,
UpdatedAt: info.ModTime().UTC(),
SizeBytes: info.Size(),
}
}
return result
}
func (a *App) artifactPath(node string) string {
return filepath.Join(a.settings.ArtifactDir, fmt.Sprintf("%s.img", node))
}
 func cachedImageName(source string) string {
 	return strings.TrimSuffix(filepath.Base(source), ".xz")
 }
@@ -656,8 +412,10 @@ func (a *App) flashHosts() []string {
 			hosts[value] = struct{}{}
 		}
 	}
-	for _, host := range clusterNodeNames() {
-		hosts[host] = struct{}{}
+	for _, node := range clusterNodes() {
+		if value := strings.TrimSpace(node.Name); value != "" {
+			hosts[value] = struct{}{}
+		}
 	}
 	out := make([]string, 0, len(hosts))
 	for host := range hosts {
@@ -798,11 +556,6 @@ func errorString(err error) string {
 	return err.Error()
 }
func (a *App) supportsLocalMedia(host string) bool {
host = strings.TrimSpace(host)
return host == "" || host == a.settings.LocalHost || host == a.settings.DefaultFlashHost
}
 func deviceScore(device Device) int {
 	score := 0
 	model := strings.ToLower(strings.TrimSpace(device.Model))
@@ -857,99 +610,9 @@ func deleteNodeObject(node string) error {
 }

 func deleteNodeObjectInCluster(node string) error {
-	host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST"))
-	port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT"))
-	if host == "" || port == "" {
+	kube, err := inClusterKubeClient()
+	if err != nil {
 		return errors.New("not running in cluster")
 	}
+	return kube.deleteRequest(fmt.Sprintf("/api/v1/nodes/%s", node))
-	token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
if err != nil {
return err
}
caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
if err != nil {
return err
}
pool := x509.NewCertPool()
if !pool.AppendCertsFromPEM(caPEM) {
return errors.New("append kubernetes CA")
}
client := &http.Client{
Timeout: 15 * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{RootCAs: pool},
},
}
req, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("https://%s:%s/api/v1/nodes/%s", host, port, node), nil)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token)))
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusAccepted {
return nil
}
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return fmt.Errorf("delete node %s failed: %s: %s", node, resp.Status, strings.TrimSpace(string(body)))
}
func clusterNodeNames() []string {
host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST"))
port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT"))
if host == "" || port == "" {
return nil
}
token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
if err != nil {
return nil
}
caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
if err != nil {
return nil
}
pool := x509.NewCertPool()
if !pool.AppendCertsFromPEM(caPEM) {
return nil
}
client := &http.Client{
Timeout: 10 * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{RootCAs: pool},
},
}
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://%s:%s/api/v1/nodes", host, port), nil)
if err != nil {
return nil
}
req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token)))
resp, err := client.Do(req)
if err != nil {
return nil
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil
}
var payload struct {
Items []struct {
Metadata struct {
Name string `json:"name"`
} `json:"metadata"`
} `json:"items"`
}
if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&payload); err != nil {
return nil
}
names := make([]string, 0, len(payload.Items))
for _, item := range payload.Items {
if name := strings.TrimSpace(item.Metadata.Name); name != "" {
names = append(names, name)
}
}
sort.Strings(names)
return names
} }

pkg/service/artifacts.go (new file, 52 lines)

@@ -0,0 +1,52 @@
package service
import (
"encoding/json"
"os"
"path/filepath"
)
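// artifacts returns a copy of the in-memory artifact store so callers can
// read summaries without holding the lock.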
func (a *App) artifacts() map[string]ArtifactSummary {
a.mu.RLock()
defer a.mu.RUnlock()
result := make(map[string]ArtifactSummary, len(a.artifactStore))
for key, value := range a.artifactStore {
result[key] = value
}
return result
}
func (a *App) loadArtifacts() error {
data, err := os.ReadFile(a.settings.ArtifactStatePath)
if err != nil {
return err
}
var artifacts map[string]ArtifactSummary
if err := json.Unmarshal(data, &artifacts); err != nil {
return err
}
a.mu.Lock()
a.artifactStore = artifacts
a.mu.Unlock()
return nil
}
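// persistArtifacts snapshots the store to ArtifactStatePath so recorded
// summaries survive service restarts.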
func (a *App) persistArtifacts() error {
a.mu.RLock()
data, err := json.MarshalIndent(a.artifactStore, "", " ")
a.mu.RUnlock()
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(a.settings.ArtifactStatePath), 0o755); err != nil {
return err
}
return os.WriteFile(a.settings.ArtifactStatePath, data, 0o644)
}
func (a *App) recordArtifact(summary ArtifactSummary) error {
a.mu.Lock()
a.artifactStore[summary.Node] = summary
a.mu.Unlock()
return a.persistArtifacts()
}

pkg/service/cluster.go (new file, 278 lines)

@@ -0,0 +1,278 @@
package service
import (
"bytes"
"crypto/tls"
"crypto/x509"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"sort"
"strings"
"time"
)
type clusterNode struct {
Name string
Arch string
Hardware string
Worker bool
ControlPlane bool
Unschedulable bool
}
type podState struct {
Name string
Phase string
Reason string
Message string
}
type kubeClient struct {
baseURL string
token string
client *http.Client
}
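// inClusterKubeClient builds a minimal Kubernetes API client from the
// mounted service-account token and cluster CA, with no client-go
// dependency.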
func inClusterKubeClient() (*kubeClient, error) {
host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST"))
port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT"))
if host == "" || port == "" {
return nil, fmt.Errorf("not running in cluster")
}
token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
if err != nil {
return nil, err
}
caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
if err != nil {
return nil, err
}
pool := x509.NewCertPool()
if !pool.AppendCertsFromPEM(caPEM) {
return nil, fmt.Errorf("append kubernetes CA")
}
return &kubeClient{
baseURL: fmt.Sprintf("https://%s:%s", host, port),
token: strings.TrimSpace(string(token)),
client: &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{RootCAs: pool},
},
},
}, nil
}
func (k *kubeClient) jsonRequest(method, path string, body any, out any) error {
var reader io.Reader
if body != nil {
data, err := json.Marshal(body)
if err != nil {
return err
}
reader = bytes.NewReader(data)
}
req, err := http.NewRequest(method, k.baseURL+path, reader)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+k.token)
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := k.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
payload, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))
return fmt.Errorf("%s %s failed: %s: %s", method, path, resp.Status, strings.TrimSpace(string(payload)))
}
if out == nil {
return nil
}
return json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(out)
}
func (k *kubeClient) deleteRequest(path string) error {
req, err := http.NewRequest(http.MethodDelete, k.baseURL+path, nil)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+k.token)
resp, err := k.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusAccepted {
return nil
}
payload, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return fmt.Errorf("delete %s failed: %s: %s", path, resp.Status, strings.TrimSpace(string(payload)))
}
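// clusterNodes lists cluster nodes with the labels the placement logic
// cares about (arch, hardware, roles, schedulability); it returns nil when
// not running in a cluster or the API call fails.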
func clusterNodes() []clusterNode {
kube, err := inClusterKubeClient()
if err != nil {
return nil
}
var payload struct {
Items []struct {
Metadata struct {
Name string `json:"name"`
Labels map[string]string `json:"labels"`
} `json:"metadata"`
Spec struct {
Unschedulable bool `json:"unschedulable"`
} `json:"spec"`
} `json:"items"`
}
if err := kube.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &payload); err != nil {
return nil
}
nodes := make([]clusterNode, 0, len(payload.Items))
for _, item := range payload.Items {
labels := item.Metadata.Labels
nodes = append(nodes, clusterNode{
Name: strings.TrimSpace(item.Metadata.Name),
Arch: strings.TrimSpace(labels["kubernetes.io/arch"]),
Hardware: strings.TrimSpace(labels["hardware"]),
Worker: labels["node-role.kubernetes.io/worker"] == "true",
ControlPlane: labels["node-role.kubernetes.io/control-plane"] != "" || labels["node-role.kubernetes.io/master"] != "",
Unschedulable: item.Spec.Unschedulable,
})
}
sort.Slice(nodes, func(i, j int) bool { return nodes[i].Name < nodes[j].Name })
return nodes
}
func (a *App) podImageForArch(arch string) string {
switch strings.TrimSpace(arch) {
case "arm64":
return strings.TrimSpace(a.settings.RunnerImageARM64)
case "amd64":
return strings.TrimSpace(a.settings.RunnerImageAMD64)
default:
return ""
}
}
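// runRemotePod creates a one-shot pod, polls its phase for up to 12
// minutes, and returns the pod logs once it succeeds. The pod is deleted
// on entry and again on exit so a retried job never collides with a
// leftover pod of the same name.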
func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (string, error) {
kube, err := inClusterKubeClient()
if err != nil {
return "", err
}
ns := url.PathEscape(a.settings.Namespace)
_ = kube.deleteRequest(fmt.Sprintf("/api/v1/namespaces/%s/pods/%s", ns, url.PathEscape(podName)))
defer func() {
_ = kube.deleteRequest(fmt.Sprintf("/api/v1/namespaces/%s/pods/%s", ns, url.PathEscape(podName)))
}()
if err := kube.jsonRequest(http.MethodPost, fmt.Sprintf("/api/v1/namespaces/%s/pods", ns), podSpec, nil); err != nil {
return "", err
}
deadline := time.Now().Add(12 * time.Minute)
for time.Now().Before(deadline) {
state, err := a.remotePodState(kube, podName)
if err != nil {
return "", err
}
switch state.Phase {
case "Succeeded":
return a.remotePodLogs(kube, podName)
case "Failed":
logs, _ := a.remotePodLogs(kube, podName)
if strings.TrimSpace(logs) != "" {
return "", fmt.Errorf("remote pod %s failed: %s", podName, strings.TrimSpace(logs))
}
return "", fmt.Errorf("remote pod %s failed: %s %s", podName, state.Reason, state.Message)
}
time.Sleep(2 * time.Second)
}
return "", fmt.Errorf("remote pod %s timed out", podName)
}
func (a *App) remotePodState(kube *kubeClient, podName string) (podState, error) {
var payload struct {
Metadata struct {
Name string `json:"name"`
} `json:"metadata"`
Status struct {
Phase string `json:"phase"`
Reason string `json:"reason"`
Message string `json:"message"`
Conditions []struct {
Type string `json:"type"`
Status string `json:"status"`
Reason string `json:"reason"`
Message string `json:"message"`
} `json:"conditions"`
ContainerStatuses []struct {
State struct {
Waiting struct {
Reason string `json:"reason"`
Message string `json:"message"`
} `json:"waiting"`
Terminated struct {
Reason string `json:"reason"`
Message string `json:"message"`
} `json:"terminated"`
} `json:"state"`
} `json:"containerStatuses"`
} `json:"status"`
}
ns := url.PathEscape(a.settings.Namespace)
if err := kube.jsonRequest(http.MethodGet, fmt.Sprintf("/api/v1/namespaces/%s/pods/%s", ns, url.PathEscape(podName)), nil, &payload); err != nil {
return podState{}, err
}
out := podState{
Name: payload.Metadata.Name,
Phase: payload.Status.Phase,
Reason: payload.Status.Reason,
Message: payload.Status.Message,
}
if len(payload.Status.ContainerStatuses) > 0 {
waiting := payload.Status.ContainerStatuses[0].State.Waiting
terminated := payload.Status.ContainerStatuses[0].State.Terminated
if strings.TrimSpace(waiting.Reason) != "" {
out.Reason = waiting.Reason
out.Message = waiting.Message
}
if strings.TrimSpace(terminated.Reason) != "" {
out.Reason = terminated.Reason
if strings.TrimSpace(terminated.Message) != "" {
out.Message = terminated.Message
}
}
}
return out, nil
}
func (a *App) remotePodLogs(kube *kubeClient, podName string) (string, error) {
ns := url.PathEscape(a.settings.Namespace)
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/api/v1/namespaces/%s/pods/%s/log", kube.baseURL, ns, url.PathEscape(podName)), nil)
if err != nil {
return "", err
}
req.Header.Set("Authorization", "Bearer "+kube.token)
resp, err := kube.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return "", fmt.Errorf("pod logs %s failed: %s: %s", podName, resp.Status, strings.TrimSpace(string(body)))
}
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
if err != nil {
return "", err
}
return string(body), nil
}

pkg/service/harbor.go (new file, 131 lines)

@@ -0,0 +1,131 @@
package service
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"sort"
"strings"
"time"
)
func (a *App) artifactRepo(node string) string {
return fmt.Sprintf("%s/%s/%s", strings.TrimRight(a.settings.HarborRegistry, "/"), strings.Trim(a.settings.HarborProject, "/"), node)
}
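// ensureHarborProject looks the configured project up through the Harbor
// API and creates it as a private project when missing; a 409 Conflict on
// create counts as success.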
func (a *App) ensureHarborProject() error {
if strings.TrimSpace(a.settings.HarborAPIBase) == "" || strings.TrimSpace(a.settings.HarborPassword) == "" {
return fmt.Errorf("harbor admin credentials are not configured")
}
client := &http.Client{Timeout: 30 * time.Second}
project := strings.TrimSpace(a.settings.HarborProject)
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/projects?name=%s", strings.TrimRight(a.settings.HarborAPIBase, "/"), url.QueryEscape(project)), nil)
if err != nil {
return err
}
req.SetBasicAuth(strings.TrimSpace(a.settings.HarborUsername), strings.TrimSpace(a.settings.HarborPassword))
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return fmt.Errorf("harbor project lookup failed: %s: %s", resp.Status, strings.TrimSpace(string(body)))
}
var projects []struct {
Name string `json:"name"`
}
if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&projects); err != nil {
return err
}
for _, item := range projects {
if strings.EqualFold(strings.TrimSpace(item.Name), project) {
return nil
}
}
payload := map[string]any{
"project_name": project,
"metadata": map[string]string{"public": "false"},
}
data, err := json.Marshal(payload)
if err != nil {
return err
}
req, err = http.NewRequest(http.MethodPost, fmt.Sprintf("%s/projects", strings.TrimRight(a.settings.HarborAPIBase, "/")), bytes.NewReader(data))
if err != nil {
return err
}
req.SetBasicAuth(strings.TrimSpace(a.settings.HarborUsername), strings.TrimSpace(a.settings.HarborPassword))
req.Header.Set("Content-Type", "application/json")
resp, err = client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict {
return nil
}
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return fmt.Errorf("harbor project create failed: %s: %s", resp.Status, strings.TrimSpace(string(body)))
}
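// pruneHarborArtifacts keeps the newest `keep` artifacts in the node's
// repository (ordered by push time) and deletes the rest by digest.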
func (a *App) pruneHarborArtifacts(node string, keep int) error {
client := &http.Client{Timeout: 30 * time.Second}
repo := url.PathEscape(node)
apiBase := strings.TrimRight(a.settings.HarborAPIBase, "/")
project := url.PathEscape(strings.TrimSpace(a.settings.HarborProject))
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/projects/%s/repositories/%s/artifacts?page_size=100&with_tag=true", apiBase, project, repo), nil)
if err != nil {
return err
}
req.SetBasicAuth(strings.TrimSpace(a.settings.HarborUsername), strings.TrimSpace(a.settings.HarborPassword))
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusNotFound {
return nil
}
if resp.StatusCode >= 300 {
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
return fmt.Errorf("harbor artifact list failed: %s: %s", resp.Status, strings.TrimSpace(string(body)))
}
var artifacts []struct {
Digest string `json:"digest"`
PushTime string `json:"push_time"`
Tags []struct {
Name string `json:"name"`
} `json:"tags"`
}
if err := json.NewDecoder(io.LimitReader(resp.Body, 2<<20)).Decode(&artifacts); err != nil {
return err
}
sort.Slice(artifacts, func(i, j int) bool {
return artifacts[i].PushTime > artifacts[j].PushTime
})
for idx, artifact := range artifacts {
if idx < keep {
continue
}
ref := url.PathEscape(artifact.Digest)
req, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/projects/%s/repositories/%s/artifacts/%s", apiBase, project, repo, ref), nil)
if err != nil {
return err
}
req.SetBasicAuth(strings.TrimSpace(a.settings.HarborUsername), strings.TrimSpace(a.settings.HarborPassword))
resp, err := client.Do(req)
if err != nil {
return err
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted && resp.StatusCode != http.StatusNotFound {
return fmt.Errorf("harbor artifact delete failed for %s: %s", artifact.Digest, resp.Status)
}
}
return nil
}

pkg/service/remote.go (new file, 455 lines)

@@ -0,0 +1,455 @@
package service
import (
"encoding/json"
"fmt"
"path/filepath"
"sort"
"strings"
"time"
"metis/pkg/inventory"
)
const hostTmpDevicePath = "hosttmp:///tmp"
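// ListDevices proxies device discovery through a privileged one-shot pod
// pinned to the requested host, so any current cluster node can act as a
// flash host; the pod's JSON log output is parsed and re-sorted here.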
func (a *App) ListDevices(host string) ([]Device, error) {
if host == "" {
host = a.settings.DefaultFlashHost
}
nodeMap := map[string]clusterNode{}
for _, node := range clusterNodes() {
nodeMap[node.Name] = node
}
target, ok := nodeMap[host]
if !ok {
return nil, fmt.Errorf("flash host %s is not a current cluster node", host)
}
image := a.podImageForArch(target.Arch)
if image == "" {
return nil, fmt.Errorf("no runner image configured for arch %s", target.Arch)
}
podName := fmt.Sprintf("metis-devices-%d", time.Now().UTC().UnixNano())
logs, err := a.runRemotePod("", podName, a.remoteDevicePodSpec(podName, host, image))
if err != nil {
return nil, err
}
var payload struct {
Devices []Device `json:"devices"`
}
if err := json.Unmarshal([]byte(strings.TrimSpace(logs)), &payload); err != nil {
return nil, fmt.Errorf("decode remote devices: %w: %s", err, strings.TrimSpace(logs))
}
sort.Slice(payload.Devices, func(i, j int) bool {
left := deviceScore(payload.Devices[i])
right := deviceScore(payload.Devices[j])
if left != right {
return left > right
}
if payload.Devices[i].SizeBytes != payload.Devices[j].SizeBytes {
return payload.Devices[i].SizeBytes < payload.Devices[j].SizeBytes
}
return payload.Devices[i].Path < payload.Devices[j].Path
})
return payload.Devices, nil
}
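// runBuild drives the whole remote workflow: pick a builder node, run the
// build pod, record and prune Harbor artifacts, then optionally flash the
// freshly tagged image via a second pod on the requested host.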
func (a *App) runBuild(job *Job, flash bool) {
nodeSpec, class, err := a.inventory.FindNode(job.Node)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
if err := a.ensureHarborProject(); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
builder, err := a.selectBuilderHost(class.Arch, job.Host)
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
job.Builder = builder.Name
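// The build tag is a UTC timestamp such as 20260331t204235z, so tags in
// Harbor sort chronologically.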
buildTag := time.Now().UTC().Format("20060102t150405z")
artifactRef := a.artifactRepo(job.Node)
a.setJob(job.ID, func(j *Job) {
j.Status = JobRunning
j.Stage = "build"
j.Message = fmt.Sprintf("Building on %s (%s) and publishing to Harbor", builder.Name, builder.Arch)
j.ProgressPct = 8
j.Artifact = artifactRef + ":latest"
j.Builder = builder.Name
})
buildImage := a.podImageForArch(builder.Arch)
if buildImage == "" {
a.failJob(job.ID, fmt.Errorf("no runner image configured for arch %s", builder.Arch))
a.metrics.RecordBuild(job.Node, "error")
return
}
buildPod := fmt.Sprintf("metis-build-%d", time.Now().UTC().UnixNano())
logs, err := a.runRemotePod(job.ID, buildPod, a.remoteBuildPodSpec(buildPod, builder.Name, buildImage, job.Node, artifactRef, buildTag))
if err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
var summary ArtifactSummary
if err := json.Unmarshal([]byte(strings.TrimSpace(logs)), &summary); err != nil {
a.failJob(job.ID, fmt.Errorf("decode remote build output: %w: %s", err, strings.TrimSpace(logs)))
a.metrics.RecordBuild(job.Node, "error")
return
}
summary.Node = job.Node
summary.Ref = artifactRef + ":latest"
summary.BuilderHost = builder.Name
if err := a.recordArtifact(summary); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordBuild(job.Node, "error")
return
}
if err := a.pruneHarborArtifacts(job.Node, 3); err != nil {
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "artifact.prune.warning",
Summary: fmt.Sprintf("Harbor cleanup warning for %s", job.Node),
Details: map[string]any{"node": job.Node, "error": err.Error()},
})
}
a.metrics.RecordBuild(job.Node, "ok")
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "image.build",
Summary: fmt.Sprintf("Built replacement image for %s on %s", job.Node, builder.Name),
Details: map[string]any{"node": job.Node, "artifact": artifactRef + ":latest", "builder": builder.Name},
})
if !flash {
a.completeJob(job.ID, func(j *Job) {
j.Stage = "complete"
j.Message = "Image build complete"
j.ProgressPct = 100
j.Artifact = artifactRef + ":latest"
})
return
}
a.setJob(job.ID, func(j *Job) {
j.Stage = "preflight"
j.Message = fmt.Sprintf("Preparing to flash from Harbor on %s", j.Host)
j.ProgressPct = 78
j.Artifact = artifactRef + ":latest"
})
if _, err := a.ensureDevice(job.Host, job.Device); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordFlash(job.Node, job.Host, "error")
return
}
if !strings.HasPrefix(job.Device, "hosttmp://") {
if err := deleteNodeObject(job.Node); err != nil {
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "node.delete.warning",
Summary: fmt.Sprintf("Could not delete stale Kubernetes node object for %s", job.Node),
Details: map[string]any{"node": job.Node, "error": err.Error()},
})
}
}
if err := a.flashArtifact(job.ID, artifactRef); err != nil {
a.failJob(job.ID, err)
a.metrics.RecordFlash(job.Node, job.Host, "error")
return
}
a.metrics.RecordFlash(job.Node, job.Host, "ok")
a.appendEvent(Event{
Time: time.Now().UTC(),
Kind: "image.flash",
Summary: fmt.Sprintf("Flashed %s latest image on %s", job.Node, job.Host),
Details: map[string]any{"node": job.Node, "device": job.Device, "host": job.Host, "artifact": artifactRef + ":latest"},
})
a.completeJob(job.ID, func(j *Job) {
j.Stage = "complete"
if strings.HasPrefix(j.Device, "hosttmp://") {
j.Message = fmt.Sprintf("Test flash complete on %s host /tmp.", j.Host)
} else {
j.Message = fmt.Sprintf("Flash complete on %s. Move the card into %s and power-cycle it.", j.Host, j.Node)
}
j.ProgressPct = 100
j.Artifact = artifactRef + ":latest"
})
_ = nodeSpec
}
func (a *App) flashArtifact(jobID, artifactRef string) error {
nodes := clusterNodes()
nodeMap := map[string]clusterNode{}
for _, node := range nodes {
nodeMap[node.Name] = node
}
target, ok := nodeMap[a.job(jobID).Host]
if !ok {
return fmt.Errorf("flash host %s is not a current cluster node", a.job(jobID).Host)
}
image := a.podImageForArch(target.Arch)
if image == "" {
return fmt.Errorf("no runner image configured for arch %s", target.Arch)
}
a.setJob(jobID, func(j *Job) {
j.Stage = "flash"
j.Message = fmt.Sprintf("Pulling %s and writing it on %s", artifactRef+":latest", j.Host)
j.ProgressPct = 84
})
podName := fmt.Sprintf("metis-flash-%d", time.Now().UTC().UnixNano())
logs, err := a.runRemotePod(jobID, podName, a.remoteFlashPodSpec(podName, target.Name, image, a.job(jobID).Node, a.job(jobID).Device, artifactRef))
if err != nil {
return err
}
var payload map[string]any
if err := json.Unmarshal([]byte(strings.TrimSpace(logs)), &payload); err == nil {
a.setJob(jobID, func(j *Job) {
if dest, ok := payload["dest_path"].(string); ok && dest != "" {
j.Message = fmt.Sprintf("Wrote latest artifact to %s", dest)
}
})
}
return nil
}
func (a *App) ensureDevice(host, path string) (*Device, error) {
if strings.TrimSpace(path) == "" {
return nil, fmt.Errorf("select removable media before starting a flash run")
}
devices, err := a.ListDevices(host)
if err != nil {
return nil, err
}
for _, device := range devices {
if device.Path == path {
return &device, nil
}
}
return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host)
}
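// selectBuilderHost scores schedulable, non-control-plane nodes of the
// required arch: workers score highest, rpi5 hardware is preferred for
// arm64 builds, Longhorn storage nodes are penalized, and the flash host
// gets a small tie-break bonus.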
func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
nodes := clusterNodes()
storageNodes := map[string]struct{}{}
for _, node := range a.inventory.Nodes {
if len(node.LonghornDisks) > 0 {
storageNodes[node.Name] = struct{}{}
}
}
type scored struct {
node clusterNode
score int
}
candidates := make([]scored, 0)
for _, node := range nodes {
if node.Arch != arch || node.Unschedulable || node.ControlPlane {
continue
}
score := 0
if node.Worker {
score += 40
}
switch arch {
case "arm64":
if node.Hardware == "rpi5" {
score += 30
}
if _, storage := storageNodes[node.Name]; storage {
score -= 50
}
case "amd64":
if node.Name == a.settings.DefaultFlashHost {
score += 30
}
if node.Name == "titan-24" {
score -= 10
}
}
if flashHost != "" && node.Name == flashHost {
score += 5
}
candidates = append(candidates, scored{node: node, score: score})
}
sort.Slice(candidates, func(i, j int) bool {
if candidates[i].score != candidates[j].score {
return candidates[i].score > candidates[j].score
}
return candidates[i].node.Name < candidates[j].node.Name
})
if len(candidates) == 0 {
return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch)
}
return candidates[0].node, nil
}
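// The remote*PodSpec helpers assemble one-shot pod manifests as plain
// maps: device discovery and flashing run privileged with /dev, /sys,
// /run/udev, and the host /tmp mounted, while builds only need an emptyDir
// workspace plus Harbor credentials from the metis-harbor secret.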
func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any {
return map[string]any{
"apiVersion": "v1",
"kind": "Pod",
"metadata": map[string]any{
"name": name,
"namespace": a.settings.Namespace,
"labels": map[string]string{"app": "metis-remote", "metis-run": "devices"},
},
"spec": map[string]any{
"restartPolicy": "Never",
"serviceAccountName": "metis",
"nodeSelector": map[string]string{
"kubernetes.io/hostname": host,
},
"containers": []map[string]any{
{
"name": "remote-devices",
"image": image,
"imagePullPolicy": "Always",
"command": []string{
"metis", "remote-devices",
"--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes),
"--host-tmp-dir", filepath.Join("/host-tmp", strings.TrimPrefix(a.settings.HostTmpDir, "/")),
},
"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
"volumeMounts": []map[string]any{
{"name": "host-dev", "mountPath": "/dev"},
{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
{"name": "host-tmp", "mountPath": "/host-tmp"},
},
},
},
"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
"volumes": []map[string]any{
{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
},
},
}
}
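// remoteBuildPodSpec describes a one-shot pod that runs `metis remote-build`
// in an ephemeral workspace and publishes the result to Harbor; it needs no
// host mounts or privileges, only the metis config map and Harbor credentials.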
func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any {
return map[string]any{
"apiVersion": "v1",
"kind": "Pod",
"metadata": map[string]any{
"name": name,
"namespace": a.settings.Namespace,
"labels": map[string]string{"app": "metis-remote", "metis-run": "build"},
},
"spec": map[string]any{
"restartPolicy": "Never",
"serviceAccountName": "metis",
"nodeSelector": map[string]string{
"kubernetes.io/hostname": host,
},
"containers": []map[string]any{
{
"name": "remote-build",
"image": image,
"imagePullPolicy": "Always",
"command": []string{
"metis", "remote-build",
"--inventory", a.settings.InventoryPath,
"--node", node,
"--cache", "/workspace/cache",
"--work-dir", "/workspace/build",
"--artifact-ref", artifactRef,
"--build-tag", buildTag,
"--harbor-registry", a.settings.HarborRegistry,
},
"envFrom": []map[string]any{
{"configMapRef": map[string]any{"name": "metis"}},
{"secretRef": map[string]any{"name": "metis-harbor"}},
},
"env": []map[string]any{
{"name": "METIS_K3S_TOKEN", "valueFrom": map[string]any{"secretKeyRef": map[string]any{"name": "metis-runtime", "key": "k3s_token", "optional": true}}},
},
"volumeMounts": []map[string]any{
{"name": "workspace", "mountPath": "/workspace"},
},
},
},
"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
"volumes": []map[string]any{
{"name": "workspace", "emptyDir": map[string]any{}},
},
},
}
}
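// remoteFlashPodSpec describes a one-shot privileged pod that pulls
// artifactRef from Harbor and writes it to the selected device on the target
// host, combining the build pod's credentials with the device pod's host mounts.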
func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any {
return map[string]any{
"apiVersion": "v1",
"kind": "Pod",
"metadata": map[string]any{
"name": name,
"namespace": a.settings.Namespace,
"labels": map[string]string{"app": "metis-remote", "metis-run": "flash"},
},
"spec": map[string]any{
"restartPolicy": "Never",
"serviceAccountName": "metis",
"nodeSelector": map[string]string{
"kubernetes.io/hostname": host,
},
"containers": []map[string]any{
{
"name": "remote-flash",
"image": image,
"imagePullPolicy": "Always",
"command": []string{
"metis", "remote-flash",
"--node", node,
"--device", device,
"--artifact-ref", artifactRef,
"--work-dir", "/workspace/flash",
"--harbor-registry", a.settings.HarborRegistry,
"--host-tmp-dir", filepath.Join("/host-tmp", strings.TrimPrefix(a.settings.HostTmpDir, "/")),
},
"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
"envFrom": []map[string]any{
{"configMapRef": map[string]any{"name": "metis"}},
{"secretRef": map[string]any{"name": "metis-harbor"}},
},
"volumeMounts": []map[string]any{
{"name": "workspace", "mountPath": "/workspace"},
{"name": "host-dev", "mountPath": "/dev"},
{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
{"name": "host-tmp", "mountPath": "/host-tmp"},
},
},
},
"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
"volumes": []map[string]any{
{"name": "workspace", "emptyDir": map[string]any{}},
{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
},
},
}
}
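// remoteArtifactNote returns the best-known artifact reference for a node:
// the last recorded published ref, otherwise the node's :latest repository tag.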
func (a *App) remoteArtifactNote(node string) string {
if summary, ok := a.artifacts()[node]; ok && strings.TrimSpace(summary.Ref) != "" {
return summary.Ref
}
return a.artifactRepo(node) + ":latest"
}
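// inventoryNodeArch resolves a node's CPU architecture from its class and
// defaults to arm64; the spec parameter is currently unused.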
func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string {
if class != nil && strings.TrimSpace(class.Arch) != "" {
return strings.TrimSpace(class.Arch)
}
return "arm64"
}

View File

@@ -725,10 +725,7 @@ var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
   }
   const selectedHost = hostSelect.value || state.default_flash_host;
-  const hostIsLocal = selectedHost === state.local_host || selectedHost === state.default_flash_host;
-  hostNoteEl.textContent = hostIsLocal
-    ? 'Metis is running on ' + state.local_host + ', so media detection and flashing are live for this host.'
-    : 'The selected host is listed from cluster inventory, but this Metis instance only has direct media access on ' + state.local_host + '.';
+  hostNoteEl.textContent = 'Metis will inspect media and run the flash writer on ' + selectedHost + ' through a short-lived in-cluster worker. ' + state.default_flash_host + ' remains the default flash host.';
   if(state.device_error){
     deviceNoteEl.textContent = state.device_error;
@@ -739,9 +736,9 @@ var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
   }
   const artifact = (state.artifacts || {})[nodeSelect.value];
-  artifactNoteEl.textContent = artifact && artifact.path
-    ? 'Latest built image: ' + artifact.path
-    : 'Successful build-only runs are stored on ' + state.local_host + ' under /var/lib/metis/artifacts/<node>.img.';
+  artifactNoteEl.textContent = artifact && artifact.ref
+    ? 'Latest published image: ' + artifact.ref + ' (Metis keeps the newest 3 builds in Harbor).'
+    : 'Successful build-only runs publish <node>:latest into Harbor and keep the newest 3 builds per node.';
   document.getElementById('build-only').disabled = busy || !nodeSelect.value;
   document.getElementById('refresh-devices').disabled = busy;
@@ -825,7 +822,7 @@ var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
   await runAction('Starting image build', 'Queueing the node image build now.', async ()=>{
     await post('/api/jobs/build', {node: nodeSelect.value});
     await refreshState({silent:true});
-    banner('success', 'Image build queued', 'Metis started building the replacement image for ' + nodeSelect.value + '. Successful build-only runs land on ' + state.local_host + ' at /var/lib/metis/artifacts/' + nodeSelect.value + '.img.');
+    banner('success', 'Image build queued', 'Metis started building the replacement image for ' + nodeSelect.value + '. Successful build-only runs publish ' + nodeSelect.value + ':latest to Harbor and keep the newest 3 builds.');
   });
 });

View File

@@ -9,18 +9,28 @@ import (
 // Settings configures the Metis service runtime.
 type Settings struct {
 	BindAddr          string
 	InventoryPath     string
 	CacheDir          string
 	ArtifactDir       string
+	ArtifactStatePath string
 	HistoryPath       string
 	SnapshotsPath     string
 	TargetsPath       string
 	DefaultFlashHost  string
 	FlashHosts        []string
 	LocalHost         string
 	AllowedGroups     []string
 	MaxDeviceBytes    int64
+	Namespace         string
+	RunnerImageAMD64  string
+	RunnerImageARM64  string
+	HarborRegistry    string
+	HarborProject     string
+	HarborAPIBase     string
+	HarborUsername    string
+	HarborPassword    string
+	HostTmpDir        string
 }
 
 // FromEnv builds service settings with sensible defaults for local dev and in-cluster use.
@@ -30,18 +40,28 @@ func FromEnv() Settings {
 	defaultFlashHost := getenvDefault("METIS_DEFAULT_FLASH_HOST", localHost)
 	flashHosts := splitList(getenvDefault("METIS_FLASH_HOSTS", defaultFlashHost))
 	return Settings{
 		BindAddr:          getenvDefault("METIS_BIND_ADDR", ":8080"),
 		InventoryPath:     getenvDefault("METIS_INVENTORY_PATH", "inventory.titan-rpi4.yaml"),
 		CacheDir:          getenvDefault("METIS_CACHE_DIR", filepath.Join(dataDir, "cache")),
 		ArtifactDir:       getenvDefault("METIS_ARTIFACT_DIR", filepath.Join(dataDir, "artifacts")),
+		ArtifactStatePath: getenvDefault("METIS_ARTIFACT_STATE_PATH", filepath.Join(dataDir, "artifacts.json")),
 		HistoryPath:       getenvDefault("METIS_HISTORY_PATH", filepath.Join(dataDir, "history.jsonl")),
 		SnapshotsPath:     getenvDefault("METIS_SNAPSHOTS_PATH", filepath.Join(dataDir, "snapshots.json")),
 		TargetsPath:       getenvDefault("METIS_TARGETS_PATH", filepath.Join(dataDir, "targets.json")),
 		DefaultFlashHost:  defaultFlashHost,
 		FlashHosts:        flashHosts,
 		LocalHost:         localHost,
 		AllowedGroups:     splitList(getenvDefault("METIS_ALLOWED_GROUPS", "admin,maintainer")),
 		MaxDeviceBytes:    getenvInt64("METIS_MAX_DEVICE_BYTES", 300000000000),
+		Namespace:         getenvDefault("METIS_NAMESPACE", "maintenance"),
+		RunnerImageAMD64:  getenvDefault("METIS_RUNNER_IMAGE_AMD64", ""),
+		RunnerImageARM64:  getenvDefault("METIS_RUNNER_IMAGE_ARM64", ""),
+		HarborRegistry:    getenvDefault("METIS_HARBOR_REGISTRY", "registry.bstein.dev"),
+		HarborProject:     getenvDefault("METIS_HARBOR_PROJECT", "metis"),
+		HarborAPIBase:     getenvDefault("METIS_HARBOR_API_BASE", "https://registry.bstein.dev/api/v2.0"),
+		HarborUsername:    getenvDefault("METIS_HARBOR_USERNAME", ""),
+		HarborPassword:    getenvDefault("METIS_HARBOR_PASSWORD", ""),
+		HostTmpDir:        getenvDefault("METIS_HOST_TMP_DIR", "/tmp/metis-flash-test"),
 	}
 }