metis: tighten gate coverage and metrics

This commit is contained in:
Brad Stein 2026-04-11 00:17:10 -03:00
parent feeeeeda3a
commit cb2498b1df
80 changed files with 5988 additions and 1902 deletions

12
Jenkinsfile vendored
View File

@ -154,6 +154,18 @@ spec:
}
}
// Quality gate: runs `go test -v ./...` from the testing/ directory inside the
// 'tester' container; `set -eu` makes the shell step fail on the first error.
stage('Quality gate') {
steps {
container('tester') {
sh '''
set -eu
cd testing
go test -v ./...
'''
}
}
}
stage('Prep toolchain') {
steps {
container('builder') {

View File

@ -0,0 +1,38 @@
package main
import (
"os"
"path/filepath"
"testing"
"metis/pkg/sentinel"
)
// TestSentinelNsenterAndErrorBranches places stub executables (including an
// nsenter stub that drops everything up to "--" and execs the rest) first on
// PATH, enables METIS_SENTINEL_NSENTER, and checks that sentinel.Collect
// reports the stubbed hostname and OS image. It then verifies that
// pushSnapshot returns an error for an address nothing is expected to serve.
func TestSentinelNsenterAndErrorBranches(t *testing.T) {
	stubDir := t.TempDir()
	stubs := []struct{ name, body string }{
		{"nsenter", `while [[ "${1:-}" != "--" ]]; do shift; done
shift
exec "$@"`},
		{"hostname", `printf 'titan-13\n'`},
		{"uname", `printf '6.6.63\n'`},
		{"k3s", `printf 'v1.31.5+k3s1\n'`},
		{"containerd", `printf '1.7.99\n'`},
		{"cat", `printf 'PRETTY_NAME="Metis OS"\n'`},
		{"dpkg-query", `printf '1.0.0\n'`},
		{"rpm", `printf '1.0.0\n'`},
	}
	for _, stub := range stubs {
		// Every stub is a bash script that aborts on errors and unset variables.
		script := "#!/usr/bin/env bash\nset -eu\n" + stub.body + "\n"
		target := filepath.Join(stubDir, stub.name)
		if err := os.WriteFile(target, []byte(script), 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}

	// Stub directory goes first so the fakes shadow the real tools.
	t.Setenv("PATH", stubDir+string(os.PathListSeparator)+os.Getenv("PATH"))
	t.Setenv("METIS_SENTINEL_NSENTER", "1")

	snap := sentinel.Collect()
	if !(snap.Hostname == "titan-13" && snap.OSImage == "Metis OS") {
		t.Fatalf("Collect via nsenter = %#v", snap)
	}

	err := pushSnapshot("http://127.0.0.1:1", snap)
	if err == nil {
		t.Fatal("expected pushSnapshot error")
	}
}

View File

@ -14,6 +14,8 @@ import (
"metis/pkg/sentinel"
)
// fatalf reports a fatal error. It defaults to log.Fatalf and is held in a
// variable rather than called directly — presumably so tests can replace it.
var fatalf = log.Fatalf
func main() {
interval := time.Duration(getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300)) * time.Second
pushURL := os.Getenv("METIS_SENTINEL_PUSH_URL")
@ -24,7 +26,7 @@ func main() {
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
if err := enc.Encode(snap); err != nil {
log.Fatalf("encode: %v", err)
fatalf("encode: %v", err)
}
if out := os.Getenv("METIS_SENTINEL_OUT"); out != "" {
writeHistory(out, snap)

View File

@ -0,0 +1,115 @@
package main
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"metis/pkg/sentinel"
)
// TestSentinelMainWritesHistoryAndPushesSnapshot runs main once (run-once mode)
// against stubbed host commands and checks that exactly one history file
// containing the stubbed hostname is written and that the snapshot is POSTed
// to the configured push URL.
func TestSentinelMainWritesHistoryAndPushesSnapshot(t *testing.T) {
	fakeDir := fakeSentinelCommands(t)
	t.Setenv("PATH", fakeDir+string(os.PathListSeparator)+os.Getenv("PATH"))
	historyDir := filepath.Join(t.TempDir(), "history")

	// The handler runs on a server goroutine, not the test goroutine, so it
	// must not call t.Fatalf (FailNow is only valid on the test goroutine) and
	// must not share an unsynchronised bool with the test. It reports with
	// t.Errorf and signals the request through a buffered channel instead.
	pushed := make(chan struct{}, 1)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case pushed <- struct{}{}:
		default: // already signalled by an earlier request
		}
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	t.Setenv("METIS_SENTINEL_RUN_ONCE", "1")
	t.Setenv("METIS_SENTINEL_OUT", historyDir)
	t.Setenv("METIS_SENTINEL_PUSH_URL", srv.URL)
	t.Setenv("METIS_SENTINEL_INTERVAL_SEC", "1")

	main()

	entries, err := os.ReadDir(historyDir)
	if err != nil {
		t.Fatalf("ReadDir history: %v", err)
	}
	if len(entries) != 1 {
		t.Fatalf("expected one history entry, got %d", len(entries))
	}
	data, err := os.ReadFile(filepath.Join(historyDir, entries[0].Name()))
	if err != nil {
		t.Fatalf("ReadFile history: %v", err)
	}
	if !strings.Contains(string(data), `"hostname": "titan-13"`) {
		t.Fatalf("history file missing snapshot data: %s", data)
	}

	// Non-blocking receive: the handler signals before it writes its response.
	select {
	case <-pushed:
	default:
		t.Fatal("expected pushSnapshot to POST to server")
	}
}
// TestSentinelHelpers covers getenvInt (fallback and parsed value),
// writeHistory (one file written per call) and pushSnapshot against a local
// HTTP server that expects a POST with a JSON body.
func TestSentinelHelpers(t *testing.T) {
	// NOTE(review): this fallback assertion assumes METIS_SENTINEL_INTERVAL_SEC
	// is not already set in the environment running the tests — confirm.
	if got := getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300); got != 300 {
		t.Fatalf("getenvInt fallback = %d", got)
	}
	t.Setenv("METIS_SENTINEL_INTERVAL_SEC", "5")
	if got := getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300); got != 5 {
		t.Fatalf("getenvInt = %d", got)
	}

	dir := t.TempDir()
	snap := &sentinel.Snapshot{Hostname: "titan-13", Kernel: "6.6.63"}
	writeHistory(dir, snap)
	entries, err := os.ReadDir(dir)
	if err != nil {
		t.Fatalf("ReadDir: %v", err)
	}
	if len(entries) != 1 {
		t.Fatalf("expected one file, got %d", len(entries))
	}

	// The handler runs on a server goroutine, where t.Fatalf/FailNow is not
	// allowed; failures are recorded with t.Errorf and answered with an error
	// status so the request still completes.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}
		var payload map[string]any
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			t.Errorf("decode push body: %v", err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	if err := pushSnapshot(srv.URL, snap); err != nil {
		t.Fatalf("pushSnapshot: %v", err)
	}
}
// fakeSentinelCommands creates a temporary directory of executable bash stubs
// (hostname, uname, k3s, containerd, cat, dpkg-query, rpm) that print fixed
// values, and returns the directory path so a test can put it on PATH.
func fakeSentinelCommands(t *testing.T) string {
	t.Helper()

	const preamble = "#!/usr/bin/env bash\nset -eu\n"
	stubs := []struct{ name, body string }{
		{"hostname", `printf 'titan-13\n'`},
		{"uname", `printf '6.6.63\n'`},
		{"k3s", `printf 'v1.31.5+k3s1\n'`},
		{"containerd", `printf '1.7.99\n'`},
		{"cat", `printf 'PRETTY_NAME="Metis OS"\n'`},
		// dpkg-query answers based on its last argument (the package name).
		{"dpkg-query", `case "${@: -1}" in
containerd) printf '1.7.99\n' ;;
k3s) printf 'v1.31.5+k3s1\n' ;;
nvidia-container-toolkit) printf '1.16.2\n' ;;
linux-image-raspi) printf '6.6.63\n' ;;
*) printf '1.0.0\n' ;;
esac`},
		{"rpm", `printf '1.0.0\n'`},
	}

	stubDir := t.TempDir()
	for _, stub := range stubs {
		contents := []byte(preamble + stub.body + "\n")
		if err := os.WriteFile(filepath.Join(stubDir, stub.name), contents, 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}
	return stubDir
}

View File

@ -1,28 +1,27 @@
package main
import (
"encoding/json"
"flag"
"log"
"os"
"encoding/json"
"flag"
"os"
"metis/pkg/config"
"metis/pkg/config"
)
func configCmd(args []string) {
fs := flag.NewFlagSet("config", flag.ExitOnError)
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
node := fs.String("node", "", "target node")
fs.Parse(args)
if *node == "" {
log.Fatalf("--node is required")
}
inv := loadInventory(*invPath)
cfg, err := config.Build(inv, *node)
if err != nil {
log.Fatalf("config build: %v", err)
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
_ = enc.Encode(cfg)
fs := flag.NewFlagSet("config", flag.ExitOnError)
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
node := fs.String("node", "", "target node")
fs.Parse(args)
if *node == "" {
fatalf("--node is required")
}
inv := loadInventory(*invPath)
cfg, err := config.Build(inv, *node)
if err != nil {
fatalf("config build: %v", err)
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
_ = enc.Encode(cfg)
}

View File

@ -4,7 +4,6 @@ import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"metis/pkg/facts"
@ -18,11 +17,11 @@ func factsCmd(args []string) {
fs.Parse(args)
inv, err := inventory.Load(*invPath)
if err != nil {
log.Fatalf("load inventory: %v", err)
fatalf("load inventory: %v", err)
}
snaps, err := facts.LoadDir(*dir)
if err != nil {
log.Fatalf("load snapshots: %v", err)
fatalf("load snapshots: %v", err)
}
sum := facts.Aggregate(inv, snaps)
enc := json.NewEncoder(os.Stdout)

244
cmd/metis/gate_test.go Normal file
View File

@ -0,0 +1,244 @@
package main
import (
"net/http"
"os"
"path/filepath"
"strings"
"testing"
)
// TestMainDispatchAllCommands drives main() once per subcommand by swapping
// os.Args, with stub executables first on PATH and listenAndServe replaced by
// a no-op. It makes no assertions on output; it only requires that every
// dispatch returns.
func TestMainDispatchAllCommands(t *testing.T) {
root := t.TempDir()
invPath, baseImage := writeTestInventory(t, root)
snapDir := filepath.Join(root, "snapshots")
if err := os.MkdirAll(snapDir, 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(snapDir, "snap.json"), []byte(`{"hostname":"titan-15","kernel":"6.6.63","package_sample":{"containerd":"1.7"}}`), 0o644); err != nil {
t.Fatal(err)
}
// Bash stubs for the external tools (lsblk, sfdisk, debugfs, xz, oras); the
// oras stub's "pull" copies the base image into the requested output directory.
fakeTools := fakeCommandDir(t, map[string]string{
"lsblk": `cat <<'JSON'
{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro SD","tran":"usb","type":"disk"}]}
JSON`,
"sfdisk": `cat <<'JSON'
{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}
JSON`,
"debugfs": `if [[ "${1:-}" == "-w" ]]; then
cp "${3:-}" "${4:-}.commands"
exit 0
fi
if [[ "${1:-}" == "-R" ]]; then
state="${3:-}.commands"
set -- $2
case "${1:-}" in
stat)
mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)"
mode="${mode: -4}"
printf 'Mode: %s\n' "${mode}"
exit 0
;;
dump)
local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)"
cat "${local_path}" > "${3:-}"
exit 0
;;
esac
fi
exit 0`,
"xz": `case "${1:-}" in
-T0) cp "${@: -1}" "${@: -1}.xz" ;;
-dc) cat "${2:-}" ;;
esac
exit 0`,
"oras": `case "${1:-}" in
login|tag) exit 0 ;;
push) exit 0 ;;
pull)
outdir="${@: -1}"
cp "` + baseImage + `" "${outdir}/titan-15.img"
exit 0
;;
esac
exit 0`,
})
// Stub directory goes first so the fakes shadow any real tools.
t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH"))
t.Setenv("METIS_INVENTORY_PATH", invPath)
t.Setenv("METIS_DATA_DIR", filepath.Join(root, "data"))
// Replace the listener so the serve command returns immediately; the saved
// original is restored when the test finishes.
listenAndServe = func(addr string, _ http.Handler) error { return nil }
t.Cleanup(func() { listenAndServe = httpListenAndServe })
// callMain runs main() with args as os.Args[1:] and restores os.Args afterwards.
callMain := func(args ...string) {
oldArgs := os.Args
os.Args = append([]string{"metis"}, args...)
defer func() { os.Args = oldArgs }()
main()
}
callMain("config", "--inventory", invPath, "--node", "titan-15")
callMain("facts", "--inventory", invPath, "--snapshots", snapDir)
callMain("plan", "--inventory", invPath, "--node", "titan-15")
callMain("burn", "--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz")
callMain("image", "--inventory", invPath, "--node", "titan-15", "--output", filepath.Join(root, "out.img"))
callMain("inject", "--inventory", invPath, "--node", "titan-15", "--boot", filepath.Join(root, "boot"), "--root", filepath.Join(root, "root"))
callMain("serve", "--bind", ":0")
callMain("remote-devices", "--host-tmp-dir", filepath.Join(root, "host-tmp"))
callMain("remote-build", "--inventory", invPath, "--node", "titan-15", "--artifact-ref", "registry.example/metis/titan-15", "--build-tag", "build-1", "--work-dir", filepath.Join(root, "build"), "--cache", filepath.Join(root, "cache"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw")
callMain("remote-flash", "--node", "titan-15", "--device", filepath.Join(root, "flash.img"), "--artifact-ref", "registry.example/metis/titan-15", "--work-dir", filepath.Join(root, "flash"), "--host-tmp-dir", filepath.Join(root, "host-tmp"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw")
}
// TestMainAndCommandFatalBranches replaces the fatalf and exit hooks with
// functions that panic, then checks that main (with no or an unknown
// subcommand) and each command invoked with missing or invalid arguments ends
// up panicking through one of those hooks. The hooks are restored on cleanup.
func TestMainAndCommandFatalBranches(t *testing.T) {
	fatalf = func(format string, args ...any) {
		panic("fatal: " + format)
	}
	exit = func(code int) {
		panic("exit")
	}
	t.Cleanup(func() {
		fatalf = httpLogFatalf
		exit = httpExit
	})

	// expectPanic fails the test unless fn panics.
	expectPanic := func(fn func()) {
		t.Helper()
		defer func() {
			if recovered := recover(); recovered == nil {
				t.Fatal("expected panic")
			}
		}()
		fn()
	}

	// runMain returns a closure that calls main() with argv as os.Args and
	// puts the previous os.Args back afterwards, even when main panics.
	runMain := func(argv ...string) func() {
		return func() {
			saved := os.Args
			os.Args = argv
			defer func() { os.Args = saved }()
			main()
		}
	}

	expectPanic(runMain("metis"))
	expectPanic(runMain("metis", "bogus"))
	expectPanic(func() { configCmd(nil) })
	expectPanic(func() { planCmd([]string{"--inventory", "/nope", "--node", "titan-15"}) })
	expectPanic(func() { burnCmd([]string{"--inventory", "/nope", "--node", "titan-15", "--device", "/dev/sdz"}) })
	expectPanic(func() { imageCmd(nil) })
	expectPanic(func() { injectCmd(nil) })
	expectPanic(func() { factsCmd([]string{"--inventory", "/nope", "--snapshots", "/nope"}) })
	expectPanic(func() { serveCmd([]string{"--bind", ":0"}) })
	expectPanic(func() { remoteBuildCmd([]string{"--node", "n1"}) })
	expectPanic(func() { remoteFlashCmd([]string{"--node", "n1"}) })
}
// TestRemoteCommandHelpers exercises the helpers behind the remote-* commands
// (hasMountedChildren, humanHostPath, getenvOr, the oras wrappers,
// resolvePulledArtifact, localFlashDevices, writeStructuredResult) using stub
// oras and lsblk executables placed first on PATH.
func TestRemoteCommandHelpers(t *testing.T) {
if !hasMountedChildren([]struct {
Mountpoint string `json:"mountpoint"`
}{{Mountpoint: "/mnt"}}) {
t.Fatal("hasMountedChildren should detect a mount point")
}
if got := humanHostPath("/host-tmp/metis-flash"); got != "/tmp/metis-flash" {
t.Fatalf("humanHostPath = %q", got)
}
t.Setenv("METIS_REMOTE_SAMPLE", "value")
if got := getenvOr("METIS_REMOTE_SAMPLE", "fallback"); got != "value" {
t.Fatalf("getenvOr = %q", got)
}
dir := t.TempDir()
base := filepath.Join(dir, "base.img")
if err := os.WriteFile(base, []byte("artifact"), 0o644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(base+".meta", []byte(`{"meta":true}`), 0o644); err != nil {
t.Fatal(err)
}
// The oras stub succeeds for login/tag/push and, on pull, copies base.img into
// the output directory as artifact.img. The lsblk stub reports two USB disks:
// sdz (child with an empty mountpoint) and sdy (child mounted at /mnt).
fakeTools := fakeCommandDir(t, map[string]string{
"oras": `case "${1:-}" in
login|tag) exit 0 ;;
push) exit 0 ;;
pull)
outdir="${@: -1}"
cp "` + base + `" "${outdir}/artifact.img"
exit 0
;;
esac
exit 0`,
"lsblk": `cat <<'JSON'
{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro SD","tran":"usb","type":"disk","children":[{"mountpoint":""}]},{"name":"sdy","path":"/dev/sdy","rm":true,"hotplug":true,"size":"64000000000","model":"SSD","tran":"usb","type":"disk","children":[{"mountpoint":"/mnt"}]}]}
JSON`,
})
t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH"))
// orasLogin is expected to reject empty credentials and accept non-empty ones.
if err := orasLogin("registry.example", "", ""); err == nil {
t.Fatal("expected orasLogin to reject missing creds")
}
if err := orasLogin("registry.example", "u", "p"); err != nil {
t.Fatalf("orasLogin: %v", err)
}
// Image and metadata paths in different directories are expected to be rejected.
if _, _, err := orasPushInvocation("r", filepath.Join(dir, "one", "a.img"), filepath.Join(dir, "two", "b.meta")); err == nil {
t.Fatal("expected orasPushInvocation mismatch error")
}
pushDir := filepath.Join(dir, "push")
if err := os.MkdirAll(pushDir, 0o755); err != nil {
t.Fatal(err)
}
img := filepath.Join(pushDir, "a.img")
meta := filepath.Join(pushDir, "a.meta")
if err := os.WriteFile(img, []byte("x"), 0o644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(meta, []byte(`{}`), 0o644); err != nil {
t.Fatal(err)
}
if _, args, err := orasPushInvocation("ref", img, meta); err != nil || len(args) == 0 {
t.Fatalf("orasPushInvocation success = %#v %v", args, err)
}
if err := orasPush("ref", img, meta); err != nil {
t.Fatalf("orasPush: %v", err)
}
if err := orasTag("ref", "latest"); err != nil {
t.Fatalf("orasTag: %v", err)
}
pullDir := filepath.Join(dir, "pull")
if err := os.MkdirAll(pullDir, 0o755); err != nil {
t.Fatal(err)
}
if err := orasPull("ref", pullDir); err != nil {
t.Fatalf("orasPull: %v", err)
}
// With only artifact.img present the result must be the uncompressed image.
artifact, compressed, err := resolvePulledArtifact(pullDir)
if err != nil || compressed || !strings.HasSuffix(artifact, ".img") {
t.Fatalf("resolvePulledArtifact raw = %q compressed=%v err=%v", artifact, compressed, err)
}
// Once an .img.xz sits next to it, the compressed artifact is expected instead.
if err := os.WriteFile(filepath.Join(pullDir, "artifact.img.xz"), []byte("x"), 0o644); err != nil {
t.Fatal(err)
}
artifact, compressed, err = resolvePulledArtifact(pullDir)
if err != nil || !compressed || !strings.HasSuffix(artifact, ".img.xz") {
t.Fatalf("resolvePulledArtifact xz = %q compressed=%v err=%v", artifact, compressed, err)
}
if _, _, err := resolvePulledArtifact(filepath.Join(dir, "missing")); err == nil {
t.Fatal("expected resolvePulledArtifact error")
}
// The first returned device is expected to be /dev/sdz.
devices, err := localFlashDevices(40000000000, filepath.Join(dir, "host-tmp"))
if err != nil {
t.Fatalf("localFlashDevices: %v", err)
}
if len(devices) == 0 || devices[0].Path != "/dev/sdz" {
t.Fatalf("localFlashDevices = %#v", devices)
}
writeStructuredResult(map[string]any{"ok": true})
}
// Snapshots of the overridable hooks taken at package initialization. Test
// cleanups assign these back to fatalf, exit, and listenAndServe after a test
// has replaced them.
var (
httpLogFatalf = fatalf
httpExit = exit
httpListenAndServe = listenAndServe
)

View File

@ -4,7 +4,6 @@ import (
"context"
"flag"
"fmt"
"log"
"os"
"path/filepath"
@ -19,7 +18,7 @@ func imageCmd(args []string) {
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
fs.Parse(args)
if *node == "" {
log.Fatalf("--node is required")
fatalf("--node is required")
}
inv := loadInventory(*invPath)
@ -29,7 +28,7 @@ func imageCmd(args []string) {
}
if err := plan.BuildImageFile(context.Background(), inv, *node, *cache, targetOutput); err != nil {
log.Fatalf("build image: %v", err)
fatalf("build image: %v", err)
}
fmt.Printf("Wrote %s\n", targetOutput)

View File

@ -2,7 +2,6 @@ package main
import (
"flag"
"log"
"metis/pkg/plan"
)
@ -15,13 +14,13 @@ func injectCmd(args []string) {
root := fs.String("root", "", "mounted root path")
fs.Parse(args)
if *node == "" {
log.Fatalf("--node is required")
fatalf("--node is required")
}
if *boot == "" && *root == "" {
log.Fatalf("--boot or --root is required")
fatalf("--boot or --root is required")
}
inv := loadInventory(*invPath)
if err := plan.Inject(inv, *node, *boot, *root); err != nil {
log.Fatalf("inject: %v", err)
fatalf("inject: %v", err)
}
}

View File

@ -12,10 +12,15 @@ import (
"metis/pkg/plan"
)
// fatalf and exit are indirections over log.Fatalf and os.Exit. The commands
// call these variables instead of the library functions so that tests can
// substitute implementations that do not terminate the process.
var (
fatalf = log.Fatalf
exit = os.Exit
)
func main() {
if len(os.Args) < 2 {
usage()
os.Exit(1)
exit(1)
}
switch os.Args[1] {
case "plan":
@ -40,7 +45,7 @@ func main() {
remoteFlashCmd(os.Args[2:])
default:
usage()
os.Exit(1)
exit(1)
}
}
@ -51,7 +56,7 @@ func usage() {
// loadInventory loads the inventory file at path and returns it. A load
// failure is reported through the fatalf hook only; calling log.Fatalf
// directly first would exit the process before the hook could run.
func loadInventory(path string) *inventory.Inventory {
	inv, err := inventory.Load(path)
	if err != nil {
		fatalf("load inventory: %v", err)
	}
	return inv
}
@ -66,7 +71,7 @@ func planCmd(args []string) {
root := fs.String("root", "", "mounted root path for injection (optional)")
fs.Parse(args)
if *node == "" {
log.Fatalf("--node is required")
fatalf("--node is required")
}
inv := loadInventory(*invPath)
if *boot != "" {
@ -77,7 +82,7 @@ func planCmd(args []string) {
}
p, err := plan.Build(inv, *node, *device, *cache)
if err != nil {
log.Fatalf("build plan: %v", err)
fatalf("build plan: %v", err)
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
@ -96,7 +101,7 @@ func burnCmd(args []string) {
confirm := fs.Bool("yes", false, "actually write to device")
fs.Parse(args)
if *node == "" || *device == "" {
log.Fatalf("--node and --device are required")
fatalf("--node and --device are required")
}
inv := loadInventory(*invPath)
if *boot != "" {
@ -110,7 +115,7 @@ func burnCmd(args []string) {
}
p, err := plan.Execute(inv, *node, *device, *cache, *confirm)
if err != nil {
log.Fatalf("burn: %v", err)
fatalf("burn: %v", err)
}
fmt.Printf("Plan for %s to %s:\n", p.Node, p.Device)
for _, a := range p.Actions {

291
cmd/metis/main_test.go Normal file
View File

@ -0,0 +1,291 @@
package main
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"testing"
)
// TestUsageWritesSupportedCommands checks that usage prints nothing on stdout
// and that its stderr output mentions the "remote-flash" and "plan" commands.
func TestUsageWritesSupportedCommands(t *testing.T) {
	out, errOut := captureStreams(t, usage)
	if out != "" {
		t.Fatalf("usage wrote stdout: %q", out)
	}
	for _, command := range []string{"remote-flash", "plan"} {
		if !strings.Contains(errOut, command) {
			t.Fatalf("usage output missing commands: %q", errOut)
		}
	}
}
// TestConfigFactsPlanBurnImageInjectAndServeCommands calls the config, facts,
// plan, image, burn, inject, and serve command functions directly against a
// generated test inventory, capturing stdout and checking each command's
// output (or, for inject, the file it writes). The steps run in sequence and
// share the same temporary root directory.
func TestConfigFactsPlanBurnImageInjectAndServeCommands(t *testing.T) {
root := t.TempDir()
invPath, baseImage := writeTestInventory(t, root)
t.Setenv("METIS_INVENTORY_PATH", invPath)
t.Setenv("METIS_DATA_DIR", filepath.Join(root, "data"))
t.Setenv("METIS_BOOT_PATH", "")
t.Setenv("METIS_ROOT_PATH", "")
stdout, _ := captureStreams(t, func() {
configCmd([]string{"--inventory", invPath, "--node", "titan-15"})
})
if !strings.Contains(stdout, `"hostname": "titan-15"`) {
t.Fatalf("config output missing hostname: %s", stdout)
}
// A single snapshot file for titan-15 feeds the facts command.
snapDir := filepath.Join(root, "snapshots")
if err := os.MkdirAll(snapDir, 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(snapDir, "snap.json"), []byte(`{"hostname":"titan-15","kernel":"6.6.63","package_sample":{"containerd":"1.7"}}`), 0o644); err != nil {
t.Fatal(err)
}
stdout, _ = captureStreams(t, func() {
factsCmd([]string{"--inventory", invPath, "--snapshots", snapDir})
})
if !strings.Contains(stdout, `"class": "rpi4"`) {
t.Fatalf("facts output missing class summary: %s", stdout)
}
stdout, _ = captureStreams(t, func() {
planCmd([]string{"--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz", "--cache", filepath.Join(root, "cache")})
})
if !strings.Contains(stdout, `"node": "titan-15"`) || !strings.Contains(stdout, `"actions"`) {
t.Fatalf("plan output missing plan JSON: %s", stdout)
}
// Stub sfdisk and debugfs are put on PATH before the image, burn, and inject
// commands run.
rootTools := fakeCommandDir(t, map[string]string{
"sfdisk": `cat <<'JSON'
{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}
JSON`,
"debugfs": `if [[ "${1:-}" == "-w" ]]; then
cp "${3:-}" "${4:-}.commands"
exit 0
fi
if [[ "${1:-}" == "-R" ]]; then
state="${3:-}.commands"
set -- $2
case "${1:-}" in
stat)
mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)"
mode="${mode: -4}"
printf 'Mode: %s\n' "${mode}"
exit 0
;;
dump)
local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)"
cat "${local_path}" > "${3:-}"
exit 0
;;
esac
fi
exit 0`,
})
t.Setenv("PATH", rootTools+string(os.PathListSeparator)+os.Getenv("PATH"))
stdout, _ = captureStreams(t, func() {
imageCmd([]string{"--inventory", invPath, "--node", "titan-15", "--output", filepath.Join(root, "out.img"), "--cache", filepath.Join(root, "cache")})
})
if !strings.Contains(stdout, "Wrote ") {
t.Fatalf("image output missing write confirmation: %s", stdout)
}
// burnCmd is called without --yes, so the confirm flag keeps its default of false.
stdout, _ = captureStreams(t, func() {
burnCmd([]string{"--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz", "--cache", filepath.Join(root, "cache")})
})
if !strings.Contains(stdout, "Plan for titan-15 to /dev/sdz") {
t.Fatalf("burn output missing plan header: %s", stdout)
}
bootDir := filepath.Join(root, "boot")
rootDir := filepath.Join(root, "root")
if err := os.MkdirAll(bootDir, 0o755); err != nil {
t.Fatal(err)
}
if err := os.MkdirAll(rootDir, 0o755); err != nil {
t.Fatal(err)
}
captureStreams(t, func() {
injectCmd([]string{"--inventory", invPath, "--node", "titan-15", "--boot", bootDir, "--root", rootDir})
})
if _, err := os.Stat(filepath.Join(rootDir, "etc/metis/node.json")); err != nil {
t.Fatalf("injectCmd did not write root file: %v", err)
}
// Stub the listener so serveCmd returns immediately; the stub also verifies
// the bind address it is handed.
listenAndServe = func(addr string, handler http.Handler) error {
if addr != ":0" {
t.Fatalf("unexpected bind addr: %s", addr)
}
return nil
}
t.Cleanup(func() { listenAndServe = http.ListenAndServe })
t.Setenv("METIS_BIND_ADDR", ":0")
t.Setenv("METIS_INVENTORY_PATH", invPath)
serveCmd([]string{"--bind", ":0"})
// baseImage is not otherwise used in this test; the blank assignment keeps
// the compiler from rejecting the unused variable.
_ = baseImage
}
// TestMainDispatchesConfig runs main with os.Args set to a "config" invocation
// for node titan-15 against a generated inventory. It passes as long as main
// returns; os.Args is restored during cleanup.
func TestMainDispatchesConfig(t *testing.T) {
	invPath, _ := writeTestInventory(t, t.TempDir())

	savedArgs := os.Args
	t.Cleanup(func() { os.Args = savedArgs })
	os.Args = []string{"metis", "config", "--inventory", invPath, "--node", "titan-15"}

	main()
}
// TestRemoteCommandsAndHelpers runs remoteDevicesCmd, remoteBuildCmd, and
// remoteFlashCmd with stub lsblk, sfdisk, debugfs, xz, and oras executables
// first on PATH, and checks the structured output each command writes to stdout.
func TestRemoteCommandsAndHelpers(t *testing.T) {
root := t.TempDir()
invPath, baseImage := writeTestInventory(t, root)
// The xz stub copies its last argument to "<arg>.xz"; the oras stub's "pull"
// copies the base image into the output directory as titan-15.img.
fakeTools := fakeCommandDir(t, map[string]string{
"lsblk": `cat <<'JSON'
{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro SD","tran":"usb","type":"disk"}]}
JSON`,
"sfdisk": `cat <<'JSON'
{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}
JSON`,
"debugfs": `if [[ "${1:-}" == "-w" ]]; then
cp "${3:-}" "${4:-}.commands"
exit 0
fi
if [[ "${1:-}" == "-R" ]]; then
state="${3:-}.commands"
set -- $2
case "${1:-}" in
stat)
mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)"
mode="${mode: -4}"
printf 'Mode: %s\n' "${mode}"
exit 0
;;
dump)
local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)"
cat "${local_path}" > "${3:-}"
exit 0
;;
esac
fi
exit 0`,
"xz": `dest="${@: -1}"; cp "$dest" "$dest.xz"`,
"oras": `case "${1:-}" in
login|tag) exit 0 ;;
push) exit 0 ;;
pull)
outdir="${@: -1}"
cp "` + baseImage + `" "${outdir}/titan-15.img"
exit 0
;;
esac
exit 0`,
})
t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH"))
stdout, _ := captureStreams(t, func() {
remoteDevicesCmd([]string{"--max-device-bytes", "40000000000", "--host-tmp-dir", filepath.Join(root, "host-tmp")})
})
if !strings.Contains(stdout, `"path":"/dev/sdz"`) {
t.Fatalf("remoteDevicesCmd output missing device: %s", stdout)
}
stdout, _ = captureStreams(t, func() {
remoteBuildCmd([]string{"--inventory", invPath, "--node", "titan-15", "--artifact-ref", "registry.example/metis/titan-15", "--build-tag", "build-1", "--work-dir", filepath.Join(root, "build"), "--cache", filepath.Join(root, "cache"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw"})
})
if !strings.Contains(stdout, `"build_tag":"build-1"`) {
t.Fatalf("remoteBuildCmd output missing build tag: %s", stdout)
}
// The device is given with the hosttmp:// prefix and a --host-tmp-dir inside
// the test's temporary root.
stdout, _ = captureStreams(t, func() {
remoteFlashCmd([]string{"--node", "titan-15", "--device", "hosttmp:///tmp", "--artifact-ref", "registry.example/metis/titan-15", "--work-dir", filepath.Join(root, "flash"), "--host-tmp-dir", filepath.Join(root, "host-tmp"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw"})
})
if !strings.Contains(stdout, `"dest_path"`) {
t.Fatalf("remoteFlashCmd output missing dest_path: %s", stdout)
}
}
// captureStreams runs fn with os.Stdout and os.Stderr redirected to pipes and
// returns everything fn wrote to each stream as (stdout, stderr).
//
// Each pipe is drained by its own goroutine, so fn can write any amount to
// either stream without blocking on a full pipe buffer. The original streams
// are restored and the pipe ends are closed even if fn panics or ends the test
// via t.Fatal; in that case the panic/exit continues to propagate after the
// cleanup. The test fails immediately if a pipe cannot be created.
func captureStreams(t *testing.T, fn func()) (string, string) {
	t.Helper()

	stdoutR, stdoutW, err := os.Pipe()
	if err != nil {
		t.Fatalf("create stdout pipe: %v", err)
	}
	stderrR, stderrW, err := os.Pipe()
	if err != nil {
		_ = stdoutR.Close()
		_ = stdoutW.Close()
		t.Fatalf("create stderr pipe: %v", err)
	}

	// drain copies r to memory until EOF, closes r, and delivers the text.
	drain := func(r *os.File) <-chan string {
		ch := make(chan string, 1)
		go func() {
			var buf bytes.Buffer
			_, _ = io.Copy(&buf, r)
			_ = r.Close()
			ch <- buf.String()
		}()
		return ch
	}
	outCh := drain(stdoutR)
	errCh := drain(stderrR)

	oldStdout, oldStderr := os.Stdout, os.Stderr
	os.Stdout, os.Stderr = stdoutW, stderrW

	var stdout, stderr string
	func() {
		defer func() {
			os.Stdout, os.Stderr = oldStdout, oldStderr
			// Closing the write ends delivers EOF to the drain goroutines.
			_ = stdoutW.Close()
			_ = stderrW.Close()
			stdout, stderr = <-outCh, <-errCh
		}()
		fn()
	}()
	return stdout, stderr
}
// writeTestInventory writes a 4096-byte zero-filled base image and an
// inventory.yaml under root, and returns (inventory path, base image path).
// The inventory declares class "rpi4" — pointing at the base image via a
// file:// URL together with its sha256 checksum — and one node, "titan-15".
func writeTestInventory(t *testing.T, root string) (string, string) {
	t.Helper()

	zeros := make([]byte, 4096)
	baseImage := filepath.Join(root, "base.img")
	if err := os.WriteFile(baseImage, zeros, 0o644); err != nil {
		t.Fatal(err)
	}

	digest := sha256SumHex(t, zeros)
	contents := `classes:
- name: rpi4
arch: arm64
os: armbian
image: file://` + baseImage + `
checksum: sha256:` + digest + `
k3s_version: v1.31.5+k3s1
nodes:
- name: titan-15
class: rpi4
hostname: titan-15
ip: 192.168.22.43
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: token
ssh_user: atlas
ssh_authorized_keys:
- ssh-ed25519 AAA
`
	invPath := filepath.Join(root, "inventory.yaml")
	if err := os.WriteFile(invPath, []byte(contents), 0o644); err != nil {
		t.Fatal(err)
	}
	return invPath, baseImage
}
// sha256SumHex returns the SHA-256 digest of data as a lowercase hex string.
func sha256SumHex(t *testing.T, data []byte) string {
	t.Helper()
	hasher := sha256.New()
	// hash.Hash.Write is documented never to return an error.
	_, _ = hasher.Write(data)
	return hex.EncodeToString(hasher.Sum(nil))
}
// fakeCommandDir writes one executable bash script per entry of scripts (file
// name -> script body) into a fresh temporary directory and returns that
// directory. Each script gets a bash shebang and `set -eu` ahead of its body.
func fakeCommandDir(t *testing.T, scripts map[string]string) string {
	t.Helper()

	const preamble = "#!/usr/bin/env bash\nset -eu\n"
	binDir := t.TempDir()
	for tool, script := range scripts {
		contents := []byte(preamble + script + "\n")
		if err := os.WriteFile(filepath.Join(binDir, tool), contents, 0o755); err != nil {
			t.Fatalf("write %s: %v", tool, err)
		}
	}
	return binDir
}

View File

@ -5,7 +5,6 @@ import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
@ -27,7 +26,7 @@ func remoteDevicesCmd(args []string) {
devices, err := localFlashDevices(*maxBytes, *hostTmpDir)
if err != nil {
log.Fatalf("remote devices: %v", err)
fatalf("remote devices: %v", err)
}
sort.Slice(devices, func(i, j int) bool {
left := localDeviceScore(devices[i])
@ -56,24 +55,24 @@ func remoteBuildCmd(args []string) {
harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password")
fs.Parse(args)
if *node == "" || *artifactRef == "" || *buildTag == "" {
log.Fatalf("--node, --artifact-ref, and --build-tag are required")
fatalf("--node, --artifact-ref, and --build-tag are required")
}
if err := os.MkdirAll(*workDir, 0o755); err != nil {
log.Fatalf("mkdir workdir: %v", err)
fatalf("mkdir workdir: %v", err)
}
output := filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
inv := loadInventory(*invPath)
if err := plan.BuildImageFile(context.Background(), inv, *node, *cacheDir, output); err != nil {
log.Fatalf("build image: %v", err)
fatalf("build image: %v", err)
}
if err := exec.Command("xz", "-T0", "-z", "-f", output).Run(); err != nil {
log.Fatalf("xz compress: %v", err)
fatalf("xz compress: %v", err)
}
compressedPath := output + ".xz"
info, err := os.Stat(compressedPath)
if err != nil {
log.Fatalf("stat compressed image: %v", err)
fatalf("stat compressed image: %v", err)
}
metadataPath := filepath.Join(*workDir, "metadata.json")
@ -88,20 +87,20 @@ func remoteBuildCmd(args []string) {
}
metaBytes, err := json.MarshalIndent(meta, "", " ")
if err != nil {
log.Fatalf("encode metadata: %v", err)
fatalf("encode metadata: %v", err)
}
if err := os.WriteFile(metadataPath, metaBytes, 0o644); err != nil {
log.Fatalf("write metadata: %v", err)
fatalf("write metadata: %v", err)
}
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
log.Fatalf("oras login: %v", err)
fatalf("oras login: %v", err)
}
taggedRef := fmt.Sprintf("%s:%s", *artifactRef, *buildTag)
if err := orasPush(taggedRef, compressedPath, metadataPath); err != nil {
log.Fatalf("oras push: %v", err)
fatalf("oras push: %v", err)
}
if err := orasTag(taggedRef, "latest"); err != nil {
log.Fatalf("oras tag latest: %v", err)
fatalf("oras tag latest: %v", err)
}
summary := service.ArtifactSummary{
@ -128,40 +127,40 @@ func remoteFlashCmd(args []string) {
hostTmpDir := fs.String("host-tmp-dir", "/host-tmp/metis-flash-test", "mounted host tmp dir for test writes")
fs.Parse(args)
if *node == "" || *device == "" || *artifactRef == "" {
log.Fatalf("--node, --device, and --artifact-ref are required")
fatalf("--node, --device, and --artifact-ref are required")
}
if err := os.MkdirAll(*workDir, 0o755); err != nil {
log.Fatalf("mkdir workdir: %v", err)
fatalf("mkdir workdir: %v", err)
}
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
log.Fatalf("oras login: %v", err)
fatalf("oras login: %v", err)
}
if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
log.Fatalf("oras pull: %v", err)
fatalf("oras pull: %v", err)
}
imagePath, compressed, err := resolvePulledArtifact(*workDir)
if err != nil {
log.Fatalf("resolve artifact: %v", err)
fatalf("resolve artifact: %v", err)
}
rawImage := imagePath
if compressed {
rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage))
if out, err := cmd.CombinedOutput(); err != nil {
log.Fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out)))
fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out)))
}
}
destPath := *device
if strings.HasPrefix(destPath, "hosttmp://") {
if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil {
log.Fatalf("mkdir host tmp dir: %v", err)
fatalf("mkdir host tmp dir: %v", err)
}
destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
}
if err := writer.WriteImage(context.Background(), rawImage, destPath); err != nil {
log.Fatalf("write image: %v", err)
fatalf("write image: %v", err)
}
_ = exec.Command("sync").Run()
if strings.HasPrefix(destPath, "/dev/") {
@ -170,7 +169,7 @@ func remoteFlashCmd(args []string) {
info, err := os.Stat(destPath)
if err != nil {
log.Fatalf("stat destination: %v", err)
fatalf("stat destination: %v", err)
}
writeStructuredResult(map[string]any{
"node": *node,
@ -183,10 +182,10 @@ func remoteFlashCmd(args []string) {
func writeStructuredResult(payload any) {
data, err := json.Marshal(payload)
if err != nil {
log.Fatalf("encode result: %v", err)
fatalf("encode result: %v", err)
}
if _, err := os.Stdout.Write(append(data, '\n')); err != nil {
log.Fatalf("write stdout result: %v", err)
fatalf("write stdout result: %v", err)
}
// Keep the result available in pod status so Metis does not depend on the
// kubelet log endpoint for successful worker runs.

View File

@ -8,6 +8,8 @@ import (
"metis/pkg/service"
)
// listenAndServe is the function serveCmd uses to start the HTTP server. It
// defaults to http.ListenAndServe and is a variable so that it can be replaced
// with a stub.
var listenAndServe = http.ListenAndServe
func serveCmd(args []string) {
fs := flag.NewFlagSet("serve", flag.ExitOnError)
bindAddr := fs.String("bind", "", "override bind address")
@ -19,10 +21,10 @@ func serveCmd(args []string) {
}
app, err := service.NewApp(settings)
if err != nil {
log.Fatalf("init service: %v", err)
fatalf("init service: %v", err)
}
log.Printf("metis listening on %s", settings.BindAddr)
if err := http.ListenAndServe(settings.BindAddr, app.Handler()); err != nil {
log.Fatalf("serve: %v", err)
if err := listenAndServe(settings.BindAddr, app.Handler()); err != nil {
fatalf("serve: %v", err)
}
}

54
pkg/config/config_test.go Normal file
View File

@ -0,0 +1,54 @@
package config
import (
"testing"
"metis/pkg/inventory"
)
// TestBuildUsesNodeOverridesAndDefaultFilesystem builds a config for a node
// that overrides its class K3s version and checks the merged result: the node
// version wins, the class default label survives, the role is carried through,
// and the first fstab entry reports ext4 even though the Longhorn disk in the
// inventory names no filesystem.
func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) {
	inv := inventory.Inventory{
		Classes: []inventory.NodeClass{{
			Name:          "c1",
			Arch:          "arm64",
			OS:            "linux",
			Image:         "file:///tmp/base.img",
			K3sVersion:    "v1.30.0+k3s1",
			DefaultLabels: map[string]string{"role": "worker"},
		}},
		Nodes: []inventory.NodeSpec{{
			Name:          "n1",
			Class:         "c1",
			Hostname:      "n1",
			IP:            "1.1.1.1",
			K3sRole:       "server",
			K3sVersion:    "v1.31.5+k3s1",
			SSHUser:       "atlas",
			SSHAuthorized: []string{"key"},
			LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/data", UUID: "uuid-1"}},
		}},
	}
	cfg, err := Build(&inv, "n1")
	if err != nil {
		t.Fatalf("Build: %v", err)
	}
	if got, want := cfg.K3s.Version, "v1.31.5+k3s1"; got != want {
		t.Fatalf("k3s version = %q, want %q", got, want)
	}
	// Guard the index below so a regression that drops the fstab entry is
	// reported as a test failure rather than an index-out-of-range panic.
	if len(cfg.Fstab) == 0 {
		t.Fatalf("expected at least one fstab entry, got none: %#v", cfg)
	}
	if got := cfg.Fstab[0].FS; got != "ext4" {
		t.Fatalf("expected default filesystem ext4, got %q", got)
	}
	if got := cfg.Labels["role"]; got != "worker" {
		t.Fatalf("label merge lost default label: %q", got)
	}
	if cfg.K3s.Role != "server" {
		t.Fatalf("expected server role, got %q", cfg.K3s.Role)
	}
}
// TestBuildReturnsErrorForMissingNode verifies that Build reports an error
// when the requested node is not present in an empty inventory.
func TestBuildReturnsErrorForMissingNode(t *testing.T) {
	var inv inventory.Inventory
	_, err := Build(&inv, "missing")
	if err == nil {
		t.Fatal("expected missing node error")
	}
}

View File

@ -0,0 +1,51 @@
package config
import (
"testing"
"metis/pkg/inventory"
)
// TestBuildBranches covers three Build outcomes: a fully specified node whose
// K3s version overrides the class value, a lookup against an empty inventory,
// and a node whose hostname has been cleared.
func TestBuildBranches(t *testing.T) {
	class := inventory.NodeClass{
		Name:          "rpi4",
		Arch:          "arm64",
		OS:            "armbian",
		Image:         "file:///tmp/base.img",
		K3sVersion:    "v1.31.5+k3s1",
		DefaultLabels: map[string]string{"a": "class", "b": "class"},
		DefaultTaints: []string{"class-taint"},
	}
	node := inventory.NodeSpec{
		Name:          "titan-15",
		Class:         "rpi4",
		Hostname:      "titan-15",
		IP:            "192.168.22.43",
		K3sRole:       "agent",
		K3sVersion:    "v1.31.5+k3s2",
		K3sURL:        "https://192.168.22.7:6443",
		K3sToken:      "token",
		Labels:        map[string]string{"c": "node"},
		Taints:        []string{"node-taint"},
		SSHUser:       "atlas",
		SSHAuthorized: []string{"ssh-ed25519 AAA"},
		LonghornDisks: []inventory.LonghornDisk{{UUID: "u1", Mountpoint: "/var/lib/longhorn"}},
	}
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{class},
		Nodes:   []inventory.NodeSpec{node},
	}

	cfg, err := Build(inv, "titan-15")
	if err != nil {
		t.Fatalf("Build: %v", err)
	}
	// Case expressions are evaluated in order and stop at the first match, so
	// the Fstab index is only reached once the length check has passed.
	switch {
	case cfg.K3s.Version != "v1.31.5+k3s2",
		len(cfg.Fstab) != 1,
		cfg.Fstab[0].FS != "ext4":
		t.Fatalf("unexpected config: %#v", cfg)
	}

	_, err = Build(&inventory.Inventory{}, "missing")
	if err == nil {
		t.Fatal("expected Build to fail for missing node")
	}

	// Clearing the hostname on the stored node must make Build fail.
	inv.Nodes[0].Hostname = ""
	_, err = Build(inv, "titan-15")
	if err == nil {
		t.Fatal("expected Build to fail without hostname")
	}
}

View File

@ -6,13 +6,13 @@ import (
// ClassSummary captures aggregated sentinel facts per class.
type ClassSummary struct {
Class string `json:"class"`
Nodes []string `json:"nodes"`
Kernels map[string]int `json:"kernels,omitempty"`
OSImages map[string]int `json:"os_images,omitempty"`
Containerd map[string]int `json:"containerd,omitempty"`
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
Class string `json:"class"`
Nodes []string `json:"nodes"`
Kernels map[string]int `json:"kernels,omitempty"`
OSImages map[string]int `json:"os_images,omitempty"`
Containerd map[string]int `json:"containerd,omitempty"`
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
}
// Aggregate groups snapshots by inventory class and tallies version drift.

View File

@ -31,3 +31,32 @@ func TestAggregateGroupsByClass(t *testing.T) {
t.Fatalf("package stats not tallied: %#v", c1.PackageStats)
}
}
// TestAggregateKeepsUnknownHostnames checks that a snapshot whose hostname is
// not found in the inventory (here: a nil inventory) is still reported, under
// the "unknown" summary key.
func TestAggregateKeepsUnknownHostnames(t *testing.T) {
	sum := Aggregate(nil, []Snapshot{{Hostname: "ghost", Kernel: "k"}})
	unknown := sum["unknown"]
	// Check presence and length before indexing so a missing bucket is a
	// readable test failure instead of a nil dereference or index panic.
	if unknown == nil || len(unknown.Nodes) == 0 || unknown.Nodes[0] != "ghost" {
		t.Fatalf("unexpected unknown aggregate: %#v", unknown)
	}
}
// TestChooseTargetsHandlesTiesAndEmptyValues asserts that ChooseTargets leaves
// the kernel empty when two kernels have equal counts, picks the only OS
// image, and omits a package whose most frequent version is the empty string.
func TestChooseTargetsHandlesTiesAndEmptyValues(t *testing.T) {
	summary := ClassSummary{
		Kernels:  map[string]int{"k1": 2, "k2": 2},
		OSImages: map[string]int{"img": 1},
		PackageStats: map[string]map[string]int{
			"p": {"": 3, "1": 1},
		},
	}
	got := ChooseTargets(&summary)
	if got.Kernel != "" {
		t.Fatalf("expected kernel tie to return empty, got %q", got.Kernel)
	}
	if got.OSImage != "img" {
		t.Fatalf("expected OS image img, got %q", got.OSImage)
	}
	if _, present := got.Packages["p"]; present {
		t.Fatalf("expected empty package version to be skipped: %+v", got.Packages)
	}
}

View File

@ -0,0 +1,52 @@
package facts
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"metis/pkg/inventory"
)
// TestAggregateAndLoadBranches writes the same snapshot at the top level and
// in a nested directory, then exercises LoadDir (success and missing dir),
// Aggregate (nil inventory and a matching inventory), and ChooseTargets (tie
// and nil summary).
func TestAggregateAndLoadBranches(t *testing.T) {
	dir := t.TempDir()
	nested := filepath.Join(dir, "nested")
	if err := os.MkdirAll(nested, 0o755); err != nil {
		t.Fatal(err)
	}
	snapPath := filepath.Join(dir, "one.json")
	// Fail loudly if the fixture cannot be encoded instead of writing an
	// empty file and surfacing the problem later as a LoadDir failure.
	data, err := json.Marshal(Snapshot{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"p": "1"}})
	if err != nil {
		t.Fatalf("marshal snapshot: %v", err)
	}
	if err := os.WriteFile(snapPath, data, 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(nested, "two.json"), data, 0o644); err != nil {
		t.Fatal(err)
	}
	snaps, err := LoadDir(dir)
	if err != nil || len(snaps) != 2 {
		t.Fatalf("LoadDir = %#v err=%v", snaps, err)
	}
	if _, err := LoadDir(filepath.Join(dir, "missing")); err == nil {
		t.Fatal("expected LoadDir to fail for missing dir")
	}
	// With no inventory, both snapshots are expected under "unknown".
	sums := Aggregate(nil, snaps)
	if sums["unknown"] == nil || len(sums["unknown"].Nodes) != 2 {
		t.Fatalf("Aggregate unknown = %#v", sums)
	}
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{{Name: "rpi4"}},
		Nodes:   []inventory.NodeSpec{{Name: "n1", Class: "rpi4"}},
	}
	sums = Aggregate(inv, snaps)
	if sums["rpi4"] == nil {
		t.Fatalf("expected class summary: %#v", sums)
	}
	if got := ChooseTargets(&ClassSummary{Kernels: map[string]int{"a": 1, "b": 1}}); got.Kernel != "" {
		t.Fatalf("tie should clear target: %#v", got)
	}
	if got := ChooseTargets(nil); got.Packages == nil {
		t.Fatal("ChooseTargets should return initialized package map")
	}
}

View File

@ -20,3 +20,34 @@ func TestLoadDirReadsSnapshots(t *testing.T) {
t.Fatalf("unexpected snapshot: %+v", got)
}
}
// TestLoadDirRejectsInvalidJSON verifies that LoadDir returns an error when a
// .json file in the directory contains truncated JSON.
func TestLoadDirRejectsInvalidJSON(t *testing.T) {
	root := t.TempDir()
	broken := filepath.Join(root, "broken.json")
	if err := os.WriteFile(broken, []byte(`{"hostname":`), 0o644); err != nil {
		t.Fatal(err)
	}
	_, err := LoadDir(root)
	if err == nil {
		t.Fatal("expected JSON parse error")
	}
}
// TestLoadDirReadsNestedDirectoriesAndMissingDir checks that LoadDir finds a
// snapshot stored one directory below the root and that it fails for a path
// that does not exist.
func TestLoadDirReadsNestedDirectoriesAndMissingDir(t *testing.T) {
	root := t.TempDir()
	sub := filepath.Join(root, "nested")
	if err := os.MkdirAll(sub, 0o755); err != nil {
		t.Fatal(err)
	}
	snapFile := filepath.Join(sub, "snap.json")
	if err := os.WriteFile(snapFile, []byte(`{"hostname":"n2"}`), 0o644); err != nil {
		t.Fatal(err)
	}

	snaps, err := LoadDir(root)
	if err != nil {
		t.Fatalf("LoadDir nested: %v", err)
	}
	if len(snaps) != 1 || snaps[0].Hostname != "n2" {
		t.Fatalf("unexpected nested snapshots: %+v", snaps)
	}

	_, err = LoadDir(filepath.Join(root, "missing"))
	if err == nil {
		t.Fatal("expected missing dir error")
	}
}

View File

@ -24,3 +24,10 @@ func TestChooseTargetsPicksMostCommon(t *testing.T) {
t.Fatalf("package target wrong: %+v", tg.Packages)
}
}
// TestChooseTargetsHandlesNilSummary confirms that a nil summary yields an
// empty kernel target and no package targets.
func TestChooseTargetsHandlesNilSummary(t *testing.T) {
	targets := ChooseTargets(nil)
	emptyKernel := targets.Kernel == ""
	noPackages := len(targets.Packages) == 0
	if !emptyKernel || !noPackages {
		t.Fatalf("expected zero targets, got %+v", targets)
	}
}

View File

@ -6,10 +6,10 @@ type ClassFacts struct {
Kernel string `json:"kernel,omitempty"`
K3sVersion string `json:"k3s_version,omitempty"`
Containerd string `json:"containerd,omitempty"`
Packages map[string]string `json:"packages,omitempty"` // name -> version
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
CGroupConfig map[string]string `json:"cgroup_config,omitempty"`// key -> value
Packages map[string]string `json:"packages,omitempty"` // name -> version
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
CGroupConfig map[string]string `json:"cgroup_config,omitempty"` // key -> value
Notes string `json:"notes,omitempty"`
}

View File

@ -0,0 +1,63 @@
package image
import (
"archive/zip"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
)
// TestDownloadAndVerifyXZAndZIPBranches drives DownloadAndVerify with a
// file:// source ending in .xz (a fake xz script is placed first on PATH) and
// with a file:// source that is a real single-entry zip archive.
func TestDownloadAndVerifyXZAndZIPBranches(t *testing.T) {
	const payload = "xz-contents"
	// Stand-in for xz: "-dc FILE" streams FILE unchanged; any other call
	// copies the last argument to "<last>.xz".
	const fakeXZ = "#!/usr/bin/env bash\nset -eu\nif [[ ${1:-} == -dc ]]; then\n cat \"$2\"\n exit 0\nfi\ncp \"${@: -1}\" \"${@: -1}.xz\"\n"

	dir := t.TempDir()
	mustWrite := func(path string, data []byte, mode os.FileMode) {
		t.Helper()
		if err := os.WriteFile(path, data, mode); err != nil {
			t.Fatal(err)
		}
	}

	src := filepath.Join(dir, "src.img")
	mustWrite(src, []byte(payload), 0o644)
	mustWrite(filepath.Join(dir, "xz"), []byte(fakeXZ), 0o755)
	t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH"))
	mustWrite(src+".xz", []byte(payload), 0o644)

	// Build a zip archive holding a single image entry.
	archivePath := filepath.Join(dir, "archive.zip")
	archiveFile, err := os.Create(archivePath)
	if err != nil {
		t.Fatal(err)
	}
	archive := zip.NewWriter(archiveFile)
	entry, err := archive.Create("image.img")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := entry.Write([]byte("zip-contents")); err != nil {
		t.Fatal(err)
	}
	if err := archive.Close(); err != nil {
		t.Fatal(err)
	}
	if err := archiveFile.Close(); err != nil {
		t.Fatal(err)
	}

	digest := sha256.Sum256([]byte(payload))
	wantSum := "sha256:" + hex.EncodeToString(digest[:])
	xzOut := filepath.Join(dir, "out-xz.img")
	if _, err := DownloadAndVerify("file://"+src+".xz", xzOut, wantSum); err != nil {
		t.Fatalf("DownloadAndVerify xz: %v", err)
	}
	zipOut := filepath.Join(dir, "out-zip.img")
	if _, err := DownloadAndVerify("file://"+archivePath, zipOut, ""); err != nil {
		t.Fatalf("DownloadAndVerify zip: %v", err)
	}
}
// TestDownloadAndVerifyErrorBranches expects VerifyChecksum to reject a
// checksum string with no recognizable format and DownloadAndVerify to fail
// when the file:// source does not exist.
func TestDownloadAndVerifyErrorBranches(t *testing.T) {
	root := t.TempDir()

	err := VerifyChecksum(filepath.Join(root, "missing"), "bogus")
	if err == nil {
		t.Fatal("expected invalid checksum format error")
	}

	source := "file://" + filepath.Join(root, "missing.img")
	target := filepath.Join(root, "out.img")
	if _, err := DownloadAndVerify(source, target, "sha256:deadbeef"); err == nil {
		t.Fatal("expected missing source error")
	}
}

View File

@ -5,6 +5,8 @@ import (
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
@ -173,6 +175,81 @@ func TestVerifyChecksumAcceptsMD5(t *testing.T) {
}
}
// TestDownloadAndVerifyUsesHTTPAndCachedFile downloads a small body from a
// local httptest server, checks the returned path and the bytes on disk, and
// then calls DownloadAndVerify again with the destination already populated.
func TestDownloadAndVerifyUsesHTTPAndCachedFile(t *testing.T) {
	body := []byte("metis-http-test")
	sum := sha256.Sum256(body)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/image.img" {
			http.NotFound(w, r)
			return
		}
		// A failed write surfaces as a checksum mismatch in the caller.
		_, _ = w.Write(body)
	}))
	defer srv.Close()
	dir := t.TempDir()
	dest := filepath.Join(dir, "image.img")
	path, err := DownloadAndVerify(srv.URL+"/image.img", dest, "sha256:"+hex.EncodeToString(sum[:]))
	if err != nil {
		t.Fatalf("DownloadAndVerify: %v", err)
	}
	if path != dest {
		t.Fatalf("path = %q, want %q", path, dest)
	}
	// Report a read failure directly instead of letting it show up as a
	// misleading empty-body mismatch.
	got, err := os.ReadFile(dest)
	if err != nil {
		t.Fatalf("read downloaded file: %v", err)
	}
	if string(got) != string(body) {
		t.Fatalf("downloaded body = %q", string(got))
	}
	// Rewrite the destination so the second call starts from an existing file.
	if err := os.WriteFile(dest, body, 0o644); err != nil {
		t.Fatal(err)
	}
	if _, err := DownloadAndVerify(srv.URL+"/image.img", dest, "sha256:"+hex.EncodeToString(sum[:])); err != nil {
		t.Fatalf("cached DownloadAndVerify: %v", err)
	}
}
// TestDownloadAndVerifyRejectsBadChecksum ensures DownloadAndVerify fails when
// the supplied sha256 value does not match the source file.
func TestDownloadAndVerifyRejectsBadChecksum(t *testing.T) {
	root := t.TempDir()
	source := filepath.Join(root, "src.img")
	if err := os.WriteFile(source, []byte("bad"), 0o644); err != nil {
		t.Fatal(err)
	}
	target := filepath.Join(root, "dest.img")
	_, err := DownloadAndVerify("file://"+source, target, "sha256:deadbeef")
	if err == nil {
		t.Fatal("expected checksum mismatch")
	}
}
// TestDownloadAndVerifyRawAndErrorBranches covers a plain file:// copy with no
// checksum, an unparseable checksum string, an HTTP 500 from downloadRaw, and
// decompressZIP on an archive with no entries.
func TestDownloadAndVerifyRawAndErrorBranches(t *testing.T) {
	root := t.TempDir()
	source := filepath.Join(root, "src.img")
	if err := os.WriteFile(source, []byte("raw"), 0o644); err != nil {
		t.Fatal(err)
	}

	target := filepath.Join(root, "dest.img")
	_, err := DownloadAndVerify("file://"+source, target, "")
	if err != nil {
		t.Fatalf("DownloadAndVerify raw: %v", err)
	}
	err = VerifyChecksum(target, "bogus")
	if err == nil {
		t.Fatal("expected invalid checksum format error")
	}

	// Every request to this server answers with a 500.
	failing := func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "boom", http.StatusInternalServerError)
	}
	srv := httptest.NewServer(http.HandlerFunc(failing))
	defer srv.Close()
	err = downloadRaw(srv.URL, filepath.Join(root, "bad.img"))
	if err == nil {
		t.Fatal("expected HTTP error from downloadRaw")
	}

	emptyZip := filepath.Join(root, "empty.zip")
	if err := writeTestZIP(emptyZip, map[string]string{}); err != nil {
		t.Fatalf("writeTestZIP: %v", err)
	}
	err = decompressZIP(emptyZip, filepath.Join(root, "out.img"))
	if err == nil {
		t.Fatal("expected empty zip error")
	}
}
func writeTestZIP(path string, files map[string]string) error {
out, err := os.Create(path)
if err != nil {

View File

@ -2,55 +2,70 @@ package image
import (
"os"
"os/exec"
"path/filepath"
"testing"
"metis/pkg/inject"
)
func TestWriteExt4Files(t *testing.T) {
if _, err := exec.LookPath("mkfs.ext4"); err != nil {
t.Skip("mkfs.ext4 not available")
}
if _, err := exec.LookPath("debugfs"); err != nil {
t.Skip("debugfs not available")
}
func TestInjectRootFSWithFakes(t *testing.T) {
scripts := fakeRootfsCommands(t, true)
t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH"))
workDir := t.TempDir()
fsPath := filepath.Join(workDir, "root.ext4")
f, err := os.Create(fsPath)
if err != nil {
imagePath := filepath.Join(t.TempDir(), "image.img")
if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil {
t.Fatal(err)
}
if err := f.Truncate(32 * 1024 * 1024); err != nil {
t.Fatal(err)
}
if err := f.Close(); err != nil {
t.Fatal(err)
}
cmd := exec.Command("mkfs.ext4", "-F", fsPath)
if out, err := cmd.CombinedOutput(); err != nil {
t.Fatalf("mkfs.ext4: %v: %s", err, string(out))
}
files := []inject.FileSpec{
{
Path: "etc/metis/firstboot.env",
Content: []byte("METIS_HOSTNAME='titan-13'\n"),
Mode: 0o600,
RootFS: true,
},
{
Path: "usr/local/sbin/test.sh",
Content: []byte("#!/usr/bin/env bash\nexit 0\n"),
Mode: 0o755,
RootFS: true,
},
{Path: "etc/metis/firstboot.env", Content: []byte("METIS_HOSTNAME='titan-13'\n"), Mode: 0o600, RootFS: true},
{Path: "usr/local/sbin/test.sh", Content: []byte("#!/usr/bin/env bash\nexit 0\n"), Mode: 0o755, RootFS: true},
}
if err := writeExt4Files(fsPath, files); err != nil {
t.Fatalf("writeExt4Files: %v", err)
if err := InjectRootFS(imagePath, files); err != nil {
t.Fatalf("InjectRootFS: %v", err)
}
}
func TestInjectRootFSSkipsBootOnlyFiles(t *testing.T) {
imagePath := filepath.Join(t.TempDir(), "image.img")
if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil {
t.Fatal(err)
}
if err := InjectRootFS(imagePath, []inject.FileSpec{{Path: "user-data", Content: []byte("boot"), RootFS: false}}); err != nil {
t.Fatalf("InjectRootFS boot-only: %v", err)
}
}
// TestFindLinuxPartitionAndTypeChecks runs findLinuxPartition against the fake
// sfdisk output and checks which partition-type strings isLinuxPartitionType
// accepts.
func TestFindLinuxPartitionAndTypeChecks(t *testing.T) {
	fakeBin := fakeRootfsCommands(t, true)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))

	img := filepath.Join(t.TempDir(), "image.img")
	if err := os.WriteFile(img, make([]byte, 4096), 0o644); err != nil {
		t.Fatal(err)
	}

	part, sector, err := findLinuxPartition(img)
	if err != nil {
		t.Fatalf("findLinuxPartition: %v", err)
	}
	if sector != 512 || part.Start != 1 || part.Size != 2 {
		t.Fatalf("unexpected partition info: %+v sector=%d", part, sector)
	}

	linuxTypes := []string{"83", "8300", "0fc63daf-8483-4772-8e79-3d69d8477de4"}
	for _, typ := range linuxTypes {
		if !isLinuxPartitionType(typ) {
			t.Fatal("expected linux partition types to match")
		}
	}
	if isLinuxPartitionType("ef") {
		t.Fatal("did not expect non-linux type to match")
	}
}
// TestFindLinuxPartitionReturnsErrorWhenNoLinuxPartitionExists expects
// findLinuxPartition to fail when the fake sfdisk lists only "ef" partitions.
func TestFindLinuxPartitionReturnsErrorWhenNoLinuxPartitionExists(t *testing.T) {
	fakeBin := fakeRootfsCommands(t, false)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))

	img := filepath.Join(t.TempDir(), "image.img")
	if err := os.WriteFile(img, make([]byte, 4096), 0o644); err != nil {
		t.Fatal(err)
	}

	_, _, err := findLinuxPartition(img)
	if err == nil {
		t.Fatal("expected error without Linux partition")
	}
}
@ -66,3 +81,63 @@ func TestParentDirs(t *testing.T) {
}
}
}
// TestRootfsErrorBranches feeds extractPartition a 512-byte source for a
// partition at sector 1 of size 2, and replacePartition a 512-byte replacement
// for the same partition; both calls are expected to fail.
func TestRootfsErrorBranches(t *testing.T) {
	const sectorSize = 512
	layout := partitionTablePart{Start: 1, Size: 2}
	root := t.TempDir()
	srcImg := filepath.Join(root, "src.img")
	dstImg := filepath.Join(root, "dst.img")

	if err := os.WriteFile(srcImg, make([]byte, sectorSize), 0o644); err != nil {
		t.Fatal(err)
	}
	err := extractPartition(srcImg, dstImg, layout, 512)
	if err == nil {
		t.Fatal("expected extractPartition to fail on short source image")
	}

	if err := os.WriteFile(dstImg, make([]byte, sectorSize), 0o644); err != nil {
		t.Fatal(err)
	}
	err = replacePartition(srcImg, dstImg, layout, 512)
	if err == nil {
		t.Fatal("expected replacePartition size mismatch")
	}
}
// fakeRootfsCommands writes stand-in "sfdisk" and "debugfs" bash scripts into
// a fresh temp directory and returns that directory so callers can prepend it
// to PATH. The fake sfdisk prints a fixed JSON partition table with 512-byte
// sectors; its second partition has type "83" when includeLinux is true and
// "ef" otherwise. The fake debugfs exits 0 for "-w", and for "-R" answers
// "stat" and "dump" requests for two known paths.
func fakeRootfsCommands(t *testing.T, includeLinux bool) string {
t.Helper()
dir := t.TempDir()
// write installs an executable bash script named name with the given body.
write := func(name, body string) {
path := filepath.Join(dir, name)
if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil {
t.Fatalf("write %s: %v", name, err)
}
}
// Default table has no Linux-typed partition; swap it when one is requested.
partitions := `{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"ef"}]}}`
if includeLinux {
partitions = `{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}`
}
write("sfdisk", "cat <<'JSON'\n"+partitions+"\nJSON")
write("debugfs", `if [[ "${1:-}" == "-w" ]]; then
exit 0
fi
if [[ "${1:-}" == "-R" ]]; then
set -- $2
case "${1:-}" in
stat)
case "${2:-}" in
/etc/metis/firstboot.env) printf 'Mode: 0600\n' ;;
/usr/local/sbin/test.sh) printf 'Mode: 0755\n' ;;
esac
exit 0
;;
dump)
dest="${3:-}"
case "${2:-}" in
/etc/metis/firstboot.env) printf "METIS_HOSTNAME='titan-13'\n" > "${dest}" ;;
/usr/local/sbin/test.sh) printf '#!/usr/bin/env bash\nexit 0\n' > "${dest}" ;;
esac
exit 0
;;
esac
fi
exit 0`)
return dir
}

View File

@ -0,0 +1,36 @@
package inject
import (
"os"
"path/filepath"
"testing"
)
// TestInjectorWriteBranches writes one boot file and one root file through an
// Injector, reads both back, and then expects Write to fail when BootPath
// names a regular file.
func TestInjectorWriteBranches(t *testing.T) {
	base := t.TempDir()
	bootDir := filepath.Join(base, "boot")
	rootDir := filepath.Join(base, "root")

	injector := &Injector{BootPath: bootDir, RootPath: rootDir}
	specs := []FileSpec{
		{Path: "boot.txt", Content: []byte("boot"), Mode: 0o644, RootFS: false},
		{Path: "root.txt", Content: []byte("root"), Mode: 0o600, RootFS: true},
	}
	if err := injector.Write(specs); err != nil {
		t.Fatalf("Write: %v", err)
	}

	bootData, err := os.ReadFile(filepath.Join(bootDir, "boot.txt"))
	if err != nil || string(bootData) != "boot" {
		t.Fatalf("boot write = %q err=%v", bootData, err)
	}
	rootData, err := os.ReadFile(filepath.Join(rootDir, "root.txt"))
	if err != nil || string(rootData) != "root" {
		t.Fatalf("root write = %q err=%v", rootData, err)
	}

	// A regular file in place of the boot directory makes the write fail.
	blocker := filepath.Join(base, "blocked")
	if err := os.WriteFile(blocker, []byte("file"), 0o644); err != nil {
		t.Fatal(err)
	}
	injector = &Injector{BootPath: blocker}
	err = injector.Write([]FileSpec{{Path: "x", Content: []byte("x")}})
	if err == nil {
		t.Fatal("expected mkdir error")
	}
}

View File

@ -1,38 +1,40 @@
package inject
import (
"fmt"
"os"
"path/filepath"
"fmt"
"os"
"path/filepath"
)
// Injector writes node config into a mounted image (boot/root paths supplied by caller).
type Injector struct {
BootPath string
RootPath string
BootPath string
RootPath string
}
// FileSpec describes a file to write.
type FileSpec struct {
Path string
Content []byte
Mode os.FileMode
RootFS bool // if true, write under root path; else boot path
Path string
Content []byte
Mode os.FileMode
RootFS bool // if true, write under root path; else boot path
}
// Write materializes the requested files under the boot or root mount because
// the burn flow needs a single place to stage config fragments before sync.
func (i *Injector) Write(files []FileSpec) error {
for _, f := range files {
base := i.BootPath
if f.RootFS {
base = i.RootPath
}
target := filepath.Join(base, f.Path)
if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
return fmt.Errorf("mkdir %s: %w", filepath.Dir(target), err)
}
if err := os.WriteFile(target, f.Content, f.Mode); err != nil {
return fmt.Errorf("write %s: %w", target, err)
}
}
return nil
for _, f := range files {
base := i.BootPath
if f.RootFS {
base = i.RootPath
}
target := filepath.Join(base, f.Path)
if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
return fmt.Errorf("mkdir %s: %w", filepath.Dir(target), err)
}
if err := os.WriteFile(target, f.Content, f.Mode); err != nil {
return fmt.Errorf("write %s: %w", target, err)
}
}
return nil
}

41
pkg/inject/inject_test.go Normal file
View File

@ -0,0 +1,41 @@
package inject
import (
"os"
"path/filepath"
"testing"
)
// TestWriteTargetsBootAndRootMounts confirms that Write places a non-RootFS
// file under BootPath and a RootFS file under RootPath.
func TestWriteTargetsBootAndRootMounts(t *testing.T) {
	base := t.TempDir()
	bootDir := filepath.Join(base, "boot")
	rootDir := filepath.Join(base, "root")
	injector := Injector{BootPath: bootDir, RootPath: rootDir}

	specs := []FileSpec{
		{Path: "boot.txt", Content: []byte("boot"), Mode: 0o644, RootFS: false},
		{Path: "root.txt", Content: []byte("root"), Mode: 0o600, RootFS: true},
	}
	if err := injector.Write(specs); err != nil {
		t.Fatalf("Write: %v", err)
	}

	bootData, err := os.ReadFile(filepath.Join(bootDir, "boot.txt"))
	if err != nil || string(bootData) != "boot" {
		t.Fatalf("boot file = %q, err=%v", string(bootData), err)
	}
	rootData, err := os.ReadFile(filepath.Join(rootDir, "root.txt"))
	if err != nil || string(rootData) != "root" {
		t.Fatalf("root file = %q, err=%v", string(rootData), err)
	}
}
// TestWriteReturnsFilesystemErrors expects Write to fail when RootPath points
// at a regular file instead of a directory.
func TestWriteReturnsFilesystemErrors(t *testing.T) {
	base := t.TempDir()
	bootDir := filepath.Join(base, "boot")
	notADir := filepath.Join(base, "root-file")
	if err := os.WriteFile(notADir, []byte("not a dir"), 0o644); err != nil {
		t.Fatal(err)
	}

	injector := Injector{BootPath: bootDir, RootPath: notADir}
	spec := FileSpec{Path: "root.txt", Content: []byte("root"), RootFS: true}
	err := injector.Write([]FileSpec{spec})
	if err == nil {
		t.Fatal("expected write error for root path file")
	}
}

View File

@ -0,0 +1,37 @@
package inventory
import (
"os"
"path/filepath"
"testing"
)
// TestLoadAndFindNodeBranches loads an inventory file that uses a ${CLASS}
// placeholder for the class name, then checks FindNode for an unknown node
// (error expected) and for node1 (a non-nil class expected).
func TestLoadAndFindNodeBranches(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "inventory.yaml")
// NOTE(review): the YAML fixture below is whitespace-sensitive; keep its
// bytes exactly as committed.
if err := os.WriteFile(path, []byte(`
classes:
- name: ${CLASS}
arch: arm64
os: armbian
image: file:///tmp/base.img
nodes:
- name: node1
class: ${CLASS}
hostname: node1
k3s_role: agent
`), 0o644); err != nil {
t.Fatal(err)
}
// Presumably Load expands ${CLASS} from the environment; the test only
// asserts that node1 resolves to some class afterwards.
t.Setenv("CLASS", "rpi4")
inv, err := Load(path)
if err != nil {
t.Fatalf("Load: %v", err)
}
if _, _, err := inv.FindNode("missing"); err == nil {
t.Fatal("expected missing node error")
}
if _, cls, err := inv.FindNode("node1"); err != nil || cls == nil {
t.Fatalf("expected class lookup for node1, got class=%#v err=%v", cls, err)
}
}

View File

@ -41,3 +41,26 @@ nodes:
t.Fatalf("token not expanded: %q", node.K3sToken)
}
}
func TestFindNodeReturnsClassMissingError(t *testing.T) {
inv := &Inventory{
Nodes: []NodeSpec{{Name: "n1", Class: "missing"}},
}
node, class, err := inv.FindNode("n1")
if err == nil {
t.Fatal("expected class missing error")
}
if node == nil || class != nil {
t.Fatalf("unexpected node/class: %#v %#v", node, class)
}
}
// TestLoadRejectsInvalidYAML expects Load to fail on a file containing an
// unterminated YAML flow sequence.
func TestLoadRejectsInvalidYAML(t *testing.T) {
	path := filepath.Join(t.TempDir(), "inventory.yaml")
	if err := os.WriteFile(path, []byte("classes: ["), 0o644); err != nil {
		t.Fatal(err)
	}
	_, err := Load(path)
	if err == nil {
		t.Fatal("expected parse inventory error")
	}
}

View File

@ -0,0 +1,49 @@
package mount
import (
"os"
"path/filepath"
"testing"
)
// TestSetupAndTeardownWithFakeCommands puts fake losetup/mount/umount scripts
// first on PATH, then checks partitionPath for an existing "p1" sibling file,
// a Setup/Teardown round trip on a dummy image, and partitionPath for a /dev
// loop device.
func TestSetupAndTeardownWithFakeCommands(t *testing.T) {
	root := t.TempDir()
	binDir := filepath.Join(root, "bin")
	if err := os.MkdirAll(binDir, 0o755); err != nil {
		t.Fatal(err)
	}

	// Install each fake command as an executable bash script.
	fakes := []struct{ name, body string }{
		{"losetup", `printf '/dev/loop9\n'`},
		{"mount", `exit 0`},
		{"umount", `exit 0`},
	}
	for _, fake := range fakes {
		script := "#!/usr/bin/env bash\nset -eu\n" + fake.body + "\n"
		target := filepath.Join(binDir, fake.name)
		if err := os.WriteFile(target, []byte(script), 0o755); err != nil {
			t.Fatalf("write %s: %v", fake.name, err)
		}
	}
	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))

	img := filepath.Join(root, "image.img")
	if err := os.WriteFile(img, make([]byte, 1024), 0o644); err != nil {
		t.Fatal(err)
	}
	firstPart := img + "p1"
	if err := os.WriteFile(firstPart, []byte(""), 0o644); err != nil {
		t.Fatal(err)
	}
	if got := partitionPath(img, 1); got != firstPart {
		t.Fatalf("partitionPath existing = %q", got)
	}

	mnt, err := Setup(img)
	if err != nil {
		t.Fatalf("Setup: %v", err)
	}
	if mnt.LoopDevice != "/dev/loop9" || mnt.BootPath == "" || mnt.RootPath == "" {
		t.Fatalf("unexpected mount: %#v", mnt)
	}
	if err := Teardown(mnt); err != nil {
		t.Fatalf("Teardown: %v", err)
	}

	// Either naming form is accepted for the loop device's second partition.
	switch got := partitionPath("/dev/loop9", 2); got {
	case "/dev/loop92", "/dev/loop9p2":
	default:
		t.Fatalf("partitionPath /dev = %q", got)
	}
}

67
pkg/mount/mount_test.go Normal file
View File

@ -0,0 +1,67 @@
package mount
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestSetupAndTeardownUseHelperCommands runs Setup and Teardown with the fake
// helper commands first on PATH and checks the reported loop device and the
// naming of the boot/root mount paths.
func TestSetupAndTeardownUseHelperCommands(t *testing.T) {
	fakeBin := fakeCommandDir(t)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))

	img := filepath.Join(t.TempDir(), "disk.img")
	if err := os.WriteFile(img, []byte("image"), 0o644); err != nil {
		t.Fatal(err)
	}

	mnt, err := Setup(img)
	if err != nil {
		t.Fatalf("Setup: %v", err)
	}
	if mnt.LoopDevice != "/dev/loop9" {
		t.Fatalf("loop device = %q", mnt.LoopDevice)
	}
	bootOK := strings.Contains(mnt.BootPath, "metis-boot-")
	if !bootOK || !strings.Contains(mnt.RootPath, "metis-root-") {
		t.Fatalf("unexpected mount paths: %+v", mnt)
	}

	err = Teardown(mnt)
	if err != nil {
		t.Fatalf("Teardown: %v", err)
	}
}
// TestPartitionPathFallsBackToNumberSuffix expects partitionPath to append the
// bare partition number to /dev/loop0.
func TestPartitionPathFallsBackToNumberSuffix(t *testing.T) {
	got := partitionPath("/dev/loop0", 2)
	if got != "/dev/loop02" {
		t.Fatalf("partitionPath = %q", got)
	}
}
// TestTeardownNilAndDirectDeviceSetup checks that Teardown accepts nil and
// that Setup succeeds when given a /dev path with the fake helper commands
// first on PATH.
func TestTeardownNilAndDirectDeviceSetup(t *testing.T) {
	err := Teardown(nil)
	if err != nil {
		t.Fatalf("Teardown(nil): %v", err)
	}

	fakeBin := fakeCommandDir(t)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))

	_, err = Setup("/dev/loop9")
	if err != nil {
		t.Fatalf("Setup direct device: %v", err)
	}
}
// fakeCommandDir creates a temp directory containing fake "losetup", "mount",
// and "umount" bash scripts and returns its path so tests can prepend it to
// PATH. The fake losetup prints /dev/loop9 when called with "-Pf --show" and
// otherwise exits 0; the fake mount and umount always exit 0.
func fakeCommandDir(t *testing.T) string {
t.Helper()
dir := t.TempDir()
// write installs an executable bash script named name with the given body.
write := func(name, body string) {
path := filepath.Join(dir, name)
if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil {
t.Fatalf("write %s: %v", name, err)
}
}
write("losetup", `if [[ "${1:-}" == "-Pf" && "${2:-}" == "--show" ]]; then
printf '/dev/loop9\n'
exit 0
fi
exit 0`)
write("mount", `exit 0`)
write("umount", `exit 0`)
return dir
}

View File

@ -0,0 +1,136 @@
package plan
import (
"context"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"strings"
"testing"
"time"
"metis/pkg/inventory"
)
// TestPlanBuildFilesAndExecuteBranches walks the plan package entry points in
// sequence against one inventory: Build (missing node and success), cacheName,
// Files, cloudInitUserData, allowK3sNodeLabel, Inject (empty and real paths),
// BuildImageFile with PATH reduced to the temp dir, and Execute with a
// placeholder device and with a concrete one. The steps are order-dependent:
// PATH is overwritten partway through and later calls run under that PATH.
func TestPlanBuildFilesAndExecuteBranches(t *testing.T) {
dir := t.TempDir()
base := filepath.Join(dir, "base.img")
baseContent := []byte("image")
if err := os.WriteFile(base, baseContent, 0o644); err != nil {
t.Fatal(err)
}
// The class checksum is the real sha256 of the base image written above.
sum := sha256.Sum256(baseContent)
inv := &inventory.Inventory{
Classes: []inventory.NodeClass{{
Name: "rpi4",
Arch: "arm64",
OS: "armbian",
Image: "file://" + base,
Checksum: "sha256:" + hex.EncodeToString(sum[:]),
DefaultLabels: map[string]string{
"node-role.kubernetes.io/worker": "true",
},
BootOverlay: filepath.Join(dir, "boot-overlay"),
RootOverlay: filepath.Join(dir, "root-overlay"),
}},
Nodes: []inventory.NodeSpec{{
Name: "titan-15",
Class: "rpi4",
Hostname: "titan-15",
IP: "192.168.22.43",
K3sRole: "agent",
K3sURL: "https://192.168.22.7:6443",
K3sToken: "token",
SSHUser: "atlas",
SSHAuthorized: []string{"ssh-ed25519 AAA"},
LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/var/lib/longhorn", UUID: "u1"}},
}},
}
// Populate both overlay directories with one file each.
if err := os.MkdirAll(inv.Classes[0].BootOverlay, 0o755); err != nil {
t.Fatal(err)
}
if err := os.MkdirAll(inv.Classes[0].RootOverlay, 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(inv.Classes[0].BootOverlay, "boot.txt"), []byte("boot"), 0o644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(inv.Classes[0].RootOverlay, "root.txt"), []byte("root"), 0o600); err != nil {
t.Fatal(err)
}
if _, err := Build(inv, "missing", "", dir); err == nil {
t.Fatal("expected Build to fail for missing node")
}
p, err := Build(inv, "titan-15", "", dir)
if err != nil {
t.Fatalf("Build: %v", err)
}
// With an empty device argument the plan is expected to carry the
// "/dev/sdX" placeholder and an inject action.
if p.Device != "/dev/sdX" || !strings.Contains(strings.Join(actionDetails(p.Actions), " "), "Inject hostname/network/k3s config") {
t.Fatalf("unexpected plan: %#v", p)
}
if got := cacheName("foo.img.xz"); got != "foo.img" {
t.Fatalf("cacheName = %q", got)
}
files, err := Files(inv, "titan-15")
if err != nil {
t.Fatalf("Files: %v", err)
}
if len(files) == 0 {
t.Fatal("expected files")
}
if got := cloudInitUserData(nil, nil); got != "" {
t.Fatalf("cloudInitUserData nil = %q", got)
}
if got := allowK3sNodeLabel("agent", "node-role.kubernetes.io/master"); got {
t.Fatal("agent should reject node-role labels")
}
// Inject is a thin wrapper around maybeInject; exercise both the no-op and
// path-setting branches.
if err := Inject(inv, "titan-15", "", ""); err != nil {
t.Fatalf("Inject noop: %v", err)
}
boot := filepath.Join(dir, "boot")
root := filepath.Join(dir, "root")
if err := os.MkdirAll(boot, 0o755); err != nil {
t.Fatal(err)
}
if err := os.MkdirAll(root, 0o755); err != nil {
t.Fatal(err)
}
if err := Inject(inv, "titan-15", boot, root); err != nil {
t.Fatalf("Inject with paths: %v", err)
}
cacheDir := filepath.Join(dir, "cache")
output := filepath.Join(dir, "output.img")
// Restrict PATH to the temp dir so external helper tools cannot be found;
// everything below runs with this PATH.
t.Setenv("PATH", dir)
if err := BuildImageFile(context.Background(), inv, "titan-15", cacheDir, output); err == nil {
t.Fatal("expected BuildImageFile to fail without xz/debugfs setup")
}
if _, err := Execute(inv, "titan-15", "/dev/sdX", cacheDir, true); err == nil {
t.Fatal("expected Execute to reject placeholder device")
}
if _, err := Execute(inv, "titan-15", "/dev/sdz", cacheDir, false); err != nil {
t.Fatalf("Execute dry-run: %v", err)
}
}
// actionDetails returns the Detail field of every action, in order.
func actionDetails(actions []Action) []string {
	details := make([]string, len(actions))
	for i := range actions {
		details[i] = actions[i].Detail
	}
	return details
}
func TestPlanMiscBranches(t *testing.T) {
if !NextRunStale(timeNow().Add(-time.Hour), time.Minute) {
t.Fatal("expected NextRunStale")
}
}
// timeNow returns the current time as reported by time.Now.
func timeNow() time.Time {
	now := time.Now()
	return now
}

View File

@ -0,0 +1,127 @@
package plan
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
"metis/pkg/inventory"
)
// TestFilesAndInjectWithSecretsAndOverlays points VAULT_ADDR at a local
// httptest server that serves one node secret, then checks that Files emits
// the secrets file, both overlay files, and user-data containing the served
// cloud-init text, and that Inject writes the expected files to disk.
func TestFilesAndInjectWithSecretsAndOverlays(t *testing.T) {
	secretHandler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/v1/secret/data/nodes/titan-15" {
			http.NotFound(w, r)
			return
		}
		payload := map[string]any{
			"data": map[string]any{
				"data": map[string]any{
					"cloud_init": "#cloud-config\nmanage_etc_hosts: true\n",
					"k3s_token":  "secret-token",
					"extra":      map[string]string{"foo": "bar"},
				},
			},
		}
		_ = json.NewEncoder(w).Encode(payload)
	}
	vault := httptest.NewServer(http.HandlerFunc(secretHandler))
	defer vault.Close()
	t.Setenv("VAULT_ADDR", vault.URL)
	t.Setenv("VAULT_TOKEN", "tok")

	base := t.TempDir()
	bootOverlay := filepath.Join(base, "boot-overlay")
	rootOverlay := filepath.Join(base, "root-overlay")
	mustMkdir := func(path string) {
		t.Helper()
		if err := os.MkdirAll(path, 0o755); err != nil {
			t.Fatal(err)
		}
	}
	mustWrite := func(path, content string) {
		t.Helper()
		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
			t.Fatal(err)
		}
	}
	mustMkdir(filepath.Join(bootOverlay, "over"))
	mustMkdir(filepath.Join(rootOverlay, "etc"))
	mustWrite(filepath.Join(bootOverlay, "over", "cmdline.txt"), "console=tty1")
	mustWrite(filepath.Join(rootOverlay, "etc", "issue"), "hello")

	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{{
			Name:        "c1",
			Arch:        "arm64",
			OS:          "linux",
			Image:       "file:///tmp/base.img",
			BootOverlay: bootOverlay,
			RootOverlay: rootOverlay,
		}},
		Nodes: []inventory.NodeSpec{{
			Name:          "titan-15",
			Class:         "c1",
			Hostname:      "titan-15",
			IP:            "192.168.22.43",
			K3sRole:       "agent",
			SSHUser:       "atlas",
			SSHAuthorized: []string{"ssh-ed25519 AAA"},
		}},
	}

	files, err := Files(inv, "titan-15")
	if err != nil {
		t.Fatalf("Files: %v", err)
	}
	var sawSecret, sawBootOverlay, sawRootOverlay, sawCloudInit bool
	for _, f := range files {
		switch f.Path {
		case "etc/metis/secrets.json":
			sawSecret = true
		case "over/cmdline.txt":
			sawBootOverlay = true
		case "etc/issue":
			sawRootOverlay = true
		case "user-data":
			sawCloudInit = strings.Contains(string(f.Content), "manage_etc_hosts: true")
		}
	}
	if !(sawSecret && sawBootOverlay && sawRootOverlay && sawCloudInit) {
		t.Fatalf("missing generated files: secret=%v boot=%v root=%v cloudinit=%v", sawSecret, sawBootOverlay, sawRootOverlay, sawCloudInit)
	}

	bootDir := filepath.Join(base, "boot")
	rootDir := filepath.Join(base, "root")
	mustMkdir(bootDir)
	mustMkdir(rootDir)
	if err := Inject(inv, "titan-15", bootDir, rootDir); err != nil {
		t.Fatalf("Inject: %v", err)
	}
	if _, err := os.Stat(filepath.Join(bootDir, "over", "cmdline.txt")); err != nil {
		t.Fatalf("expected boot overlay file: %v", err)
	}
	if _, err := os.Stat(filepath.Join(rootDir, "etc/metis/node.json")); err != nil {
		t.Fatalf("expected injected rootfs file: %v", err)
	}
}
// TestNextRunStale exercises both outcomes of NextRunStale with a one-hour
// duration: a timestamp two hours old must be stale, the current time must not.
func TestNextRunStale(t *testing.T) {
	twoHoursAgo := time.Now().Add(-2 * time.Hour)
	if stale := NextRunStale(twoHoursAgo, time.Hour); !stale {
		t.Fatal("expected stale run")
	}
	if stale := NextRunStale(time.Now(), time.Hour); stale {
		t.Fatal("did not expect fresh run to be stale")
	}
}
// TestAllowK3sNodeLabelRules checks that a node-role label is rejected for the
// "agent" role and accepted for the "server" role.
func TestAllowK3sNodeLabelRules(t *testing.T) {
	const roleLabel = "node-role.kubernetes.io/worker"
	if allowed := allowK3sNodeLabel("agent", roleLabel); allowed {
		t.Fatal("agent should block node-role labels")
	}
	if allowed := allowK3sNodeLabel("server", roleLabel); !allowed {
		t.Fatal("server should allow node-role labels")
	}
}

View File

@ -2,6 +2,7 @@ package plan
import (
"os"
"path/filepath"
"testing"
"metis/pkg/inventory"
@ -37,3 +38,39 @@ func TestBuildIncludesInjectWhenEnvSet(t *testing.T) {
t.Fatalf("expected inject action when METIS_BOOT_PATH set")
}
}
// TestBuildAndExecuteErrorBranches covers the failure paths: Build and
// checksumFromInventory against an unknown node, and a confirmed Execute
// against the /dev/sdX device path, which is expected to be rejected.
func TestBuildAndExecuteErrorBranches(t *testing.T) {
	empty := &inventory.Inventory{}
	_, buildErr := Build(empty, "missing", "/dev/sdz", "/tmp/cache")
	if buildErr == nil {
		t.Fatal("expected Build to fail for missing node")
	}
	if sum := checksumFromInventory(empty, "missing"); sum != "" {
		t.Fatalf("checksumFromInventory missing node = %q", sum)
	}

	workDir := t.TempDir()
	imagePath := filepath.Join(workDir, "base.img")
	if err := os.WriteFile(imagePath, []byte("image"), 0o644); err != nil {
		t.Fatal(err)
	}
	class := inventory.NodeClass{
		Name:     "c1",
		Arch:     "arm64",
		OS:       "linux",
		Image:    "file://" + imagePath,
		Checksum: imageChecksum(t, imagePath),
	}
	node := inventory.NodeSpec{
		Name:     "n1",
		Class:    "c1",
		Hostname: "n1",
		IP:       "10.0.0.1",
		K3sRole:  "agent",
	}
	populated := &inventory.Inventory{
		Classes: []inventory.NodeClass{class},
		Nodes:   []inventory.NodeSpec{node},
	}
	_, execErr := Execute(populated, "n1", "/dev/sdX", filepath.Join(workDir, "cache"), true)
	if execErr == nil {
		t.Fatal("expected placeholder device rejection")
	}
}

199
pkg/plan/workflow_test.go Normal file
View File

@ -0,0 +1,199 @@
package plan
import (
"context"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
"metis/pkg/inventory"
)
// TestExecuteAndBuildImageFileWithFakes runs Execute twice with fake rootfs and
// mount tools on PATH: first unconfirmed, expecting only a plan, then confirmed
// with the METIS_* mount variables set, expecting the image to be written and
// firstboot.env to be injected into the root directory.
func TestExecuteAndBuildImageFileWithFakes(t *testing.T) {
	sep := string(os.PathListSeparator)
	t.Setenv("PATH", fakeRootfsTools(t)+sep+fakeMountTools(t)+sep+os.Getenv("PATH"))

	dir := t.TempDir()
	rawImage := filepath.Join(dir, "base.img")
	if err := os.WriteFile(rawImage, make([]byte, 4096), 0o644); err != nil {
		t.Fatal(err)
	}
	class := inventory.NodeClass{
		Name:     "c1",
		Arch:     "arm64",
		OS:       "linux",
		Image:    "file://" + rawImage,
		Checksum: imageChecksum(t, rawImage),
	}
	node := inventory.NodeSpec{
		Name:          "n1",
		Class:         "c1",
		Hostname:      "n1",
		IP:            "10.0.0.1",
		K3sRole:       "agent",
		SSHUser:       "atlas",
		SSHAuthorized: []string{"ssh-ed25519 AAA"},
	}
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{class},
		Nodes:   []inventory.NodeSpec{node},
	}

	// Unconfirmed run.
	planDry, err := Execute(inv, "n1", filepath.Join(dir, "disk.img"), filepath.Join(dir, "cache"), false)
	if err != nil {
		t.Fatalf("Execute dry-run: %v", err)
	}
	if !(planDry.Node == "n1" && len(planDry.Actions) != 0) {
		t.Fatalf("unexpected dry-run plan: %#v", planDry)
	}

	bootDir := filepath.Join(dir, "boot")
	rootDir := filepath.Join(dir, "root")
	for _, target := range []string{bootDir, rootDir} {
		if err := os.MkdirAll(target, 0o755); err != nil {
			t.Fatal(err)
		}
	}
	t.Setenv("METIS_BOOT_PATH", bootDir)
	t.Setenv("METIS_ROOT_PATH", rootDir)
	t.Setenv("METIS_AUTO_MOUNT", "1")

	// Confirmed run against a regular file standing in for the device.
	written := filepath.Join(dir, "written.img")
	planRun, err := Execute(inv, "n1", written, filepath.Join(dir, "cache2"), true)
	if err != nil {
		t.Fatalf("Execute confirm: %v", err)
	}
	if wantImage := "file://" + rawImage; planRun.Image != wantImage {
		t.Fatalf("unexpected plan image: %#v", planRun)
	}
	if _, statErr := os.Stat(written); statErr != nil {
		t.Fatalf("expected written image: %v", statErr)
	}
	injected := filepath.Join(rootDir, "etc/metis/firstboot.env")
	if _, statErr := os.Stat(injected); statErr != nil {
		t.Fatalf("expected injected rootfs file: %v", statErr)
	}
}
// TestBuildImageFileMaterializesRootFS runs BuildImageFile with the fake rootfs
// tools on PATH and checks that the requested output image file exists.
func TestBuildImageFileMaterializesRootFS(t *testing.T) {
	t.Setenv("PATH", fakeRootfsTools(t)+string(os.PathListSeparator)+os.Getenv("PATH"))

	dir := t.TempDir()
	rawImage := filepath.Join(dir, "base.img")
	if err := os.WriteFile(rawImage, make([]byte, 4096), 0o644); err != nil {
		t.Fatal(err)
	}
	class := inventory.NodeClass{
		Name:     "c1",
		Arch:     "arm64",
		OS:       "linux",
		Image:    "file://" + rawImage,
		Checksum: imageChecksum(t, rawImage),
	}
	node := inventory.NodeSpec{
		Name:          "n1",
		Class:         "c1",
		Hostname:      "n1",
		IP:            "10.0.0.1",
		K3sRole:       "agent",
		SSHUser:       "atlas",
		SSHAuthorized: []string{"ssh-ed25519 AAA"},
	}
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{class},
		Nodes:   []inventory.NodeSpec{node},
	}

	out := filepath.Join(dir, "output.img")
	cacheDir := filepath.Join(dir, "cache")
	if err := BuildImageFile(context.Background(), inv, "n1", cacheDir, out); err != nil {
		t.Fatalf("BuildImageFile: %v", err)
	}
	if _, statErr := os.Stat(out); statErr != nil {
		t.Fatalf("expected output image: %v", statErr)
	}
}
func TestMaybeInjectNoopsWhenEnvUnset(t *testing.T) {
if err := maybeInject(&inventory.Inventory{}, "n1"); err != nil {
t.Fatalf("maybeInject without env: %v", err)
}
}
// TestChecksumFromInventoryAndCacheName checks that checksumFromInventory
// returns the class checksum for a known node and that cacheName reduces an
// .xz archive path to its base name without the .xz suffix.
func TestChecksumFromInventoryAndCacheName(t *testing.T) {
	const wantSum = "sha256:deadbeef"
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{{Name: "c1", Checksum: wantSum}},
		Nodes:   []inventory.NodeSpec{{Name: "n1", Class: "c1"}},
	}
	sum := checksumFromInventory(inv, "n1")
	if sum != wantSum {
		t.Fatalf("checksumFromInventory = %q", sum)
	}
	name := cacheName("/tmp/archive/base.img.xz")
	if name != "base.img" {
		t.Fatalf("cacheName = %q", name)
	}
}
// fakeRootfsTools creates a temporary directory holding executable bash
// stand-ins for sfdisk and debugfs and returns that directory so a test can
// prepend it to PATH.
//
// The sfdisk stub prints a fixed JSON partition table. The debugfs stub copies
// its command file to "<image>.commands" when called with -w, and answers
// "stat" and "dump" requests (-R) by reading that recorded command file back.
func fakeRootfsTools(t *testing.T) string {
	t.Helper()
	binDir := t.TempDir()
	const preamble = "#!/usr/bin/env bash\nset -eu\n"
	stubs := []struct{ name, body string }{
		{"sfdisk", `cat <<'JSON'
{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}
JSON`},
		{"debugfs", `if [[ "${1:-}" == "-w" ]]; then
cp "${3:-}" "${4:-}.commands"
exit 0
fi
if [[ "${1:-}" == "-R" ]]; then
state="${3:-}.commands"
set -- $2
case "${1:-}" in
stat)
mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)"
mode="${mode: -4}"
printf 'Mode: %s\n' "${mode}"
exit 0
;;
dump)
local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)"
cat "${local_path}" > "${3:-}"
exit 0
;;
esac
fi
exit 0`},
	}
	for _, stub := range stubs {
		target := filepath.Join(binDir, stub.name)
		script := []byte(preamble + stub.body + "\n")
		if err := os.WriteFile(target, script, 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}
	return binDir
}
// fakeMountTools creates a temporary directory holding executable bash
// stand-ins for losetup, mount and umount and returns that directory. The
// losetup stub prints /dev/loop9 for "-Pf --show"; every other call exits 0.
func fakeMountTools(t *testing.T) string {
	t.Helper()
	binDir := t.TempDir()
	stubs := []struct{ name, body string }{
		{"losetup", `if [[ "${1:-}" == "-Pf" && "${2:-}" == "--show" ]]; then
printf '/dev/loop9\n'
exit 0
fi
exit 0`},
		{"mount", `exit 0`},
		{"umount", `exit 0`},
	}
	for _, stub := range stubs {
		target := filepath.Join(binDir, stub.name)
		script := "#!/usr/bin/env bash\nset -eu\n" + stub.body + "\n"
		if err := os.WriteFile(target, []byte(script), 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}
	return binDir
}
// imageChecksum reads the file at path and returns its SHA-256 digest as
// "sha256:<hex>". A read failure fails the test immediately.
func imageChecksum(t *testing.T, path string) string {
	t.Helper()
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatal(readErr)
	}
	hasher := sha256.New()
	hasher.Write(contents)
	return "sha256:" + hex.EncodeToString(hasher.Sum(nil))
}

View File

@ -0,0 +1,52 @@
package secrets
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestClientLoginAndFetchBranches drives Client against a fake Vault server:
// AppRole login must store the returned token, a 404 secret must yield an empty
// (non-nil) result, a 500 must surface an error, and a populated secret must be
// decoded into K3sToken.
func TestClientLoginAndFetchBranches(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/auth/approle/login"):
			_ = json.NewEncoder(w).Encode(map[string]any{"auth": map[string]any{"client_token": "token"}})
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/missing"):
			w.WriteHeader(http.StatusNotFound)
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/error"):
			http.Error(w, "boom", http.StatusInternalServerError)
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/node1"):
			_ = json.NewEncoder(w).Encode(map[string]any{
				"data": map[string]any{
					"data": map[string]any{"k3s_token": "abc", "cloud_init": "ci"},
				},
			})
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	cli := &Client{Addr: srv.URL, RoleID: "role", SecretID: "secret", Client: srv.Client()}
	if err := cli.LoginIfNeeded(context.Background()); err != nil {
		t.Fatalf("LoginIfNeeded: %v", err)
	}
	if cli.Token != "token" {
		t.Fatalf("expected token, got %q", cli.Token)
	}
	if got, err := cli.FetchNode(context.Background(), "missing"); err != nil || got == nil || got.K3sToken != "" {
		t.Fatalf("FetchNode missing = %#v err=%v", got, err)
	}
	if _, err := cli.FetchNode(context.Background(), "error"); err == nil {
		t.Fatal("expected FetchNode error")
	}
	// Guard against a nil result so a regression fails the test with a message
	// instead of panicking on the field access.
	if got, err := cli.FetchNode(context.Background(), "node1"); err != nil || got == nil || got.K3sToken != "abc" {
		t.Fatalf("FetchNode node1 = %#v err=%v", got, err)
	}
	if cli.httpClient() == nil {
		t.Fatal("httpClient returned nil")
	}
}

View File

@ -81,6 +81,8 @@ func (c *Client) LoginIfNeeded(ctx context.Context) error {
return nil
}
// FetchNode pulls secret/data/nodes/<hostname> and returns the per-node secret
// material. Burn-time injection needs a single read path that can fall back to
// empty secrets when Vault has no row for the node.
func (c *Client) FetchNode(ctx context.Context, hostname string) (*NodeSecrets, error) {
if err := c.LoginIfNeeded(ctx); err != nil {

View File

@ -74,3 +74,47 @@ func TestApproRoleLogin(t *testing.T) {
t.Fatalf("approle login not called")
}
}
// TestLoginIfNeededNoopWithToken checks that LoginIfNeeded succeeds and leaves
// a pre-set Token untouched, even though the address is not resolvable.
func TestLoginIfNeededNoopWithToken(t *testing.T) {
	const preset = "existing"
	client := &Client{Addr: "http://example.invalid", Token: preset}
	err := client.LoginIfNeeded(context.Background())
	if err != nil {
		t.Fatalf("LoginIfNeeded: %v", err)
	}
	if client.Token != preset {
		t.Fatalf("token unexpectedly changed")
	}
}
// TestNewFromEnvPopulatesCredentials sets the four VAULT_* variables and checks
// that NewFromEnv copies each of them into the matching Client field.
func TestNewFromEnvPopulatesCredentials(t *testing.T) {
	env := [][2]string{
		{"VAULT_ADDR", "http://vault.example"},
		{"VAULT_TOKEN", "tok"},
		{"VAULT_ROLE_ID", "role"},
		{"VAULT_SECRET_ID", "secret"},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}
	c := NewFromEnv()
	matches := c.Addr == "http://vault.example" && c.Token == "tok" && c.RoleID == "role" && c.SecretID == "secret"
	if !matches {
		t.Fatalf("unexpected env client: %+v", c)
	}
}
// TestFetchNodeAndLoginErrorBranches checks that FetchNode returns an error
// both when the AppRole login is refused (403) and when an already
// authenticated client receives a 500 for the secret read.
func TestFetchNodeAndLoginErrorBranches(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		if path == "/v1/auth/approle/login" {
			http.Error(w, "denied", http.StatusForbidden)
			return
		}
		if path == "/v1/secret/data/nodes/missing" {
			http.Error(w, "down", http.StatusInternalServerError)
			return
		}
		http.NotFound(w, r)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	// No token: FetchNode has to log in first, and the login is denied.
	viaAppRole := &Client{Addr: srv.URL, RoleID: "r", SecretID: "s", Client: srv.Client()}
	if _, err := viaAppRole.FetchNode(context.Background(), "missing"); err == nil {
		t.Fatal("expected approle login failure")
	}
	// Token present: the request reaches the secret path, which answers 500.
	withToken := &Client{Addr: srv.URL, Token: "tok", Client: srv.Client()}
	if _, err := withToken.FetchNode(context.Background(), "missing"); err == nil {
		t.Fatal("expected fetch error for 500 response")
	}
}

View File

@ -0,0 +1,87 @@
package sentinel
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestCollectUsesCommandOutputAndPkgSample puts the fake sentinel commands
// first on PATH and checks that Collect reports their output for host, kernel,
// OS image, runtime versions and a four-entry package sample.
func TestCollectUsesCommandOutputAndPkgSample(t *testing.T) {
	fakeBin := fakeSentinelCommands(t)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))

	snap := Collect()
	hostOK := snap.Hostname == "titan-13" && snap.Kernel == "6.6.63" && snap.OSImage == "Metis OS"
	if !hostOK {
		t.Fatalf("unexpected snapshot: %+v", snap)
	}
	runtimeOK := snap.K3sVersion == "v1.31.5+k3s1" && snap.Containerd == "1.7.99"
	if !runtimeOK {
		t.Fatalf("unexpected runtime facts: %+v", snap)
	}
	pkgOK := len(snap.PackageSample) == 4 && snap.PackageSample["k3s"] == "v1.31.5+k3s1"
	if !pkgOK {
		t.Fatalf("unexpected package sample: %+v", snap.PackageSample)
	}
}
// TestCommandOutputUsesNsenterWhenRequested sets METIS_SENTINEL_NSENTER=1 and
// checks that commandOutput returns the fake nsenter's output rather than
// running the named command.
func TestCommandOutputUsesNsenterWhenRequested(t *testing.T) {
	fakeBin := fakeSentinelCommands(t)
	t.Setenv("PATH", fakeBin+string(os.PathListSeparator)+os.Getenv("PATH"))
	t.Setenv("METIS_SENTINEL_NSENTER", "1")

	raw, err := commandOutput("ignored", "arg")
	if err != nil {
		t.Fatalf("commandOutput: %v", err)
	}
	if trimmed := strings.TrimSpace(string(raw)); trimmed != "nsenter-ok" {
		t.Fatalf("unexpected nsenter output: %q", string(raw))
	}
}
// TestRunAndTrimAndPkgVersionFallbacks checks the empty-string fallbacks:
// runAndTrim for a command that does not exist, osRelease when the fake cat
// output has no PRETTY_NAME line, and pkgVersion for an unknown package with a
// failing fake rpm on PATH.
func TestRunAndTrimAndPkgVersionFallbacks(t *testing.T) {
	binDir := t.TempDir()
	stubs := []struct{ name, body string }{
		{"cat", `printf 'ID=metis\n'`},
		{"rpm", `exit 1`},
	}
	for _, stub := range stubs {
		target := filepath.Join(binDir, stub.name)
		script := "#!/usr/bin/env bash\nset -eu\n" + stub.body + "\n"
		if err := os.WriteFile(target, []byte(script), 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}
	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))

	if out := runAndTrim("missing-command"); out != "" {
		t.Fatalf("runAndTrim missing command = %q", out)
	}
	if out := osRelease(); out != "" {
		t.Fatalf("osRelease without PRETTY_NAME = %q", out)
	}
	if out := pkgVersion("does-not-exist"); out != "" {
		t.Fatalf("pkgVersion fallback = %q", out)
	}
}
// fakeSentinelCommands creates a temporary directory of executable bash stubs
// (hostname, uname, k3s, containerd, cat, dpkg-query, rpm, nsenter) that print
// fixed values, and returns the directory so a test can prepend it to PATH.
func fakeSentinelCommands(t *testing.T) string {
	t.Helper()
	binDir := t.TempDir()
	stubs := []struct{ name, body string }{
		{"hostname", `printf 'titan-13\n'`},
		{"uname", `printf '6.6.63\n'`},
		{"k3s", `printf 'v1.31.5+k3s1\n'`},
		{"containerd", `printf '1.7.99\n'`},
		{"cat", `printf 'PRETTY_NAME="Metis OS"\n'`},
		// dpkg-query answers by its last argument.
		{"dpkg-query", `case "${@: -1}" in
containerd) printf '1.7.99\n' ;;
k3s) printf 'v1.31.5+k3s1\n' ;;
nvidia-container-toolkit) printf '1.16.2\n' ;;
linux-image-raspi) printf '6.6.63\n' ;;
*) printf '1.0.0\n' ;;
esac`},
		{"rpm", `printf '1.0.0\n'`},
		{"nsenter", `printf 'nsenter-ok\n'`},
	}
	for _, stub := range stubs {
		target := filepath.Join(binDir, stub.name)
		script := "#!/usr/bin/env bash\nset -eu\n" + stub.body + "\n"
		if err := os.WriteFile(target, []byte(script), 0o755); err != nil {
			t.Fatalf("write %s: %v", stub.name, err)
		}
	}
	return binDir
}

View File

@ -1,12 +1,8 @@
package service
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
@ -18,6 +14,8 @@ import (
"metis/pkg/sentinel"
)
// JobStatus names the lifecycle state of a queued job. It gives the UI and
// metrics a stable, shared vocabulary for progress updates.
type JobStatus string
const (
@ -313,418 +311,3 @@ func (a *App) WatchSentinel() (*Event, error) {
a.metrics.SetDriftTargets(nextTargets, len(changes))
return event, nil
}
// newJob creates a queued job for the given kind, node, host and device,
// registers it in a.jobs and returns it.
//
// The clock is read once so the ID, StartedAt and UpdatedAt describe the same
// instant. The ID is the UTC Unix-nanosecond timestamp; if that ID is already
// registered it is incremented until unused, so an existing job is never
// silently replaced.
func (a *App) newJob(kind, node, host, device string) *Job {
	now := time.Now().UTC()
	job := &Job{
		Kind:        kind,
		Node:        node,
		Host:        host,
		Device:      device,
		Status:      JobQueued,
		ProgressPct: 0,
		StartedAt:   now,
		UpdatedAt:   now,
	}
	a.mu.Lock()
	defer a.mu.Unlock()
	for stamp := now.UnixNano(); ; stamp++ {
		id := fmt.Sprintf("%d", stamp)
		if _, taken := a.jobs[id]; !taken {
			job.ID = id
			break
		}
	}
	a.jobs[job.ID] = job
	return job
}
// job returns the registered job with the given ID, or nil when none exists.
// The lookup is done under the read lock.
func (a *App) job(id string) *Job {
	a.mu.RLock()
	found := a.jobs[id]
	a.mu.RUnlock()
	return found
}
// setJob applies update to the job with the given ID while holding the write
// lock and then refreshes UpdatedAt. Unknown IDs are ignored.
func (a *App) setJob(id string, update func(*Job)) {
	a.mu.Lock()
	defer a.mu.Unlock()
	if target := a.jobs[id]; target != nil {
		update(target)
		target.UpdatedAt = time.Now().UTC()
	}
}
// failJob finishes the job with the given ID in the JobError state and stores
// the error text in both Error and Message.
//
// The text is taken through errorString, so a nil err records an empty message
// instead of panicking on err.Error().
func (a *App) failJob(id string, err error) {
	reason := errorString(err)
	a.completeJob(id, func(j *Job) {
		j.Status = JobError
		j.Error = reason
		j.Message = reason
	})
}
// completeJob applies update to the job with the given ID and marks it
// finished. Unless update set the status to JobError, the status becomes
// JobDone. Unknown IDs are ignored.
//
// UpdatedAt and FinishedAt are set from a single clock reading so the two
// fields always agree for a finished job.
func (a *App) completeJob(id string, update func(*Job)) {
	a.mu.Lock()
	defer a.mu.Unlock()
	job := a.jobs[id]
	if job == nil {
		return
	}
	update(job)
	if job.Status != JobError {
		job.Status = JobDone
	}
	finished := time.Now().UTC()
	job.UpdatedAt = finished
	job.FinishedAt = finished
}
// appendEvent appends the event as one JSON line to the history file,
// creating the file if needed. It is best-effort: marshal, open and write
// failures are silently dropped.
func (a *App) appendEvent(event Event) {
	encoded, err := json.Marshal(event)
	if err != nil {
		return
	}
	history, err := os.OpenFile(a.settings.HistoryPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return
	}
	defer history.Close()
	encoded = append(encoded, '\n')
	_, _ = history.Write(encoded)
}
// recentEvents returns up to limit events from the history file, newest first.
//
// It returns nil when the history file cannot be opened. Lines that are not
// valid JSON events are skipped. A negative limit is treated as zero (an empty
// result) rather than panicking in make.
func (a *App) recentEvents(limit int) []Event {
	if limit < 0 {
		// make panics on a negative capacity.
		limit = 0
	}
	f, err := os.Open(a.settings.HistoryPath)
	if err != nil {
		return nil
	}
	defer f.Close()
	events := make([]Event, 0, limit)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		var event Event
		if err := json.Unmarshal(scanner.Bytes(), &event); err != nil {
			continue
		}
		events = append(events, event)
	}
	// Keep only the trailing window, then reverse it so the newest comes first.
	if len(events) > limit {
		events = events[len(events)-limit:]
	}
	for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 {
		events[i], events[j] = events[j], events[i]
	}
	return events
}
// cachedImageName returns the base name of source with a trailing ".xz"
// suffix removed, if present.
func cachedImageName(source string) string {
	base := filepath.Base(source)
	return strings.TrimSuffix(base, ".xz")
}
// replacementNodes returns the inventory nodes for which replacementReady
// reports true, sorted by name. Nodes that FindNode cannot resolve are skipped.
func (a *App) replacementNodes() []inventory.NodeSpec {
	ready := make([]inventory.NodeSpec, 0, len(a.inventory.Nodes))
	for _, candidate := range a.inventory.Nodes {
		spec, class, err := a.inventory.FindNode(candidate.Name)
		if err == nil && replacementReady(spec, class) {
			ready = append(ready, candidate)
		}
	}
	byName := func(i, j int) bool { return ready[i].Name < ready[j].Name }
	sort.Slice(ready, byName)
	return ready
}
// ensureReplacementReady returns nil when the named node resolves in the
// inventory and passes replacementReady. It returns the FindNode error when
// the node is unknown, or a descriptive error when the definition is incomplete.
func (a *App) ensureReplacementReady(nodeName string) error {
	spec, class, lookupErr := a.inventory.FindNode(nodeName)
	if lookupErr != nil {
		return lookupErr
	}
	if !replacementReady(spec, class) {
		return fmt.Errorf("node %s does not yet have a complete replacement definition", nodeName)
	}
	return nil
}
// replacementReady reports whether node and class carry every field needed to
// build a replacement: class image and checksum; node name, hostname, IP, k3s
// role and token, SSH user and at least one authorized key; and a k3s URL
// unless the role is "server". Whitespace-only values count as missing.
func replacementReady(node *inventory.NodeSpec, class *inventory.NodeClass) bool {
	if node == nil || class == nil {
		return false
	}
	blank := func(s string) bool { return strings.TrimSpace(s) == "" }
	required := []string{
		class.Image, class.Checksum,
		node.Name, node.Hostname, node.IP,
		node.K3sRole, node.K3sToken, node.SSHUser,
	}
	for _, field := range required {
		if blank(field) {
			return false
		}
	}
	// Only non-server roles need a URL to join.
	if strings.TrimSpace(node.K3sRole) != "server" && blank(node.K3sURL) {
		return false
	}
	return len(node.SSHAuthorized) != 0
}
// flashHosts returns the de-duplicated, sorted set of candidate flash hosts:
// the configured FlashHosts, the default and local hosts, and every name from
// clusterNodes(). When a default flash host is configured it is moved to the
// front of the list.
func (a *App) flashHosts() []string {
	seen := map[string]struct{}{}
	remember := func(raw string) {
		if name := strings.TrimSpace(raw); name != "" {
			seen[name] = struct{}{}
		}
	}
	for _, configured := range a.settings.FlashHosts {
		remember(configured)
	}
	remember(a.settings.DefaultFlashHost)
	remember(a.settings.LocalHost)
	for _, node := range clusterNodes() {
		remember(node.Name)
	}

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	sort.Strings(names)
	if a.settings.DefaultFlashHost != "" {
		return moveToFront(names, a.settings.DefaultFlashHost)
	}
	return names
}
// loadSnapshots reads the snapshots file, replaces a.snapshots with its
// decoded contents and records each snapshot in the metrics with status "ok".
// Read and decode errors are returned unchanged.
func (a *App) loadSnapshots() error {
	raw, err := os.ReadFile(a.settings.SnapshotsPath)
	if err != nil {
		return err
	}
	var loaded map[string]SnapshotRecord
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return err
	}

	a.mu.Lock()
	a.snapshots = loaded
	a.mu.Unlock()

	for key := range loaded {
		record := loaded[key]
		a.metrics.RecordSnapshot(record.Node, "ok", record.CollectedAt)
	}
	return nil
}
// persistSnapshots serialises a.snapshots as indented JSON under the read lock
// and writes it to the snapshots path, creating the parent directory first.
func (a *App) persistSnapshots() error {
	encoded, err := func() ([]byte, error) {
		a.mu.RLock()
		defer a.mu.RUnlock()
		return json.MarshalIndent(a.snapshots, "", " ")
	}()
	if err != nil {
		return err
	}
	target := a.settings.SnapshotsPath
	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
		return mkErr
	}
	return os.WriteFile(target, encoded, 0o644)
}
// loadTargets reads the targets file, replaces a.targets with its decoded
// contents and publishes them to the metrics with a change count of zero.
// Read and decode errors are returned unchanged.
func (a *App) loadTargets() error {
	raw, err := os.ReadFile(a.settings.TargetsPath)
	if err != nil {
		return err
	}
	var loaded map[string]facts.Targets
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return err
	}

	a.mu.Lock()
	a.targets = loaded
	a.mu.Unlock()

	a.metrics.SetDriftTargets(loaded, 0)
	return nil
}
// persistTargets serialises a.targets as indented JSON under the read lock and
// writes it to the targets path, creating the parent directory first.
func (a *App) persistTargets() error {
	encoded, err := func() ([]byte, error) {
		a.mu.RLock()
		defer a.mu.RUnlock()
		return json.MarshalIndent(a.targets, "", " ")
	}()
	if err != nil {
		return err
	}
	target := a.settings.TargetsPath
	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
		return mkErr
	}
	return os.WriteFile(target, encoded, 0o644)
}
// diffTargets returns the sorted names of every class whose targets differ
// between prev and next. A class present in only one map is compared against
// the zero Targets value. The result is never nil.
func diffTargets(prev, next map[string]facts.Targets) []string {
	changed := make([]string, 0)
	visited := make(map[string]struct{}, len(prev)+len(next))
	inspect := func(class string) {
		if _, done := visited[class]; done {
			return
		}
		visited[class] = struct{}{}
		if !targetsEqual(prev[class], next[class]) {
			changed = append(changed, class)
		}
	}
	for class := range prev {
		inspect(class)
	}
	for class := range next {
		inspect(class)
	}
	sort.Strings(changed)
	return changed
}
// targetsEqual reports whether a and b have the same kernel, OS image,
// containerd and k3s versions, the same number of packages, and the same
// version for every package listed in a.
func targetsEqual(a, b facts.Targets) bool {
	switch {
	case a.Kernel != b.Kernel,
		a.OSImage != b.OSImage,
		a.Containerd != b.Containerd,
		a.K3sVersion != b.K3sVersion:
		return false
	case len(a.Packages) != len(b.Packages):
		return false
	}
	for name, want := range a.Packages {
		if got := b.Packages[name]; got != want {
			return false
		}
	}
	return true
}
// humanBytes formats a byte count using binary (1024-based) units with one
// decimal place, e.g. "1.5 KiB". Values below 1024, including negative ones,
// are printed as plain bytes.
func humanBytes(value int64) string {
	const unit = 1024
	if value < unit {
		return fmt.Sprintf("%d B", value)
	}
	const prefixes = "KMGTPE"
	divisor := int64(unit)
	idx := 0
	remaining := value / unit
	for remaining >= unit {
		remaining /= unit
		divisor *= unit
		idx++
	}
	scaled := float64(value) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}
// firstLine trims value and returns its first line, itself trimmed of
// surrounding whitespace.
func firstLine(value string) string {
	trimmed := strings.TrimSpace(value)
	head, _, multiline := strings.Cut(trimmed, "\n")
	if !multiline {
		return trimmed
	}
	return strings.TrimSpace(head)
}
// preferredDevice returns the path of the first device in the list, or an
// empty string when the list is empty.
func preferredDevice(devices []Device) string {
	if len(devices) > 0 {
		return devices[0].Path
	}
	return ""
}
// errorString returns err's message, or an empty string when err is nil.
func errorString(err error) string {
	if err != nil {
		return err.Error()
	}
	return ""
}
// cloneDevices returns an independent copy of the device slice so callers
// cannot alias the stored backing array. An empty or nil input yields nil.
func cloneDevices(devices []Device) []Device {
	count := len(devices)
	if count == 0 {
		return nil
	}
	duplicate := make([]Device, count)
	copy(duplicate, devices)
	return duplicate
}
// cachedDevices returns a copy of the last recorded device list for host,
// falling back to the default flash host when host is blank. It returns
// (nil, nil) when nothing has been recorded, and pairs the devices with the
// recorded error text when the last scan failed.
func (a *App) cachedDevices(host string) ([]Device, error) {
	key := strings.TrimSpace(host)
	if key == "" {
		key = a.settings.DefaultFlashHost
	}

	a.mu.RLock()
	entry, known := a.deviceStore[key]
	a.mu.RUnlock()
	if !known {
		return nil, nil
	}

	devices := cloneDevices(entry.Devices)
	if strings.TrimSpace(entry.Err) == "" {
		return devices, nil
	}
	return devices, errors.New(entry.Err)
}
// recordDevices stores the outcome of a device scan for host (or the default
// flash host when host is blank) together with the check time and any error
// text. When the new scan produced no devices, the previously recorded device
// list is carried over.
func (a *App) recordDevices(host string, devices []Device, err error) {
	key := strings.TrimSpace(host)
	if key == "" {
		key = a.settings.DefaultFlashHost
	}
	entry := deviceSnapshot{
		Devices:   cloneDevices(devices),
		CheckedAt: time.Now().UTC(),
	}
	if err != nil {
		entry.Err = err.Error()
	}

	a.mu.Lock()
	defer a.mu.Unlock()
	if len(entry.Devices) == 0 {
		if previous, known := a.deviceStore[key]; known {
			entry.Devices = cloneDevices(previous.Devices)
		}
	}
	a.deviceStore[key] = entry
}
// deviceScore ranks a block device by how much it looks like a removable SD
// card: the model string contributes 60/50/40 for the first matching keyword
// group, and removable, hotplug, USB transport and an "mmcblk" name add
// 20, 10, 5 and 25 respectively.
func deviceScore(device Device) int {
	model := strings.ToLower(strings.TrimSpace(device.Model))
	mentions := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(model, needle) {
				return true
			}
		}
		return false
	}

	total := 0
	if mentions("microsd", "micro sd") {
		total = 60
	} else if mentions("sdxc", "sdhc", "sd ") {
		total = 50
	} else if mentions("card", "reader") {
		total = 40
	}
	if device.Removable {
		total += 20
	}
	if device.Hotplug {
		total += 10
	}
	if device.Transport == "usb" {
		total += 5
	}
	if strings.HasPrefix(device.Name, "mmcblk") {
		total += 25
	}
	return total
}
// moveToFront returns a copy of values with the first occurrence of preferred
// moved to index 0 and the elements before it shifted right by one. The input
// slice itself is returned unchanged when preferred is empty or there are
// fewer than two values.
func moveToFront(values []string, preferred string) []string {
	if len(values) < 2 || preferred == "" {
		return values
	}
	reordered := make([]string, len(values))
	copy(reordered, values)

	pos := -1
	for i := range reordered {
		if reordered[i] == preferred {
			pos = i
			break
		}
	}
	if pos > 0 {
		// Shift the leading elements right, then drop preferred into slot 0.
		copy(reordered[1:pos+1], reordered[:pos])
		reordered[0] = preferred
	}
	return reordered
}
// deleteNodeObject removes the Kubernetes node object. It first tries the
// in-cluster API; if that fails for any reason it falls back to running
// "kubectl delete node --ignore-not-found" and reports kubectl's combined
// output on failure.
func deleteNodeObject(node string) error {
	if apiErr := deleteNodeObjectInCluster(node); apiErr == nil {
		return nil
	}
	kubectl := exec.Command("kubectl", "delete", "node", node, "--ignore-not-found")
	output, err := kubectl.CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("delete node: %w: %s", err, strings.TrimSpace(string(output)))
}
// deleteNodeObjectInCluster issues a DELETE for /api/v1/nodes/<node> through
// the in-cluster client. Any failure to build that client is reported as
// "not running in cluster".
func deleteNodeObjectInCluster(node string) error {
	kube, err := inClusterKubeClient()
	if err == nil {
		return kube.deleteRequest("/api/v1/nodes/" + node)
	}
	return errors.New("not running in cluster")
}

432
pkg/service/app_helpers.go Normal file
View File

@ -0,0 +1,432 @@
package service
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"metis/pkg/facts"
"metis/pkg/inventory"
)
// newJob creates a queued job for the given kind, node, host and device,
// registers it in a.jobs and returns it.
//
// The clock is read once so the ID, StartedAt and UpdatedAt describe the same
// instant. The ID is the UTC Unix-nanosecond timestamp; if that ID is already
// registered it is incremented until unused, so an existing job is never
// silently replaced.
func (a *App) newJob(kind, node, host, device string) *Job {
	now := time.Now().UTC()
	job := &Job{
		Kind:        kind,
		Node:        node,
		Host:        host,
		Device:      device,
		Status:      JobQueued,
		ProgressPct: 0,
		StartedAt:   now,
		UpdatedAt:   now,
	}
	a.mu.Lock()
	defer a.mu.Unlock()
	for stamp := now.UnixNano(); ; stamp++ {
		id := fmt.Sprintf("%d", stamp)
		if _, taken := a.jobs[id]; !taken {
			job.ID = id
			break
		}
	}
	a.jobs[job.ID] = job
	return job
}
// job returns the registered job with the given ID, or nil when none exists.
// The lookup is done under the read lock.
func (a *App) job(id string) *Job {
	a.mu.RLock()
	found := a.jobs[id]
	a.mu.RUnlock()
	return found
}
// setJob applies update to the job with the given ID while holding the write
// lock and then refreshes UpdatedAt. Unknown IDs are ignored.
func (a *App) setJob(id string, update func(*Job)) {
	a.mu.Lock()
	defer a.mu.Unlock()
	if target := a.jobs[id]; target != nil {
		update(target)
		target.UpdatedAt = time.Now().UTC()
	}
}
// failJob finishes the job with the given ID in the JobError state and stores
// the error text in both Error and Message.
//
// The text is taken through errorString, so a nil err records an empty message
// instead of panicking on err.Error().
func (a *App) failJob(id string, err error) {
	reason := errorString(err)
	a.completeJob(id, func(j *Job) {
		j.Status = JobError
		j.Error = reason
		j.Message = reason
	})
}
// completeJob applies update to the job with the given ID and marks it
// finished. Unless update set the status to JobError, the status becomes
// JobDone. Unknown IDs are ignored.
//
// UpdatedAt and FinishedAt are set from a single clock reading so the two
// fields always agree for a finished job.
func (a *App) completeJob(id string, update func(*Job)) {
	a.mu.Lock()
	defer a.mu.Unlock()
	job := a.jobs[id]
	if job == nil {
		return
	}
	update(job)
	if job.Status != JobError {
		job.Status = JobDone
	}
	finished := time.Now().UTC()
	job.UpdatedAt = finished
	job.FinishedAt = finished
}
// appendEvent appends the event as one JSON line to the history file,
// creating the file if needed. It is best-effort: marshal, open and write
// failures are silently dropped.
func (a *App) appendEvent(event Event) {
	encoded, err := json.Marshal(event)
	if err != nil {
		return
	}
	history, err := os.OpenFile(a.settings.HistoryPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return
	}
	defer history.Close()
	encoded = append(encoded, '\n')
	_, _ = history.Write(encoded)
}
// recentEvents returns up to limit events from the history file, newest first.
//
// It returns nil when the history file cannot be opened. Lines that are not
// valid JSON events are skipped. A negative limit is treated as zero (an empty
// result) rather than panicking in make.
func (a *App) recentEvents(limit int) []Event {
	if limit < 0 {
		// make panics on a negative capacity.
		limit = 0
	}
	f, err := os.Open(a.settings.HistoryPath)
	if err != nil {
		return nil
	}
	defer f.Close()
	events := make([]Event, 0, limit)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		var event Event
		if err := json.Unmarshal(scanner.Bytes(), &event); err != nil {
			continue
		}
		events = append(events, event)
	}
	// Keep only the trailing window, then reverse it so the newest comes first.
	if len(events) > limit {
		events = events[len(events)-limit:]
	}
	for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 {
		events[i], events[j] = events[j], events[i]
	}
	return events
}
// cachedImageName returns the base name of source with a trailing ".xz"
// suffix removed, if present.
func cachedImageName(source string) string {
	base := filepath.Base(source)
	return strings.TrimSuffix(base, ".xz")
}
// replacementNodes returns the inventory nodes for which replacementReady
// reports true, sorted by name. Nodes that FindNode cannot resolve are skipped.
func (a *App) replacementNodes() []inventory.NodeSpec {
	ready := make([]inventory.NodeSpec, 0, len(a.inventory.Nodes))
	for _, candidate := range a.inventory.Nodes {
		spec, class, err := a.inventory.FindNode(candidate.Name)
		if err == nil && replacementReady(spec, class) {
			ready = append(ready, candidate)
		}
	}
	byName := func(i, j int) bool { return ready[i].Name < ready[j].Name }
	sort.Slice(ready, byName)
	return ready
}
// ensureReplacementReady returns nil when the named node resolves in the
// inventory and passes replacementReady. It returns the FindNode error when
// the node is unknown, or a descriptive error when the definition is incomplete.
func (a *App) ensureReplacementReady(nodeName string) error {
	spec, class, lookupErr := a.inventory.FindNode(nodeName)
	if lookupErr != nil {
		return lookupErr
	}
	if !replacementReady(spec, class) {
		return fmt.Errorf("node %s does not yet have a complete replacement definition", nodeName)
	}
	return nil
}
// replacementReady reports whether node and class carry every field needed to
// build a replacement: class image and checksum; node name, hostname, IP, k3s
// role and token, SSH user and at least one authorized key; and a k3s URL
// unless the role is "server". Whitespace-only values count as missing.
func replacementReady(node *inventory.NodeSpec, class *inventory.NodeClass) bool {
	if node == nil || class == nil {
		return false
	}
	blank := func(s string) bool { return strings.TrimSpace(s) == "" }
	required := []string{
		class.Image, class.Checksum,
		node.Name, node.Hostname, node.IP,
		node.K3sRole, node.K3sToken, node.SSHUser,
	}
	for _, field := range required {
		if blank(field) {
			return false
		}
	}
	// Only non-server roles need a URL to join.
	if strings.TrimSpace(node.K3sRole) != "server" && blank(node.K3sURL) {
		return false
	}
	return len(node.SSHAuthorized) != 0
}
// flashHosts returns the de-duplicated, sorted set of candidate flash hosts:
// the configured FlashHosts, the default and local hosts, and every name from
// clusterNodes(). When a default flash host is configured it is moved to the
// front of the list.
func (a *App) flashHosts() []string {
	seen := map[string]struct{}{}
	remember := func(raw string) {
		if name := strings.TrimSpace(raw); name != "" {
			seen[name] = struct{}{}
		}
	}
	for _, configured := range a.settings.FlashHosts {
		remember(configured)
	}
	remember(a.settings.DefaultFlashHost)
	remember(a.settings.LocalHost)
	for _, node := range clusterNodes() {
		remember(node.Name)
	}

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	sort.Strings(names)
	if a.settings.DefaultFlashHost != "" {
		return moveToFront(names, a.settings.DefaultFlashHost)
	}
	return names
}
// loadSnapshots reads the snapshots file, replaces a.snapshots with its
// decoded contents and records each snapshot in the metrics with status "ok".
// Read and decode errors are returned unchanged.
func (a *App) loadSnapshots() error {
	raw, err := os.ReadFile(a.settings.SnapshotsPath)
	if err != nil {
		return err
	}
	var loaded map[string]SnapshotRecord
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return err
	}

	a.mu.Lock()
	a.snapshots = loaded
	a.mu.Unlock()

	for key := range loaded {
		record := loaded[key]
		a.metrics.RecordSnapshot(record.Node, "ok", record.CollectedAt)
	}
	return nil
}
// persistSnapshots serialises a.snapshots as indented JSON under the read lock
// and writes it to the snapshots path, creating the parent directory first.
func (a *App) persistSnapshots() error {
	encoded, err := func() ([]byte, error) {
		a.mu.RLock()
		defer a.mu.RUnlock()
		return json.MarshalIndent(a.snapshots, "", " ")
	}()
	if err != nil {
		return err
	}
	target := a.settings.SnapshotsPath
	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
		return mkErr
	}
	return os.WriteFile(target, encoded, 0o644)
}
// loadTargets reads the targets file, replaces a.targets with its decoded
// contents and publishes them to the metrics with a change count of zero.
// Read and decode errors are returned unchanged.
func (a *App) loadTargets() error {
	raw, err := os.ReadFile(a.settings.TargetsPath)
	if err != nil {
		return err
	}
	var loaded map[string]facts.Targets
	if err = json.Unmarshal(raw, &loaded); err != nil {
		return err
	}

	a.mu.Lock()
	a.targets = loaded
	a.mu.Unlock()

	a.metrics.SetDriftTargets(loaded, 0)
	return nil
}
// persistTargets serialises a.targets as indented JSON under the read lock and
// writes it to the targets path, creating the parent directory first.
func (a *App) persistTargets() error {
	encoded, err := func() ([]byte, error) {
		a.mu.RLock()
		defer a.mu.RUnlock()
		return json.MarshalIndent(a.targets, "", " ")
	}()
	if err != nil {
		return err
	}
	target := a.settings.TargetsPath
	if mkErr := os.MkdirAll(filepath.Dir(target), 0o755); mkErr != nil {
		return mkErr
	}
	return os.WriteFile(target, encoded, 0o644)
}
// diffTargets returns the sorted names of every class whose targets differ
// between prev and next. A class present in only one map is compared against
// the zero Targets value. The result is never nil.
func diffTargets(prev, next map[string]facts.Targets) []string {
	changed := make([]string, 0)
	visited := make(map[string]struct{}, len(prev)+len(next))
	inspect := func(class string) {
		if _, done := visited[class]; done {
			return
		}
		visited[class] = struct{}{}
		if !targetsEqual(prev[class], next[class]) {
			changed = append(changed, class)
		}
	}
	for class := range prev {
		inspect(class)
	}
	for class := range next {
		inspect(class)
	}
	sort.Strings(changed)
	return changed
}
// targetsEqual reports whether a and b have the same kernel, OS image,
// containerd and k3s versions, the same number of packages, and the same
// version for every package listed in a.
func targetsEqual(a, b facts.Targets) bool {
	switch {
	case a.Kernel != b.Kernel,
		a.OSImage != b.OSImage,
		a.Containerd != b.Containerd,
		a.K3sVersion != b.K3sVersion:
		return false
	case len(a.Packages) != len(b.Packages):
		return false
	}
	for name, want := range a.Packages {
		if got := b.Packages[name]; got != want {
			return false
		}
	}
	return true
}
// humanBytes formats a byte count using binary (1024-based) units with one
// decimal place, e.g. "1.5 KiB". Values below 1024, including negative ones,
// are printed as plain bytes.
func humanBytes(value int64) string {
	const unit = 1024
	if value < unit {
		return fmt.Sprintf("%d B", value)
	}
	const prefixes = "KMGTPE"
	divisor := int64(unit)
	idx := 0
	remaining := value / unit
	for remaining >= unit {
		remaining /= unit
		divisor *= unit
		idx++
	}
	scaled := float64(value) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}
// firstLine trims value and returns its first line, itself trimmed of
// surrounding whitespace.
func firstLine(value string) string {
	trimmed := strings.TrimSpace(value)
	head, _, multiline := strings.Cut(trimmed, "\n")
	if !multiline {
		return trimmed
	}
	return strings.TrimSpace(head)
}
// preferredDevice returns the path of the first device in the list, or an
// empty string when the list is empty.
func preferredDevice(devices []Device) string {
	if len(devices) > 0 {
		return devices[0].Path
	}
	return ""
}
// errorString returns err's message, or an empty string when err is nil.
func errorString(err error) string {
	if err != nil {
		return err.Error()
	}
	return ""
}
// cloneDevices returns an independent copy of the device slice so callers
// cannot alias the stored backing array. An empty or nil input yields nil.
func cloneDevices(devices []Device) []Device {
	count := len(devices)
	if count == 0 {
		return nil
	}
	duplicate := make([]Device, count)
	copy(duplicate, devices)
	return duplicate
}
// cachedDevices returns a copy of the last recorded device list for host,
// falling back to the default flash host when host is blank. It returns
// (nil, nil) when nothing has been recorded, and pairs the devices with the
// recorded error text when the last scan failed.
func (a *App) cachedDevices(host string) ([]Device, error) {
	key := strings.TrimSpace(host)
	if key == "" {
		key = a.settings.DefaultFlashHost
	}

	a.mu.RLock()
	entry, known := a.deviceStore[key]
	a.mu.RUnlock()
	if !known {
		return nil, nil
	}

	devices := cloneDevices(entry.Devices)
	if strings.TrimSpace(entry.Err) == "" {
		return devices, nil
	}
	return devices, errors.New(entry.Err)
}
// recordDevices stores the outcome of a device scan for host (or the default
// flash host when host is blank) together with the check time and any error
// text. When the new scan produced no devices, the previously recorded device
// list is carried over.
func (a *App) recordDevices(host string, devices []Device, err error) {
	key := strings.TrimSpace(host)
	if key == "" {
		key = a.settings.DefaultFlashHost
	}
	entry := deviceSnapshot{
		Devices:   cloneDevices(devices),
		CheckedAt: time.Now().UTC(),
	}
	if err != nil {
		entry.Err = err.Error()
	}

	a.mu.Lock()
	defer a.mu.Unlock()
	if len(entry.Devices) == 0 {
		if previous, known := a.deviceStore[key]; known {
			entry.Devices = cloneDevices(previous.Devices)
		}
	}
	a.deviceStore[key] = entry
}
// deviceScore ranks a block device as a flash candidate; higher is better.
// The model string contributes at most one bonus (microSD 60, SD/SDHC/SDXC
// 50, generic card/reader 40); removable, hotplug, USB transport and an
// "mmcblk" device name each add their own bonus on top.
func deviceScore(device Device) int {
	model := strings.ToLower(strings.TrimSpace(device.Model))
	mentions := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(model, needle) {
				return true
			}
		}
		return false
	}

	total := 0
	// Only the first matching model family counts.
	switch {
	case mentions("microsd", "micro sd"):
		total = 60
	case mentions("sdxc", "sdhc", "sd "):
		total = 50
	case mentions("card", "reader"):
		total = 40
	}

	bonuses := []struct {
		applies bool
		points  int
	}{
		{device.Removable, 20},
		{device.Hotplug, 10},
		{device.Transport == "usb", 5},
		{strings.HasPrefix(device.Name, "mmcblk"), 25},
	}
	for _, bonus := range bonuses {
		if bonus.applies {
			total += bonus.points
		}
	}
	return total
}
// moveToFront returns values with the first occurrence of preferred moved to
// index 0, keeping the relative order of everything else. The input slice is
// never modified: a copy is returned, except when preferred is empty or there
// are fewer than two values, in which case values itself is returned.
func moveToFront(values []string, preferred string) []string {
	if len(values) < 2 || preferred == "" {
		return values
	}
	reordered := append([]string(nil), values...)
	for pos := range reordered {
		if reordered[pos] != preferred {
			continue
		}
		// Shift everything ahead of the match one slot right, then drop
		// the match into the freed first slot.
		copy(reordered[1:pos+1], reordered[:pos])
		reordered[0] = preferred
		break
	}
	return reordered
}
// deleteNodeObject removes the named Node object from the cluster. It first
// tries the in-cluster API client; if that fails for any reason it falls back
// to running `kubectl delete node <node> --ignore-not-found` and wraps any
// kubectl failure together with its combined output.
func deleteNodeObject(node string) error {
	if deleteNodeObjectInCluster(node) == nil {
		return nil
	}
	output, err := exec.Command("kubectl", "delete", "node", node, "--ignore-not-found").CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("delete node: %w: %s", err, strings.TrimSpace(string(output)))
}
// deleteNodeObjectInCluster deletes the named Node through the client built
// by kubeClientFactory. Any factory failure is reported as "not running in
// cluster"; otherwise the result of the DELETE request is returned.
func deleteNodeObjectInCluster(node string) error {
	kube, factoryErr := kubeClientFactory()
	if factoryErr != nil {
		return errors.New("not running in cluster")
	}
	endpoint := fmt.Sprintf("/api/v1/nodes/%s", node)
	return kube.deleteRequest(endpoint)
}

View File

@ -37,17 +37,23 @@ type kubeClient struct {
client *http.Client
}
// kubeClientFactory builds the Kubernetes API client used by the helpers in
// this package. It is a variable so tests can swap in a client that targets a
// fake API server.
var kubeClientFactory = inClusterKubeClient
// Service-account credential files read by inClusterKubeClient. They are
// variables so tests can point them at temporary files.
var (
kubeServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token"
kubeServiceAccountCAPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
)
func inClusterKubeClient() (*kubeClient, error) {
host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST"))
port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT"))
if host == "" || port == "" {
return nil, fmt.Errorf("not running in cluster")
}
token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
token, err := os.ReadFile(kubeServiceAccountTokenPath)
if err != nil {
return nil, err
}
caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
caPEM, err := os.ReadFile(kubeServiceAccountCAPath)
if err != nil {
return nil, err
}
@ -118,7 +124,7 @@ func (k *kubeClient) deleteRequest(path string) error {
}
func clusterNodes() []clusterNode {
kube, err := inClusterKubeClient()
kube, err := kubeClientFactory()
if err != nil {
return nil
}
@ -164,7 +170,7 @@ func (a *App) podImageForArch(arch string) string {
}
func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (string, error) {
kube, err := inClusterKubeClient()
kube, err := kubeClientFactory()
if err != nil {
return "", err
}

123
pkg/service/cluster_test.go Normal file
View File

@ -0,0 +1,123 @@
package service
import (
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
)
// TestInClusterKubeClientMissingEnv checks that the in-cluster client
// constructor fails when the Kubernetes service host and port are blank.
func TestInClusterKubeClientMissingEnv(t *testing.T) {
	for _, name := range []string{"KUBERNETES_SERVICE_HOST", "KUBERNETES_SERVICE_PORT"} {
		t.Setenv(name, "")
	}
	_, err := inClusterKubeClient()
	if err != nil {
		return
	}
	t.Fatal("expected inClusterKubeClient error without env")
}
// TestKubeClientAndPodHelpers exercises the kube client request helpers and
// the remote-pod helpers against a fake API server that reports a failed pod
// and an unreachable log endpoint.
func TestKubeClientAndPodHelpers(t *testing.T) {
// Fake API server. Case order matters: the "/log" suffix case must precede the
// generic "/pods/" GET case, which would otherwise also match log requests.
kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
// Nodes are listed as "b" then "a" so the ordering assertion below is meaningful.
_ = json.NewEncoder(w).Encode(map[string]any{
"items": []any{
map[string]any{
"metadata": map[string]any{"name": "b", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}},
"spec": map[string]any{"unschedulable": false},
},
map[string]any{
"metadata": map[string]any{"name": "a", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}},
"spec": map[string]any{"unschedulable": false},
},
},
})
case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"):
w.WriteHeader(http.StatusCreated)
case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/nodes/"):
// Node deletes answer 404; deleteRequest is expected to treat that as success.
w.WriteHeader(http.StatusNotFound)
case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/") && strings.HasSuffix(r.URL.Path, "/log"):
http.Error(w, "proxy error from 127.0.0.1:6443", http.StatusBadGateway)
case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"):
// The pod carries both a waiting and a terminated container state; the
// assertion below expects the terminated reason and message to win.
_ = json.NewEncoder(w).Encode(map[string]any{
"metadata": map[string]any{"name": filepath.Base(r.URL.Path)},
"status": map[string]any{
"phase": "Failed",
"reason": "CrashLoopBackOff",
"message": "boom",
"containerStatuses": []any{
map[string]any{
"state": map[string]any{
"waiting": map[string]any{"reason": "ImagePullBackOff", "message": "pulling"},
"terminated": map[string]any{"reason": "Completed", "message": "done"},
},
},
},
},
})
default:
http.NotFound(w, r)
}
}))
defer kube.Close()
// kubeClientFactoryForURL is a test helper defined elsewhere in the package.
client := kubeClientFactoryForURL(kube.URL, kube.Client())
if err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &map[string]any{}); err != nil {
t.Fatalf("jsonRequest: %v", err)
}
if err := client.deleteRequest("/api/v1/nodes/a"); err != nil {
t.Fatalf("deleteRequest 404 should be nil: %v", err)
}
if err := client.jsonRequest(http.MethodGet, "/missing", nil, &map[string]any{}); err == nil {
t.Fatal("expected jsonRequest failure on 404")
}
// Route package-level helpers through the fake server for the rest of the
// test and restore the previous factory afterwards.
origFactory := kubeClientFactory
kubeClientFactory = func() (*kubeClient, error) {
return client, nil
}
t.Cleanup(func() { kubeClientFactory = origFactory })
nodes := clusterNodes()
if len(nodes) != 2 || nodes[0].Name != "a" {
t.Fatalf("clusterNodes sort mismatch: %#v", nodes)
}
app := newTestApp(t)
app.settings.Namespace = "maintenance"
app.settings.RunnerImageARM64 = "runner:arm64"
state, err := app.remotePodState(client, "metis-build-test")
if err != nil {
t.Fatalf("remotePodState: %v", err)
}
if state.Reason != "Completed" || state.Message != "done" {
t.Fatalf("expected terminated state override, got %#v", state)
}
// The 502 from the log endpoint must surface as the kubelet-reachability error.
if _, err := app.remotePodLogs(client, "metis-build-test"); err == nil || !strings.Contains(err.Error(), "could not reach the node kubelet log endpoint") {
t.Fatalf("expected kubelet log endpoint error, got %v", err)
}
// Presumably fails because the fake server reports the pod phase as Failed — TODO confirm.
if _, err := app.runRemotePod("job-1", "metis-fail-test", map[string]any{}); err == nil {
t.Fatal("expected runRemotePod failure")
}
if _, err := app.ensureDevice("titan-22", "missing"); err == nil {
t.Fatal("expected ensureDevice missing target to fail")
}
}
// TestDeleteNodeObjectFallback checks that deleteNodeObject shells out to
// kubectl when no in-cluster client is available. A stub kubectl placed first
// on PATH records its arguments so the test can confirm the fallback ran with
// the expected command line.
func TestDeleteNodeObjectFallback(t *testing.T) {
	tmp := t.TempDir()
	argsPath := filepath.Join(tmp, "kubectl.args")
	kubectl := filepath.Join(tmp, "kubectl")
	script := "#!/usr/bin/env bash\nset -eu\nprintf '%s' \"$*\" > \"" + argsPath + "\"\n"
	if err := os.WriteFile(kubectl, []byte(script), 0o755); err != nil {
		t.Fatal(err)
	}
	t.Setenv("PATH", tmp+string(os.PathListSeparator)+os.Getenv("PATH"))

	// Force the in-cluster path to fail so the kubectl fallback is taken.
	origFactory := kubeClientFactory
	kubeClientFactory = func() (*kubeClient, error) { return nil, errors.New("offline") }
	t.Cleanup(func() { kubeClientFactory = origFactory })

	if err := deleteNodeObject("titan-15"); err != nil {
		t.Fatalf("deleteNodeObject fallback: %v", err)
	}

	// Without this check the test would pass even if the stub never ran
	// or was called with the wrong arguments.
	recorded, err := os.ReadFile(argsPath)
	if err != nil {
		t.Fatalf("kubectl fallback was not invoked: %v", err)
	}
	if want := "delete node titan-15 --ignore-not-found"; string(recorded) != want {
		t.Fatalf("kubectl args = %q, want %q", recorded, want)
	}
}

View File

@ -0,0 +1,257 @@
package service
import (
"encoding/json"
"encoding/pem"
"errors"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"metis/pkg/facts"
"metis/pkg/inventory"
)
// TestServiceArtifactAndSnapshotPersistenceErrorBranches checks that each
// persist helper reports an error when its target path sits underneath a
// regular file instead of a directory.
func TestServiceArtifactAndSnapshotPersistenceErrorBranches(t *testing.T) {
	app := newTestApp(t)

	// A plain file used as the "parent directory" of every state path.
	blocker := filepath.Join(t.TempDir(), "blocked")
	if err := os.WriteFile(blocker, []byte("block"), 0o644); err != nil {
		t.Fatal(err)
	}

	steps := []struct {
		file    string
		point   func(path string)
		persist func() error
		failure string
	}{
		{
			file:    "artifacts.json",
			point:   func(path string) { app.settings.ArtifactStatePath = path },
			persist: func() error { return app.persistArtifacts() },
			failure: "expected persistArtifacts to fail when parent is a file",
		},
		{
			file:    "snapshots.json",
			point:   func(path string) { app.settings.SnapshotsPath = path },
			persist: func() error { return app.persistSnapshots() },
			failure: "expected persistSnapshots to fail when parent is a file",
		},
		{
			file:    "targets.json",
			point:   func(path string) { app.settings.TargetsPath = path },
			persist: func() error { return app.persistTargets() },
			failure: "expected persistTargets to fail when parent is a file",
		},
	}
	for _, step := range steps {
		step.point(filepath.Join(blocker, step.file))
		if step.persist() == nil {
			t.Fatal(step.failure)
		}
	}
}
// TestServiceReplacementAndDeviceBranches covers replacement-readiness
// filtering, target diffing and the device cache's behavior after a failed
// refresh.
func TestServiceReplacementAndDeviceBranches(t *testing.T) {
app := newTestApp(t)
// "ready" has every field populated; "incomplete" only has a name and class.
ready := inventory.NodeSpec{
Name: "ready",
Class: "rpi4",
Hostname: "ready",
IP: "192.168.22.10",
K3sRole: "agent",
K3sURL: "https://192.168.22.1:6443",
K3sToken: "token",
SSHUser: "atlas",
SSHAuthorized: []string{"ssh-ed25519 AAA"},
}
incomplete := inventory.NodeSpec{Name: "incomplete", Class: "rpi4"}
class := inventory.NodeClass{Name: "rpi4", Image: "file:///tmp/base.img", Checksum: "sha256:abc"}
app.inventory = &inventory.Inventory{Classes: []inventory.NodeClass{class}, Nodes: []inventory.NodeSpec{ready, incomplete}}
if got := app.replacementNodes(); len(got) != 1 || got[0].Name != "ready" {
t.Fatalf("replacementNodes = %#v", got)
}
if err := app.ensureReplacementReady("incomplete"); err == nil {
t.Fatal("expected ensureReplacementReady to reject incomplete node")
}
// One changed class ("a") plus one added class ("b") should yield two diff entries.
if diff := diffTargets(map[string]facts.Targets{"a": {Kernel: "1"}}, map[string]facts.Targets{"a": {Kernel: "2"}, "b": {Kernel: "3"}}); len(diff) != 2 {
t.Fatalf("diffTargets = %#v", diff)
}
app.recordDevices("host", []Device{{Path: "/dev/sda"}}, nil)
if got, err := app.cachedDevices("host"); err != nil || len(got) != 1 {
t.Fatalf("cachedDevices = %#v err=%v", got, err)
}
// A failed refresh with no devices keeps the previously seen device list
// while surfacing the error.
app.recordDevices("host", nil, errors.New("boom"))
if got, err := app.cachedDevices("host"); err == nil || len(got) != 1 {
t.Fatalf("cachedDevices error snapshot = %#v err=%v", got, err)
}
if _, err := app.Replace("incomplete", "titan-22", "/dev/sdz"); err == nil {
t.Fatal("expected Replace to reject incomplete node")
}
}
// TestServiceHarborBranches runs the Harbor helpers against a fake Harbor API
// that reports no existing projects, accepts project creation, lists two
// artifacts and accepts artifact deletion.
func TestServiceHarborBranches(t *testing.T) {
harbor := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
// NOTE(review): this prefix match also catches any GET whose path starts with
// /api/v2.0/projects. If the artifact listing URL lives under
// /projects/<project>/repositories/..., the artifacts case below is never
// reached and pruning sees an empty list — confirm against pruneHarborArtifacts.
case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"):
_, _ = w.Write([]byte(`[]`))
case r.Method == http.MethodPost && r.URL.Path == "/api/v2.0/projects":
w.WriteHeader(http.StatusCreated)
case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"):
_ = json.NewEncoder(w).Encode([]map[string]any{
{"digest": "sha256:aaa", "push_time": "2026-04-01T10:00:00Z"},
{"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"},
})
case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"):
w.WriteHeader(http.StatusAccepted)
default:
http.Error(w, "boom", http.StatusInternalServerError)
}
}))
defer harbor.Close()
app := &App{settings: Settings{
HarborAPIBase: harbor.URL + "/api/v2.0",
HarborUsername: "admin",
HarborPassword: "pw",
HarborProject: "metis",
HarborRegistry: "registry.example",
}}
if got := app.artifactRepo("node"); got != "registry.example/metis/node" {
t.Fatalf("artifactRepo = %q", got)
}
// The empty project list means ensureHarborProject has to take its create path.
if err := app.ensureHarborProject(); err != nil {
t.Fatalf("ensureHarborProject create: %v", err)
}
if err := app.pruneHarborArtifacts("node", 1); err != nil {
t.Fatalf("pruneHarborArtifacts: %v", err)
}
}
// TestServiceHarborErrorBranches runs the Harbor helpers against a fake Harbor
// API whose project lookup and artifact deletion both answer 500, and expects
// each helper to report an error.
func TestServiceHarborErrorBranches(t *testing.T) {
harbor := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch {
// NOTE(review): this prefix match also catches any GET under
// /api/v2.0/projects. If the artifact listing URL is nested there, the prune
// call below fails on the listing rather than on the delete — confirm which
// branch is actually exercised.
case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"):
http.Error(w, "lookup failed", http.StatusInternalServerError)
case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"):
_ = json.NewEncoder(w).Encode([]map[string]any{
{"digest": "sha256:aaa", "push_time": "2026-04-01T10:00:00Z"},
{"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"},
})
case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"):
http.Error(w, "delete failed", http.StatusInternalServerError)
default:
http.NotFound(w, r)
}
}))
defer harbor.Close()
app := &App{settings: Settings{
HarborAPIBase: harbor.URL + "/api/v2.0",
HarborUsername: "admin",
HarborPassword: "pw",
HarborProject: "metis",
HarborRegistry: "registry.example",
}}
if err := app.ensureHarborProject(); err == nil {
t.Fatal("expected ensureHarborProject error")
}
if err := app.pruneHarborArtifacts("node", 0); err == nil {
t.Fatal("expected pruneHarborArtifacts error")
}
}
// TestServiceClusterAndRemotePodBranches builds a real in-cluster client from
// temporary service-account files, points it at a fake TLS API server and
// walks the success paths of the node, pod-state and pod-log helpers. It ends
// with a build that must fail once the Harbor API base is cleared and a
// Replace call for an unknown node.
func TestServiceClusterAndRemotePodBranches(t *testing.T) {
	// Redirect the service-account credential paths to a temp dir and put
	// the originals back when the test finishes.
	origTokenPath := kubeServiceAccountTokenPath
	origCAPath := kubeServiceAccountCAPath
	dir := t.TempDir()
	kubeServiceAccountTokenPath = filepath.Join(dir, "token")
	kubeServiceAccountCAPath = filepath.Join(dir, "ca.crt")
	t.Cleanup(func() {
		kubeServiceAccountTokenPath = origTokenPath
		kubeServiceAccountCAPath = origCAPath
	})
	if err := os.WriteFile(kubeServiceAccountTokenPath, []byte("tok"), 0o644); err != nil {
		t.Fatal(err)
	}
	t.Setenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc")
	t.Setenv("KUBERNETES_SERVICE_PORT", "443")

	// Fake API server. The "/log" case must precede the generic "/pods/"
	// GET case, which would otherwise also match log requests.
	srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
			// Listed as "b" then "a" so the ordering assertion is meaningful.
			_ = json.NewEncoder(w).Encode(map[string]any{
				"items": []any{
					map[string]any{
						"metadata": map[string]any{"name": "b", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}},
						"spec":     map[string]any{"unschedulable": false},
					},
					map[string]any{
						"metadata": map[string]any{"name": "a", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}},
						"spec":     map[string]any{"unschedulable": false},
					},
				},
			})
		case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"):
			w.WriteHeader(http.StatusCreated)
		case r.Method == http.MethodDelete:
			w.WriteHeader(http.StatusOK)
		case r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/log"):
			_, _ = w.Write([]byte("pod logs"))
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"):
			_ = json.NewEncoder(w).Encode(map[string]any{
				"metadata": map[string]any{"name": filepath.Base(r.URL.Path)},
				"status": map[string]any{
					"phase":   "Succeeded",
					"message": `{"dest_path":"/tmp/out.img"}`,
					"reason":  "Completed",
				},
			})
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	// The fake server's certificate doubles as the service-account CA bundle.
	certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: srv.Certificate().Raw})
	if err := os.WriteFile(kubeServiceAccountCAPath, certPEM, 0o644); err != nil {
		t.Fatal(err)
	}
	client, err := inClusterKubeClient()
	if err != nil {
		t.Fatalf("inClusterKubeClient: %v", err)
	}
	// Re-target the freshly built client at the fake server.
	client.baseURL = srv.URL
	client.client = srv.Client()

	// Restore whatever factory was installed before this test, rather than
	// assuming it was the production default.
	origFactory := kubeClientFactory
	kubeClientFactory = func() (*kubeClient, error) { return client, nil }
	t.Cleanup(func() { kubeClientFactory = origFactory })

	var nodePayload map[string]any
	if err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &nodePayload); err != nil {
		t.Fatalf("jsonRequest: %v", err)
	}
	if err := client.deleteRequest("/api/v1/nodes/a"); err != nil {
		t.Fatalf("deleteRequest: %v", err)
	}
	if nodes := clusterNodes(); len(nodes) != 2 || nodes[0].Name != "a" {
		t.Fatalf("clusterNodes = %#v", nodes)
	}

	app := newTestApp(t)
	app.settings.Namespace = "maintenance"
	app.settings.RunnerImageARM64 = "runner:arm64"
	state, err := app.remotePodState(client, "metis-build-test")
	if err != nil {
		t.Fatalf("remotePodState: %v", err)
	}
	if state.Phase != "Succeeded" || state.Message == "" {
		t.Fatalf("remotePodState = %#v", state)
	}
	logs, err := app.remotePodLogs(client, "metis-build-test")
	if err != nil || logs != "pod logs" {
		t.Fatalf("remotePodLogs = %q err=%v", logs, err)
	}
	// Only the arm64 runner image is configured, so amd64 has nothing to return.
	if got := app.podImageForArch("amd64"); got != "" {
		t.Fatalf("podImageForArch fallback = %q", got)
	}
	if got := app.podImageForArch("arm64"); got != "runner:arm64" {
		t.Fatalf("podImageForArch arm64 = %q", got)
	}

	job := app.newJob("build", "titan-15", "titan-22", "/dev/sdz")
	app.settings.HarborAPIBase = ""
	app.runBuild(job, false)
	if got := app.job(job.ID); got == nil || got.Status != JobError {
		t.Fatalf("runBuild should fail without harbor creds: %#v", got)
	}
	if _, err := app.Replace("incomplete", "titan-22", "/dev/sdz"); err == nil {
		t.Fatal("expected Replace to reject incomplete node")
	}
}

View File

@ -0,0 +1,16 @@
package service
import "testing"
func TestHarborHelpersErrorPaths(t *testing.T) {
app := &App{settings: Settings{HarborRegistry: "reg/", HarborProject: "proj"}}
if got := app.artifactRepo("node"); got != "reg/proj/node" {
t.Fatalf("artifactRepo = %q", got)
}
if err := app.ensureHarborProject(); err == nil {
t.Fatal("expected missing creds error")
}
if err := app.pruneHarborArtifacts("node", 1); err == nil {
t.Fatal("expected pruneHarborArtifacts to require API base")
}
}

355
pkg/service/helpers_test.go Normal file
View File

@ -0,0 +1,355 @@
package service
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"reflect"
"testing"
"time"
"metis/pkg/facts"
"metis/pkg/inventory"
"metis/pkg/sentinel"
)
// TestSettingsHelpersAndSmallUtilities checks that FromEnv picks up the
// METIS_* environment variables and spot-checks the small pure helpers
// (list splitting, byte formatting, first-line extraction, reordering,
// device scoring and repository naming).
func TestSettingsHelpersAndSmallUtilities(t *testing.T) {
dataDir := filepath.Join(t.TempDir(), "data")
t.Setenv("METIS_DATA_DIR", dataDir)
// The doubled comma and stray spaces check that empty entries are dropped and
// items are trimmed.
t.Setenv("METIS_FLASH_HOSTS", "a, b,, c")
t.Setenv("METIS_MAX_DEVICE_BYTES", "12345")
t.Setenv("METIS_DEFAULT_FLASH_HOST", "flash-1")
t.Setenv("METIS_LOCAL_HOST", "local-1")
settings := FromEnv()
if got, want := settings.CacheDir, filepath.Join(dataDir, "cache"); got != want {
t.Fatalf("CacheDir = %q, want %q", got, want)
}
if settings.DefaultFlashHost != "flash-1" || settings.LocalHost != "local-1" {
t.Fatalf("unexpected env settings: %+v", settings)
}
if settings.MaxDeviceBytes != 12345 {
t.Fatalf("expected MaxDeviceBytes=12345, got %d", settings.MaxDeviceBytes)
}
if !reflect.DeepEqual(splitList("a, b,, c"), []string{"a", "b", "c"}) {
t.Fatalf("splitList mismatch")
}
if got := getenvInt64("METIS_MAX_DEVICE_BYTES", 1); got != 12345 {
t.Fatalf("getenvInt64 = %d", got)
}
if got := hostnameOr("fallback"); got == "" {
t.Fatal("hostnameOr returned empty string")
}
if got := humanBytes(1536); got != "1.5 KiB" {
t.Fatalf("humanBytes = %q", got)
}
if got := firstLine("alpha\nbeta"); got != "alpha" {
t.Fatalf("firstLine = %q", got)
}
if got := moveToFront([]string{"b", "a", "c"}, "a"); !reflect.DeepEqual(got, []string{"a", "b", "c"}) {
t.Fatalf("moveToFront = %#v", got)
}
if got := errorString(nil); got != "" {
t.Fatalf("errorString(nil) = %q", got)
}
if got := preferredDevice([]Device{{Path: "/dev/sda"}}); got != "/dev/sda" {
t.Fatalf("preferredDevice = %q", got)
}
if got := deviceScore(Device{Name: "mmcblk0", Model: "Micro SD card", Removable: true, Hotplug: true, Transport: "usb"}); got <= 0 {
t.Fatalf("expected positive device score, got %d", got)
}
if got := inventoryNodeArch(nil, nil); got != "arm64" {
t.Fatalf("inventoryNodeArch fallback = %q", got)
}
// Slashes around the registry and project must not produce doubled separators.
if got := (&App{settings: Settings{HarborRegistry: "reg/", HarborProject: "/proj/"}}).artifactRepo("node"); got != "reg/proj/node" {
t.Fatalf("artifactRepo = %q", got)
}
}
// TestAppJobDeviceAndStateHelpers walks a job through its lifecycle helpers
// (new, set, complete, fail), checks event history ordering, and verifies the
// device cache returns copies and carries refresh errors.
func TestAppJobDeviceAndStateHelpers(t *testing.T) {
app := newTestApp(t)
app.settings.HistoryPath = filepath.Join(t.TempDir(), "history.jsonl")
app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json")
job := app.newJob("build", "titan-15", "titan-22", "/dev/sdz")
if job.Status != JobQueued {
t.Fatalf("new job status = %s", job.Status)
}
app.setJob(job.ID, func(j *Job) {
j.Status = JobRunning
j.Stage = "build"
})
if got := app.job(job.ID); got == nil || got.Status != JobRunning {
t.Fatalf("setJob did not update job: %#v", got)
}
app.completeJob(job.ID, func(j *Job) {
j.Message = "done"
})
if got := app.job(job.ID); got == nil || got.Status != JobDone || got.FinishedAt.IsZero() {
t.Fatalf("completeJob did not finish job: %#v", got)
}
failed := app.newJob("replace", "titan-15", "titan-22", "/dev/sdz")
app.failJob(failed.ID, os.ErrNotExist)
if got := app.job(failed.ID); got == nil || got.Status != JobError || got.Error == "" {
t.Fatalf("failJob did not mark error: %#v", got)
}
// recentEvents(1) must return the most recently appended event.
app.appendEvent(Event{Kind: "one", Summary: "first"})
app.appendEvent(Event{Kind: "two", Summary: "second"})
events := app.recentEvents(1)
if len(events) != 1 || events[0].Kind != "two" {
t.Fatalf("recentEvents returned %#v", events)
}
app.recordDevices("titan-22", []Device{{Name: "sda", Path: "/dev/sda"}}, nil)
devices, err := app.cachedDevices("titan-22")
if err != nil || len(devices) != 1 || devices[0].Path != "/dev/sda" {
t.Fatalf("cachedDevices = %#v err=%v", devices, err)
}
// Mutating the returned slice must not leak into the cache.
devices[0].Path = "/dev/mutated"
again, _ := app.cachedDevices("titan-22")
if again[0].Path != "/dev/sda" {
t.Fatalf("cachedDevices should return a copy, got %#v", again)
}
app.recordDevices("titan-22", nil, os.ErrPermission)
if _, err := app.cachedDevices("titan-22"); err == nil {
t.Fatal("expected cached device error")
}
app.recordDevices("titan-22", []Device{{Path: "/dev/sda"}}, nil)
state := app.State("titan-22")
if state.SelectedHost != "titan-22" || state.PreferredDevice == "" {
t.Fatalf("unexpected state: %+v", state)
}
}
// TestAppPersistenceAndTargets seeds inventory, snapshot, target and artifact
// state files on disk, builds an App from them with NewApp, and checks that
// artifact records survive a record/reload cycle and that snapshots and the
// sentinel watch succeed.
func TestAppPersistenceAndTargets(t *testing.T) {
dir := t.TempDir()
invPath := filepath.Join(dir, "inventory.yaml")
if err := os.WriteFile(invPath, []byte(`
classes:
- name: rpi4
arch: arm64
os: armbian
image: file:///tmp/base.img
nodes:
- name: titan-15
class: rpi4
hostname: titan-15
ip: 192.168.22.43
k3s_role: agent
`), 0o644); err != nil {
t.Fatal(err)
}
snapshotsPath := filepath.Join(dir, "snapshots.json")
targetsPath := filepath.Join(dir, "targets.json")
artifactStatePath := filepath.Join(dir, "artifacts.json")
seedSnapshots := map[string]SnapshotRecord{
"titan-15": {
Node: "titan-15",
CollectedAt: testTime(t),
Snapshot: sentinel.Snapshot{Hostname: "titan-15", Kernel: "6.6.63", K3sVersion: "v1.31.5+k3s1"},
},
}
// Marshal errors are ignored for the seed fixtures in this test.
data, _ := json.MarshalIndent(seedSnapshots, "", " ")
if err := os.WriteFile(snapshotsPath, data, 0o644); err != nil {
t.Fatal(err)
}
seedTargets := map[string]facts.Targets{
"rpi4": {Kernel: "6.6.63"},
}
data, _ = json.MarshalIndent(seedTargets, "", " ")
if err := os.WriteFile(targetsPath, data, 0o644); err != nil {
t.Fatal(err)
}
seedArtifacts := map[string]ArtifactSummary{
"titan-15": {Node: "titan-15", Ref: "reg/proj/titan-15:latest"},
}
data, _ = json.MarshalIndent(seedArtifacts, "", " ")
if err := os.WriteFile(artifactStatePath, data, 0o644); err != nil {
t.Fatal(err)
}
app, err := NewApp(Settings{
InventoryPath: invPath,
CacheDir: filepath.Join(dir, "cache"),
ArtifactDir: filepath.Join(dir, "artifacts"),
ArtifactStatePath: artifactStatePath,
HistoryPath: filepath.Join(dir, "history.jsonl"),
SnapshotsPath: snapshotsPath,
TargetsPath: targetsPath,
DefaultFlashHost: "titan-22",
FlashHosts: []string{"titan-22"},
LocalHost: "titan-22",
AllowedGroups: []string{"admin"},
})
if err != nil {
t.Fatalf("NewApp: %v", err)
}
// The seeded artifact file must have been loaded by NewApp.
if got := app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:latest" {
t.Fatalf("artifacts() = %q", got)
}
// Recording a new ref and reloading from disk must yield the new ref.
if err := app.recordArtifact(ArtifactSummary{Node: "titan-15", Ref: "reg/proj/titan-15:v2"}); err != nil {
t.Fatalf("recordArtifact: %v", err)
}
if err := app.loadArtifacts(); err != nil {
t.Fatalf("loadArtifacts: %v", err)
}
if got := app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:v2" {
t.Fatalf("recordArtifact/persist mismatch: %q", got)
}
if err := app.StoreSnapshot(SnapshotRecord{Node: "titan-15", Snapshot: sentinel.Snapshot{Hostname: "titan-15"}}); err != nil {
t.Fatalf("StoreSnapshot: %v", err)
}
if event, err := app.WatchSentinel(); err != nil || event == nil || event.Kind != "sentinel.watch" {
t.Fatalf("WatchSentinel: event=%#v err=%v", event, err)
}
}
// TestHelperBranchesAndPersistenceFailures covers remaining branches of the
// small helpers and checks that the load/persist functions report errors for
// missing files and for paths that are directories.
func TestHelperBranchesAndPersistenceFailures(t *testing.T) {
app := newTestApp(t)
if got := cachedImageName("/tmp/archive/base.img.xz"); got != "base.img" {
t.Fatalf("cachedImageName = %q", got)
}
if got := humanBytes(1); got != "1 B" {
t.Fatalf("humanBytes(1) = %q", got)
}
if got := humanBytes(1024 * 1024); got != "1.0 MiB" {
t.Fatalf("humanBytes(1MiB) = %q", got)
}
if got := errorString(fmt.Errorf("boom")); got != "boom" {
t.Fatalf("errorString = %q", got)
}
if got := moveToFront([]string{"a", "b", "c"}, "missing"); !reflect.DeepEqual(got, []string{"a", "b", "c"}) {
t.Fatalf("moveToFront missing = %#v", got)
}
if targetsEqual(facts.Targets{Kernel: "a"}, facts.Targets{Kernel: "b"}) {
t.Fatal("targetsEqual should reject differing kernels")
}
// "card"/"reader" model (40) + removable (20) + hotplug (10) + usb (5) = 75.
if got := deviceScore(Device{Name: "reader", Model: "Card reader", Transport: "usb", Removable: true, Hotplug: true}); got < 75 {
t.Fatalf("unexpected deviceScore: %d", got)
}
if got := cachedImageName("foo.xz"); got != "foo" {
t.Fatalf("cachedImageName alias = %q", got)
}
if got := app.flashHosts(); len(got) == 0 {
t.Fatal("flashHosts returned empty list")
}
// Loading from paths whose parent directory does not exist must fail.
app.settings.SnapshotsPath = filepath.Join(t.TempDir(), "missing", "snapshots.json")
if err := app.loadSnapshots(); err == nil {
t.Fatal("expected loadSnapshots error for missing file")
}
app.settings.TargetsPath = filepath.Join(t.TempDir(), "missing", "targets.json")
if err := app.loadTargets(); err == nil {
t.Fatal("expected loadTargets error for missing file")
}
app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "missing", "artifacts.json")
if err := app.loadArtifacts(); err == nil {
t.Fatal("expected loadArtifacts error for missing file")
}
// Persisting to a path that is itself a directory must fail.
tmpDir := t.TempDir()
app.settings.SnapshotsPath = tmpDir
if err := app.persistSnapshots(); err == nil {
t.Fatal("expected persistSnapshots error when path is a directory")
}
app.settings.TargetsPath = tmpDir
if err := app.persistTargets(); err == nil {
t.Fatal("expected persistTargets error when path is a directory")
}
app.settings.ArtifactStatePath = tmpDir
if err := app.persistArtifacts(); err == nil {
t.Fatal("expected persistArtifacts error when path is a directory")
}
if err := app.StoreSnapshot(SnapshotRecord{}); err == nil {
t.Fatal("expected snapshot validation error")
}
if _, err := app.Build("missing"); err == nil {
t.Fatal("expected Build to reject unknown node")
}
if _, err := app.Replace("missing", "", ""); err == nil {
t.Fatal("expected Replace to reject unknown node")
}
}
func TestNewAppReportsInventoryErrors(t *testing.T) {
settings := Settings{
InventoryPath: filepath.Join(t.TempDir(), "missing.yaml"),
CacheDir: t.TempDir(),
ArtifactDir: t.TempDir(),
ArtifactStatePath: filepath.Join(t.TempDir(), "artifacts.json"),
HistoryPath: filepath.Join(t.TempDir(), "history.jsonl"),
SnapshotsPath: filepath.Join(t.TempDir(), "snapshots.json"),
TargetsPath: filepath.Join(t.TempDir(), "targets.json"),
}
if _, err := NewApp(settings); err == nil {
t.Fatal("expected NewApp inventory error")
}
}
// TestAppHelperNoopAndInvalidStateBranches checks that job helpers ignore
// unknown job IDs, that replacement readiness rejects nil and empty specs,
// that event history tolerates an unwritable path, and that an in-cluster
// node delete succeeds against a fake kube server.
func TestAppHelperNoopAndInvalidStateBranches(t *testing.T) {
app := newTestApp(t)
// The mutators must never be invoked for a job that does not exist.
app.setJob("missing", func(*Job) { t.Fatal("setJob should not run for missing job") })
app.completeJob("missing", func(*Job) { t.Fatal("completeJob should not run for missing job") })
app.failJob("missing", os.ErrNotExist)
if replacementReady(nil, nil) {
t.Fatal("replacementReady nil should be false")
}
if replacementReady(&inventory.NodeSpec{}, &inventory.NodeClass{}) {
t.Fatal("replacementReady empty should be false")
}
app.inventory = &inventory.Inventory{}
if got := app.replacementNodes(); len(got) != 0 {
t.Fatalf("replacementNodes empty inventory = %#v", got)
}
app.settings.FlashHosts = []string{"titan-22"}
app.settings.DefaultFlashHost = ""
if got := app.flashHosts(); len(got) == 0 {
t.Fatal("flashHosts should still include cluster nodes")
}
if !replacementReady(&inventory.NodeSpec{
Name: "ready",
Hostname: "ready",
IP: "192.168.22.10",
K3sRole: "agent",
K3sURL: "https://192.168.22.1:6443",
K3sToken: "token",
SSHUser: "atlas",
SSHAuthorized: []string{"ssh-ed25519 AAA"},
}, &inventory.NodeClass{Image: "img", Checksum: "sum"}) {
t.Fatal("replacementReady valid node should be true")
}
// A regular file used as the parent "directory" makes the history path
// unwritable; appendEvent must not fail the test and recentEvents must
// return nil.
fileParent := filepath.Join(t.TempDir(), "blocked")
if err := os.WriteFile(fileParent, []byte("block"), 0o644); err != nil {
t.Fatal(err)
}
app.settings.HistoryPath = filepath.Join(fileParent, "history.jsonl")
app.appendEvent(Event{Kind: "noop"})
if got := app.recentEvents(1); got != nil {
t.Fatalf("recentEvents missing file = %#v", got)
}
// fakeKubeServer and installKubeFactory are test helpers defined elsewhere in
// the package.
kube := fakeKubeServer(t)
installKubeFactory(t, kube)
if err := deleteNodeObjectInCluster("titan-15"); err != nil {
t.Fatalf("deleteNodeObjectInCluster success: %v", err)
}
}
// testTime returns the fixed timestamp used by fixtures in these tests:
// 2026-03-31 12:00:00 UTC.
func testTime(t *testing.T) time.Time {
	t.Helper()
	const (
		year = 2026
		day  = 31
		hour = 12
	)
	fixed := time.Date(year, time.March, day, hour, 0, 0, 0, time.UTC)
	return fixed
}

View File

@ -37,18 +37,24 @@ func NewMetrics() *Metrics {
}
}
// RecordBuild adds one to the build counter keyed by node and status. The
// update is made while holding the metrics mutex.
func (m *Metrics) RecordBuild(node, status string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	key := counterKey(node, status)
	m.builds[key]++
}
// RecordFlash adds one to the flash counter keyed by node, host and status.
// The update is made while holding the metrics mutex.
func (m *Metrics) RecordFlash(node, host, status string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	key := counterKey(node, host, status)
	m.flashes[key]++
}
// RecordSnapshot tracks accepted sentinel snapshots because drift detection
// depends on the last successful push per node.
func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
@ -58,6 +64,8 @@ func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) {
}
}
// RecordWatch increments the sentinel watch outcome counter because the
// dashboard needs to show whether the latest reconciliation succeeded.
func (m *Metrics) RecordWatch(status string) {
m.mu.Lock()
defer m.mu.Unlock()
@ -67,6 +75,8 @@ func (m *Metrics) RecordWatch(status string) {
}
}
// SetDriftTargets refreshes the target-count gauge because the UI exposes how
// much class configuration is already populated versus still missing.
func (m *Metrics) SetDriftTargets(targets map[string]facts.Targets, changed int) {
m.mu.Lock()
defer m.mu.Unlock()

View File

@ -0,0 +1,33 @@
package service
import (
"strings"
"testing"
"time"
"metis/pkg/facts"
)
// TestMetricsRenderAndKeyHelpers records one sample of each metric kind,
// renders the metrics text and checks that the build counter and the
// per-class target gauge appear with the expected values. It also checks that
// splitKey pads a short key out to the requested number of parts.
func TestMetricsRenderAndKeyHelpers(t *testing.T) {
metrics := NewMetrics()
metrics.RecordBuild("n1", "ok")
metrics.RecordFlash("n1", "h1", "ok")
metrics.RecordSnapshot("n1", "ok", time.Unix(123, 0))
metrics.RecordWatch("ok")
// Class "c1" has a kernel and one package set; the gauge below is expected
// to read 2 (presumably one per populated field — TODO confirm).
metrics.SetDriftTargets(map[string]facts.Targets{
"c1": {Kernel: "k", Packages: map[string]string{"p": "1"}},
}, 2)
var b strings.Builder
metrics.Render(&b)
out := b.String()
if !strings.Contains(out, `metis_builds_total{node="n1",status="ok"} 1`) {
t.Fatalf("missing build metric: %s", out)
}
if !strings.Contains(out, `metis_class_target_fields{class="c1"} 2`) {
t.Fatalf("missing target metric: %s", out)
}
// A key with fewer parts than requested is padded with empty strings.
if got := splitKey("a", 3); len(got) != 3 || got[0] != "a" || got[1] != "" || got[2] != "" {
t.Fatalf("splitKey fallback = %#v", got)
}
}

View File

@ -3,13 +3,9 @@ package service
import (
"encoding/json"
"fmt"
"math"
"path/filepath"
"sort"
"strings"
"time"
"metis/pkg/inventory"
)
const (
@ -20,10 +16,14 @@ const (
vaultSSHKeysSecretPath = "kv/data/atlas/maintenance/metis-ssh-keys"
)
// ListDevices returns the cached device list for host. It only consults the
// cache and does not trigger a new enumeration.
func (a *App) ListDevices(host string) ([]Device, error) {
	devices, err := a.cachedDevices(host)
	return devices, err
}
// RefreshDevices rebuilds the flash-device list because the chooser needs the
// latest host-specific USB inventory before a burn can start.
func (a *App) RefreshDevices(host string) ([]Device, error) {
if host == "" {
host = a.settings.DefaultFlashHost
@ -280,360 +280,3 @@ func (a *App) heartbeatRemoteJob(jobID string) {
}
})
}
// buildStageHeartbeat maps the time spent in a remote build to a synthetic
// progress percentage and a matching status message. Progress rises through
// fixed time windows (8→14, 14→30, 30→58, 58→70) and is capped at 76 from
// 540 seconds onward.
func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) {
	var (
		secs     = elapsed.Seconds()
		progress float64
		note     string
	)
	switch {
	case secs < 20:
		progress = ramp(secs, 0, 20, 8, 14)
		note = fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node)
	case secs < 120:
		progress = ramp(secs, 20, 120, 14, 30)
		note = fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder)
	case secs < 360:
		progress = ramp(secs, 120, 360, 30, 58)
		note = fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder)
	case secs < 540:
		progress = ramp(secs, 360, 540, 58, 70)
		note = fmt.Sprintf("Compressing the replacement image for %s before upload", node)
	default:
		progress = math.Min(76, ramp(secs, 540, 900, 70, 76))
		note = fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node)
	}
	return progress, note
}
// flashStageHeartbeat turns the time spent in the flash stage into a progress
// figure and a status message. The first 10s and the 10-45s band interpolate
// through 84-96 via ramp; anything later creeps toward a cap of 98.
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
	secs := elapsed.Seconds()
	if secs < 10 {
		return ramp(secs, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
	}
	if secs < 45 {
		return ramp(secs, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host)
	}
	// Final band: the estimate is capped at 98 however long the flush takes.
	progress := math.Min(98, ramp(secs, 45, 120, 96, 98))
	return progress, fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
}
// prettyDeviceTarget converts a device path into the label shown to operators.
// Any "hosttmp://" path is displayed as "/tmp", a blank path becomes the
// generic phrase "the selected target", and every other path is shown as-is.
func prettyDeviceTarget(path string) string {
	if strings.HasPrefix(path, "hosttmp://") {
		return "/tmp"
	}
	if strings.TrimSpace(path) == "" {
		return "the selected target"
	}
	return path
}
// ramp linearly maps value from the interval [start, end] onto [min, max],
// clamping at both ends. A degenerate interval (end <= start) yields max.
func ramp(value, start, end, min, max float64) float64 {
	switch {
	case end <= start, value >= end:
		return max
	case value <= start:
		return min
	}
	fraction := (value - start) / (end - start)
	return min + fraction*(max-min)
}
// ensureDevice confirms that path is still a flash candidate on host. It
// rejects a blank path up front, refreshes the host's device list, and returns
// a pointer to a copy of the matching entry, or an error when the refresh
// fails or no entry has that path.
func (a *App) ensureDevice(host, path string) (*Device, error) {
	if strings.TrimSpace(path) == "" {
		return nil, fmt.Errorf("select removable media before starting a flash run")
	}
	devices, err := a.RefreshDevices(host)
	if err != nil {
		return nil, err
	}
	for i := range devices {
		if devices[i].Path != path {
			continue
		}
		// Hand back a copy so the result does not alias the refreshed slice.
		match := devices[i]
		return &match, nil
	}
	return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host)
}
// selectBuilderHost picks the cluster node that should run an image build for
// the given architecture. Nodes of another arch, unschedulable nodes, and
// control-plane nodes are excluded; the rest are scored and the highest score
// wins, with ties broken by the lexically smallest node name.
//
// Scoring: +40 for workers; on arm64 +30 for rpi5 hardware and -50 for nodes
// that the inventory lists with Longhorn disks; on amd64 +30 for the default
// flash host and -10 for "titan-24"; +5 when the node is the requested
// flashHost. An error is returned when no node qualifies.
func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
	nodes := clusterNodes()

	longhorn := map[string]struct{}{}
	for _, spec := range a.inventory.Nodes {
		if len(spec.LonghornDisks) > 0 {
			longhorn[spec.Name] = struct{}{}
		}
	}

	type ranked struct {
		node  clusterNode
		score int
	}
	pool := make([]ranked, 0)
	for _, candidate := range nodes {
		eligible := candidate.Arch == arch && !candidate.Unschedulable && !candidate.ControlPlane
		if !eligible {
			continue
		}
		points := 0
		if candidate.Worker {
			points += 40
		}
		if arch == "arm64" {
			if candidate.Hardware == "rpi5" {
				points += 30
			}
			if _, isStorage := longhorn[candidate.Name]; isStorage {
				points -= 50
			}
		} else if arch == "amd64" {
			if candidate.Name == a.settings.DefaultFlashHost {
				points += 30
			}
			if candidate.Name == "titan-24" {
				points -= 10
			}
		}
		if flashHost != "" && candidate.Name == flashHost {
			points += 5
		}
		pool = append(pool, ranked{node: candidate, score: points})
	}
	if len(pool) == 0 {
		return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch)
	}

	// Best score first; equal scores fall back to name order for determinism.
	sort.Slice(pool, func(i, j int) bool {
		left, right := pool[i], pool[j]
		if left.score == right.score {
			return left.node.Name < right.node.Name
		}
		return left.score > right.score
	})
	return pool[0].node, nil
}
// remoteDevicePodSpec assembles the Pod manifest that runs
// "metis remote-devices" on host. The pod is pinned with a
// kubernetes.io/hostname node selector, never restarts, runs privileged as
// UID 0, and mounts the host's /dev, /sys, /run/udev, and /tmp (the latter at
// /host-tmp).
func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any {
	metadata := map[string]any{
		"name":      name,
		"namespace": a.settings.Namespace,
		"labels":    map[string]string{"app": "metis-remote", "metis-run": "devices"},
	}

	container := map[string]any{
		"name":            "remote-devices",
		"image":           image,
		"imagePullPolicy": "Always",
		"command": []string{
			"metis", "remote-devices",
			"--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes),
			"--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir),
		},
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"volumeMounts": []map[string]any{
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
			{"name": "host-tmp", "mountPath": "/host-tmp"},
		},
	}

	volumes := []map[string]any{
		{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
		{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
		{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
		{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes":            volumes,
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteBuildPodSpec assembles the Pod manifest that runs "metis remote-build"
// for node on host. The pod carries the Vault annotations including the SSH
// key template, is pinned with a kubernetes.io/hostname node selector, never
// restarts, reads environment from the "metis" ConfigMap, and works inside an
// emptyDir mounted at /workspace.
func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any {
	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "build"},
		"annotations": vaultRuntimeAnnotations(true),
	}

	// The shell entrypoint sources the injected secret files before exec'ing metis.
	entrypoint := remoteWorkerEntrypoint(
		true,
		"remote-build",
		"--inventory", a.settings.InventoryPath,
		"--node", node,
		"--cache", "/workspace/cache",
		"--work-dir", "/workspace/build",
		"--artifact-ref", artifactRef,
		"--build-tag", buildTag,
		"--harbor-registry", a.settings.HarborRegistry,
	)

	container := map[string]any{
		"name":            "remote-build",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{entrypoint},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
		},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes": []map[string]any{
			{"name": "workspace", "emptyDir": map[string]any{}},
		},
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteFlashPodSpec assembles the Pod manifest that runs "metis remote-flash"
// on host to write artifactRef for node onto device. The pod carries the Vault
// annotations without the SSH key template, is pinned with a
// kubernetes.io/hostname node selector, never restarts, runs privileged as
// UID 0, and mounts an emptyDir workspace plus the host's /dev, /sys,
// /run/udev, and /tmp (the latter at /host-tmp).
func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any {
	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "flash"},
		"annotations": vaultRuntimeAnnotations(false),
	}

	// The shell entrypoint sources the injected secret files before exec'ing metis.
	entrypoint := remoteWorkerEntrypoint(
		false,
		"remote-flash",
		"--node", node,
		"--device", device,
		"--artifact-ref", artifactRef,
		"--work-dir", "/workspace/flash",
		"--harbor-registry", a.settings.HarborRegistry,
		"--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir),
	)

	container := map[string]any{
		"name":            "remote-flash",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{entrypoint},
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
			{"name": "host-tmp", "mountPath": "/host-tmp"},
		},
	}

	volumes := []map[string]any{
		{"name": "workspace", "emptyDir": map[string]any{}},
		{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
		{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
		{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
		{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes":            volumes,
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteArtifactNote returns the artifact reference to show for node: the
// recorded summary's Ref when one exists and is non-blank, otherwise the
// node's artifact repository with the ":latest" tag appended.
func (a *App) remoteArtifactNote(node string) string {
	summary, known := a.artifacts()[node]
	if !known || strings.TrimSpace(summary.Ref) == "" {
		return a.artifactRepo(node) + ":latest"
	}
	return summary.Ref
}
// inventoryNodeArch reports the architecture declared by class, trimmed of
// surrounding whitespace, and defaults to "arm64" when class is nil or its
// Arch is blank. The spec argument is accepted but not consulted.
func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string {
	if class == nil {
		return "arm64"
	}
	if arch := strings.TrimSpace(class.Arch); arch != "" {
		return arch
	}
	return "arm64"
}
// mountedHostTmpDir translates a host-side temp directory into the path where
// a worker pod sees it, given that the pod specs mount the host's /tmp at
// /host-tmp.
//
// A blank path or "/tmp" maps to the mount root, a path below /tmp/ maps to
// the same relative location below /host-tmp, and any other path is nested
// under /host-tmp with its leading slash removed. The result always stays
// inside /host-tmp: ".." segments are collapsed against the mount root rather
// than being allowed to climb above it.
func mountedHostTmpDir(path string) string {
	const mountRoot = "/host-tmp"
	path = strings.TrimSpace(path)
	if path == "" || path == "/tmp" {
		return mountRoot
	}
	rel := strings.TrimPrefix(path, "/tmp/")
	// Rooting the remainder at "/" before cleaning pins ".." at the root, so a
	// value such as "/tmp/../etc" cannot resolve outside the mount once joined.
	return filepath.Join(mountRoot, filepath.Clean("/"+rel))
}
// vaultRuntimeAnnotations returns the vault.hashicorp.com pod annotations used
// by the remote worker pods. The base set enables agent injection in
// pre-populate-only mode under vaultRoleMaintenance and declares two templated
// secrets, metis-runtime-env.sh and metis-harbor-env.sh, that export
// METIS_K3S_TOKEN and METIS_HARBOR_PASSWORD. When includeSSHKeys is true a
// third secret, metis-ssh-env.sh, exporting four METIS_SSH_KEY_* variables is
// added. remoteWorkerEntrypoint sources these files from /vault/secrets.
func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string {
annotations := map[string]string{
"vault.hashicorp.com/agent-inject": "true",
"vault.hashicorp.com/agent-pre-populate-only": "true",
"vault.hashicorp.com/role": vaultRoleMaintenance,
"vault.hashicorp.com/agent-inject-secret-metis-runtime-env.sh": vaultRuntimeSecretPath,
"vault.hashicorp.com/agent-inject-template-metis-runtime-env.sh": `{{ with secret "kv/data/atlas/maintenance/metis-runtime" }}
export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}"
{{ end }}`,
"vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh": vaultHarborSecretPath,
"vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh": `{{ with secret "kv/data/atlas/harbor/harbor-core" }}
export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}"
{{ end }}`,
}
// The SSH public keys are opt-in; callers pass false when they do not want
// the metis-ssh-env.sh secret rendered.
if includeSSHKeys {
annotations["vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh"] = vaultSSHKeysSecretPath
annotations["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"] = `{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }}
export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}"
export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}"
export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}"
{{ end }}`
}
return annotations
}
// remoteWorkerEntrypoint renders the newline-separated shell script a remote
// worker container runs: "set -e", the runtime and Harbor secret files sourced
// from /vault/secrets, optionally the SSH key file, and finally an exec of
// "metis" with args, each argument shell-quoted by shellJoin.
func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string {
	preamble := []string{
		"set -e",
		". /vault/secrets/metis-runtime-env.sh",
		". /vault/secrets/metis-harbor-env.sh",
	}
	if includeSSHKeys {
		preamble = append(preamble, ". /vault/secrets/metis-ssh-env.sh")
	}

	argv := make([]string, 0, len(args)+1)
	argv = append(argv, "metis")
	argv = append(argv, args...)

	var script strings.Builder
	for _, line := range preamble {
		script.WriteString(line)
		script.WriteByte('\n')
	}
	script.WriteString("exec ")
	script.WriteString(shellJoin(argv...))
	return script.String()
}
// shellJoin quotes every argument with shellQuote and joins the results with
// single spaces, producing one shell-safe command line.
func shellJoin(args ...string) string {
	quoted := make([]string, len(args))
	for i := range args {
		quoted[i] = shellQuote(args[i])
	}
	return strings.Join(quoted, " ")
}
// shellQuote wraps value in single quotes for use as one shell word. Each
// embedded single quote is rewritten as '"'"' (close the quote, emit a
// double-quoted quote, reopen), and the empty string becomes ''.
func shellQuote(value string) string {
	if len(value) == 0 {
		return "''"
	}
	escaped := strings.ReplaceAll(value, "'", `'"'"'`)
	return "'" + escaped + "'"
}

View File

@ -0,0 +1,61 @@
package service
import (
"path/filepath"
"testing"
"time"
)
// TestRemoteWorkflowErrorBranches drives the failure paths of the remote
// workflow against a fake kube server with no ARM64 runner image configured:
// device refresh, a build run, and a flash are each expected to fail, after
// which a heartbeat on a job forced into the running "build" stage must
// report non-zero progress. The steps share one app and reuse the job
// variable, so their order matters.
func TestRemoteWorkflowErrorBranches(t *testing.T) {
kube := fakeKubeServer(t)
installKubeFactory(t, kube)
app := newTestApp(t)
app.settings.Namespace = "maintenance"
// Clearing the runner image is what the failures below rely on.
app.settings.RunnerImageARM64 = ""
if _, err := app.RefreshDevices("titan-22"); err == nil {
t.Fatal("expected RefreshDevices to fail without runner image")
}
job := app.newJob("build", "titan-15", "", "")
app.runBuild(job, false)
if got := app.job(job.ID); got == nil || got.Status != JobError {
t.Fatalf("expected build job error, got %#v", got)
}
job = app.newJob("flash", "titan-15", "titan-22", "/dev/sdz")
if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil {
t.Fatal("expected flashArtifact error")
}
// Put the flash job into a running build stage that began 30 seconds ago so
// the heartbeat has elapsed time to work with.
app.setJob(job.ID, func(j *Job) {
j.Status = JobRunning
j.Stage = "build"
j.StageStartedAt = time.Now().Add(-30 * time.Second)
})
app.heartbeatRemoteJob(job.ID)
if got := app.job(job.ID); got == nil || got.ProgressPct == 0 {
t.Fatalf("expected heartbeat progress, got %#v", got)
}
}
// TestRemoteWorkflowMissingRunnerImageBranch checks that a build job ends in
// JobError when Harbor settings point at a fake Harbor server but the ARM64
// runner image is left empty.
func TestRemoteWorkflowMissingRunnerImageBranch(t *testing.T) {
	kubeSrv := fakeKubeServer(t)
	harborSrv := fakeHarborServer(t, true)
	installKubeFactory(t, kubeSrv)

	app := newTestApp(t)
	cfg := &app.settings
	cfg.Namespace = "maintenance"
	cfg.RunnerImageARM64 = ""
	cfg.HarborAPIBase = harborSrv.URL + "/api/v2.0"
	cfg.HarborUsername = "admin"
	cfg.HarborPassword = "pw"
	cfg.HarborProject = "metis"
	cfg.HarborRegistry = "registry.example"
	cfg.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json")

	build := app.newJob("build", "titan-15", "", "")
	app.runBuild(build, false)

	result := app.job(build.ID)
	if result == nil || result.Status != JobError {
		t.Fatalf("expected build job error, got %#v", result)
	}
}

View File

@ -0,0 +1,369 @@
package service
import (
"fmt"
"math"
"path/filepath"
"sort"
"strings"
"time"
"metis/pkg/inventory"
)
// buildStageHeartbeat turns the time spent in the build stage into a progress
// figure and a status message. Each elapsed-time band (20s, 120s, 360s, 540s,
// then everything later) owns a slice of the 8-76 range that ramp interpolates
// across, together with the message describing that phase.
func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) {
	secs := elapsed.Seconds()
	if secs < 20 {
		return ramp(secs, 0, 20, 8, 14), fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node)
	}
	if secs < 120 {
		return ramp(secs, 20, 120, 14, 30), fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder)
	}
	if secs < 360 {
		return ramp(secs, 120, 360, 30, 58), fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder)
	}
	if secs < 540 {
		return ramp(secs, 360, 540, 58, 70), fmt.Sprintf("Compressing the replacement image for %s before upload", node)
	}
	// Final band: the estimate is capped at 76 no matter how long publishing takes.
	progress := math.Min(76, ramp(secs, 540, 900, 70, 76))
	return progress, fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node)
}
// flashStageHeartbeat turns the time spent in the flash stage into a progress
// figure and a status message. The first 10s and the 10-45s band interpolate
// through 84-96 via ramp; anything later creeps toward a cap of 98.
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
	secs := elapsed.Seconds()
	if secs < 10 {
		return ramp(secs, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
	}
	if secs < 45 {
		return ramp(secs, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host)
	}
	// Final band: the estimate is capped at 98 however long the flush takes.
	progress := math.Min(98, ramp(secs, 45, 120, 96, 98))
	return progress, fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
}
// prettyDeviceTarget converts a device path into the label shown to operators.
// Any "hosttmp://" path is displayed as "/tmp", a blank path becomes the
// generic phrase "the selected target", and every other path is shown as-is.
func prettyDeviceTarget(path string) string {
	if strings.HasPrefix(path, "hosttmp://") {
		return "/tmp"
	}
	if strings.TrimSpace(path) == "" {
		return "the selected target"
	}
	return path
}
// ramp linearly maps value from the interval [start, end] onto [min, max],
// clamping at both ends. A degenerate interval (end <= start) yields max.
func ramp(value, start, end, min, max float64) float64 {
	switch {
	case end <= start, value >= end:
		return max
	case value <= start:
		return min
	}
	fraction := (value - start) / (end - start)
	return min + fraction*(max-min)
}
// ensureDevice confirms that path is still a flash candidate on host. It
// rejects a blank path up front, refreshes the host's device list, and returns
// a pointer to a copy of the matching entry, or an error when the refresh
// fails or no entry has that path.
func (a *App) ensureDevice(host, path string) (*Device, error) {
	if strings.TrimSpace(path) == "" {
		return nil, fmt.Errorf("select removable media before starting a flash run")
	}
	devices, err := a.RefreshDevices(host)
	if err != nil {
		return nil, err
	}
	for i := range devices {
		if devices[i].Path != path {
			continue
		}
		// Hand back a copy so the result does not alias the refreshed slice.
		match := devices[i]
		return &match, nil
	}
	return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host)
}
// selectBuilderHost picks the cluster node that should run an image build for
// the given architecture. Nodes of another arch, unschedulable nodes, and
// control-plane nodes are excluded; the rest are scored and the highest score
// wins, with ties broken by the lexically smallest node name.
//
// Scoring: +40 for workers; on arm64 +30 for rpi5 hardware and -50 for nodes
// that the inventory lists with Longhorn disks; on amd64 +30 for the default
// flash host and -10 for "titan-24"; +5 when the node is the requested
// flashHost. An error is returned when no node qualifies.
func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
	nodes := clusterNodes()

	longhorn := map[string]struct{}{}
	for _, spec := range a.inventory.Nodes {
		if len(spec.LonghornDisks) > 0 {
			longhorn[spec.Name] = struct{}{}
		}
	}

	type ranked struct {
		node  clusterNode
		score int
	}
	pool := make([]ranked, 0)
	for _, candidate := range nodes {
		eligible := candidate.Arch == arch && !candidate.Unschedulable && !candidate.ControlPlane
		if !eligible {
			continue
		}
		points := 0
		if candidate.Worker {
			points += 40
		}
		if arch == "arm64" {
			if candidate.Hardware == "rpi5" {
				points += 30
			}
			if _, isStorage := longhorn[candidate.Name]; isStorage {
				points -= 50
			}
		} else if arch == "amd64" {
			if candidate.Name == a.settings.DefaultFlashHost {
				points += 30
			}
			if candidate.Name == "titan-24" {
				points -= 10
			}
		}
		if flashHost != "" && candidate.Name == flashHost {
			points += 5
		}
		pool = append(pool, ranked{node: candidate, score: points})
	}
	if len(pool) == 0 {
		return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch)
	}

	// Best score first; equal scores fall back to name order for determinism.
	sort.Slice(pool, func(i, j int) bool {
		left, right := pool[i], pool[j]
		if left.score == right.score {
			return left.node.Name < right.node.Name
		}
		return left.score > right.score
	})
	return pool[0].node, nil
}
// remoteDevicePodSpec assembles the Pod manifest that runs
// "metis remote-devices" on host. The pod is pinned with a
// kubernetes.io/hostname node selector, never restarts, runs privileged as
// UID 0, and mounts the host's /dev, /sys, /run/udev, and /tmp (the latter at
// /host-tmp).
func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any {
	metadata := map[string]any{
		"name":      name,
		"namespace": a.settings.Namespace,
		"labels":    map[string]string{"app": "metis-remote", "metis-run": "devices"},
	}

	container := map[string]any{
		"name":            "remote-devices",
		"image":           image,
		"imagePullPolicy": "Always",
		"command": []string{
			"metis", "remote-devices",
			"--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes),
			"--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir),
		},
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"volumeMounts": []map[string]any{
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
			{"name": "host-tmp", "mountPath": "/host-tmp"},
		},
	}

	volumes := []map[string]any{
		{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
		{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
		{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
		{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes":            volumes,
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteBuildPodSpec assembles the Pod manifest that runs "metis remote-build"
// for node on host. The pod carries the Vault annotations including the SSH
// key template, is pinned with a kubernetes.io/hostname node selector, never
// restarts, reads environment from the "metis" ConfigMap, and works inside an
// emptyDir mounted at /workspace.
func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any {
	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "build"},
		"annotations": vaultRuntimeAnnotations(true),
	}

	// The shell entrypoint sources the injected secret files before exec'ing metis.
	entrypoint := remoteWorkerEntrypoint(
		true,
		"remote-build",
		"--inventory", a.settings.InventoryPath,
		"--node", node,
		"--cache", "/workspace/cache",
		"--work-dir", "/workspace/build",
		"--artifact-ref", artifactRef,
		"--build-tag", buildTag,
		"--harbor-registry", a.settings.HarborRegistry,
	)

	container := map[string]any{
		"name":            "remote-build",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{entrypoint},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
		},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes": []map[string]any{
			{"name": "workspace", "emptyDir": map[string]any{}},
		},
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteFlashPodSpec assembles the Pod manifest that runs "metis remote-flash"
// on host to write artifactRef for node onto device. The pod carries the Vault
// annotations without the SSH key template, is pinned with a
// kubernetes.io/hostname node selector, never restarts, runs privileged as
// UID 0, and mounts an emptyDir workspace plus the host's /dev, /sys,
// /run/udev, and /tmp (the latter at /host-tmp).
func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any {
	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "flash"},
		"annotations": vaultRuntimeAnnotations(false),
	}

	// The shell entrypoint sources the injected secret files before exec'ing metis.
	entrypoint := remoteWorkerEntrypoint(
		false,
		"remote-flash",
		"--node", node,
		"--device", device,
		"--artifact-ref", artifactRef,
		"--work-dir", "/workspace/flash",
		"--harbor-registry", a.settings.HarborRegistry,
		"--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir),
	)

	container := map[string]any{
		"name":            "remote-flash",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{entrypoint},
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
			{"name": "host-tmp", "mountPath": "/host-tmp"},
		},
	}

	volumes := []map[string]any{
		{"name": "workspace", "emptyDir": map[string]any{}},
		{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
		{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
		{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
		{"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}},
	}

	spec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector":       map[string]string{"kubernetes.io/hostname": host},
		"containers":         []map[string]any{container},
		"imagePullSecrets":   []map[string]string{{"name": "harbor-regcred"}},
		"volumes":            volumes,
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       spec,
	}
}
// remoteArtifactNote returns the artifact reference to show for node: the
// recorded summary's Ref when one exists and is non-blank, otherwise the
// node's artifact repository with the ":latest" tag appended.
func (a *App) remoteArtifactNote(node string) string {
	summary, known := a.artifacts()[node]
	if !known || strings.TrimSpace(summary.Ref) == "" {
		return a.artifactRepo(node) + ":latest"
	}
	return summary.Ref
}
// inventoryNodeArch reports the architecture declared by class, trimmed of
// surrounding whitespace, and defaults to "arm64" when class is nil or its
// Arch is blank. The spec argument is accepted but not consulted.
func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string {
	if class == nil {
		return "arm64"
	}
	if arch := strings.TrimSpace(class.Arch); arch != "" {
		return arch
	}
	return "arm64"
}
// mountedHostTmpDir translates a host-side temp directory into the path where
// a worker pod sees it, given that the pod specs mount the host's /tmp at
// /host-tmp.
//
// A blank path or "/tmp" maps to the mount root, a path below /tmp/ maps to
// the same relative location below /host-tmp, and any other path is nested
// under /host-tmp with its leading slash removed. The result always stays
// inside /host-tmp: ".." segments are collapsed against the mount root rather
// than being allowed to climb above it.
func mountedHostTmpDir(path string) string {
	const mountRoot = "/host-tmp"
	path = strings.TrimSpace(path)
	if path == "" || path == "/tmp" {
		return mountRoot
	}
	rel := strings.TrimPrefix(path, "/tmp/")
	// Rooting the remainder at "/" before cleaning pins ".." at the root, so a
	// value such as "/tmp/../etc" cannot resolve outside the mount once joined.
	return filepath.Join(mountRoot, filepath.Clean("/"+rel))
}
// vaultRuntimeAnnotations returns the vault.hashicorp.com pod annotations used
// by the remote worker pods. The base set enables agent injection in
// pre-populate-only mode under vaultRoleMaintenance and declares two templated
// secrets, metis-runtime-env.sh and metis-harbor-env.sh, that export
// METIS_K3S_TOKEN and METIS_HARBOR_PASSWORD. When includeSSHKeys is true a
// third secret, metis-ssh-env.sh, exporting four METIS_SSH_KEY_* variables is
// added. remoteWorkerEntrypoint sources these files from /vault/secrets.
func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string {
annotations := map[string]string{
"vault.hashicorp.com/agent-inject": "true",
"vault.hashicorp.com/agent-pre-populate-only": "true",
"vault.hashicorp.com/role": vaultRoleMaintenance,
"vault.hashicorp.com/agent-inject-secret-metis-runtime-env.sh": vaultRuntimeSecretPath,
"vault.hashicorp.com/agent-inject-template-metis-runtime-env.sh": `{{ with secret "kv/data/atlas/maintenance/metis-runtime" }}
export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}"
{{ end }}`,
"vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh": vaultHarborSecretPath,
"vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh": `{{ with secret "kv/data/atlas/harbor/harbor-core" }}
export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}"
{{ end }}`,
}
// The SSH public keys are opt-in; callers pass false when they do not want
// the metis-ssh-env.sh secret rendered.
if includeSSHKeys {
annotations["vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh"] = vaultSSHKeysSecretPath
annotations["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"] = `{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }}
export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}"
export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}"
export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}"
{{ end }}`
}
return annotations
}
// remoteWorkerEntrypoint renders the newline-separated shell script a remote
// worker container runs: "set -e", the runtime and Harbor secret files sourced
// from /vault/secrets, optionally the SSH key file, and finally an exec of
// "metis" with args, each argument shell-quoted by shellJoin.
func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string {
	preamble := []string{
		"set -e",
		". /vault/secrets/metis-runtime-env.sh",
		". /vault/secrets/metis-harbor-env.sh",
	}
	if includeSSHKeys {
		preamble = append(preamble, ". /vault/secrets/metis-ssh-env.sh")
	}

	argv := make([]string, 0, len(args)+1)
	argv = append(argv, "metis")
	argv = append(argv, args...)

	var script strings.Builder
	for _, line := range preamble {
		script.WriteString(line)
		script.WriteByte('\n')
	}
	script.WriteString("exec ")
	script.WriteString(shellJoin(argv...))
	return script.String()
}
// shellJoin quotes every argument with shellQuote and joins the results with
// single spaces, producing one shell-safe command line.
func shellJoin(args ...string) string {
	quoted := make([]string, len(args))
	for i := range args {
		quoted[i] = shellQuote(args[i])
	}
	return strings.Join(quoted, " ")
}
// shellQuote wraps value in single quotes for use as one shell word. Each
// embedded single quote is rewritten as '"'"' (close the quote, emit a
// double-quoted quote, reopen), and the empty string becomes ''.
func shellQuote(value string) string {
	if len(value) == 0 {
		return "''"
	}
	escaped := strings.ReplaceAll(value, "'", `'"'"'`)
	return "'" + escaped + "'"
}

View File

@ -0,0 +1,68 @@
package service
import (
"strings"
"testing"
"time"
"metis/pkg/inventory"
)
// TestRemoteHelperBranches spot-checks the small remote-workflow helpers: the
// device label, ramp clamping, host tmp path mapping, shell quoting, both
// stage heartbeats, the artifact note lookup, the arch lookup, and the worker
// entrypoint script.
func TestRemoteHelperBranches(t *testing.T) {
	emptyTarget := prettyDeviceTarget("")
	if emptyTarget != "the selected target" {
		t.Fatalf("prettyDeviceTarget empty = %q", emptyTarget)
	}
	tmpTarget := prettyDeviceTarget("hosttmp:///tmp")
	if tmpTarget != "/tmp" {
		t.Fatalf("prettyDeviceTarget hosttmp = %q", tmpTarget)
	}

	clamped := ramp(0, 10, 20, 1, 2)
	if clamped != 1 {
		t.Fatalf("ramp before start = %v", clamped)
	}

	mounted := mountedHostTmpDir("/tmp/metis-flash-test")
	if mounted != "/host-tmp/metis-flash-test" {
		t.Fatalf("mountedHostTmpDir = %q", mounted)
	}

	quoted := shellQuote("a'b")
	if quoted != `'a'"'"'b'` {
		t.Fatalf("shellQuote = %q", quoted)
	}

	buildPct, buildMsg := buildStageHeartbeat("n1", "b1", 5*time.Second)
	if buildPct < 8 || !strings.Contains(buildMsg, "Scheduling") {
		t.Fatalf("buildStageHeartbeat early = %v %q", buildPct, buildMsg)
	}
	flashPct, flashMsg := flashStageHeartbeat("h1", "artifact", 15*time.Second)
	if flashPct < 88 || !strings.Contains(flashMsg, "Writing") {
		t.Fatalf("flashStageHeartbeat = %v %q", flashPct, flashMsg)
	}

	app := newTestApp(t)
	app.artifactStore["n1"] = ArtifactSummary{Node: "n1", Ref: "registry.example/metis/n1:latest"}
	note := app.remoteArtifactNote("n1")
	if note != "registry.example/metis/n1:latest" {
		t.Fatalf("remoteArtifactNote = %q", note)
	}

	arch := inventoryNodeArch(&inventory.NodeSpec{}, &inventory.NodeClass{Arch: "amd64"})
	if arch != "amd64" {
		t.Fatalf("inventoryNodeArch = %q", arch)
	}

	worker := remoteWorkerEntrypoint(true, "--node", "n1")
	hasRuntime := strings.Contains(worker, "metis-runtime-env.sh")
	hasSSH := strings.Contains(worker, "metis-ssh-env.sh")
	if !hasRuntime || !hasSSH {
		t.Fatalf("remoteWorkerEntrypoint missing expected sources: %s", worker)
	}
}
// TestSelectBuilderHostPrefersWorkerAndArch checks that, against the fake kube
// server, an arm64 request with flash host titan-22 selects titan-22 as the
// builder.
func TestSelectBuilderHostPrefersWorkerAndArch(t *testing.T) {
	kubeSrv := fakeKubeServer(t)
	installKubeFactory(t, kubeSrv)
	app := newTestApp(t)

	chosen, err := app.selectBuilderHost("arm64", "titan-22")
	switch {
	case err != nil:
		t.Fatalf("selectBuilderHost: %v", err)
	case chosen.Name != "titan-22":
		t.Fatalf("expected titan-22 builder, got %s", chosen.Name)
	}
}
// TestSelectBuilderHostErrorBranch checks that requesting the s390x
// architecture against the fake kube server makes selectBuilderHost return an
// error.
func TestSelectBuilderHostErrorBranch(t *testing.T) {
	kubeSrv := fakeKubeServer(t)
	installKubeFactory(t, kubeSrv)
	app := newTestApp(t)

	_, err := app.selectBuilderHost("s390x", "titan-22")
	if err == nil {
		t.Fatal("expected selectBuilderHost error")
	}
}

View File

@ -234,721 +234,3 @@ func writeJSON(w http.ResponseWriter, status int, payload any) {
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(payload)
}
var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Metis Control</title>
<style>
:root{
--bg:#081018;
--bg-soft:#0e1722;
--panel:#101c29;
--panel-strong:#172535;
--line:rgba(149,177,205,.18);
--line-strong:rgba(149,177,205,.28);
--ink:#f3f7fb;
--muted:#9bb0c4;
--brand:#3da7ff;
--brand-deep:#1c6ca8;
--accent:#ff9a4a;
--success:#3dd08c;
--danger:#ff6f6f;
--warn:#f2c14c;
--shadow:0 24px 60px rgba(0,0,0,.35);
}
*{box-sizing:border-box}
body{
margin:0;
min-height:100vh;
font-family:"Avenir Next","Trebuchet MS","Segoe UI",sans-serif;
color:var(--ink);
background:
radial-gradient(circle at top left, rgba(61,167,255,.20), transparent 28rem),
radial-gradient(circle at top right, rgba(255,154,74,.16), transparent 26rem),
linear-gradient(180deg, #071018 0%, #0a131d 50%, #0b1622 100%);
}
.frame{
max-width:1320px;
margin:0 auto;
padding:2rem 1.25rem 3rem;
}
.mast{
display:flex;
justify-content:space-between;
align-items:flex-end;
gap:1.5rem;
margin-bottom:1rem;
}
.eyebrow{
letter-spacing:.14em;
text-transform:uppercase;
font-size:.72rem;
color:#81c6ff;
margin-bottom:.35rem;
font-weight:800;
}
h1{
margin:0;
font-size:clamp(2rem,4vw,3.4rem);
line-height:1;
}
.sub{
max-width:56rem;
color:var(--muted);
margin-top:.7rem;
font-size:1rem;
}
.badge{
display:inline-flex;
align-items:center;
gap:.45rem;
padding:.78rem 1rem;
background:rgba(16,28,41,.82);
border:1px solid var(--line);
border-radius:999px;
box-shadow:var(--shadow);
font-size:.92rem;
}
.banner{
display:flex;
align-items:flex-start;
gap:.75rem;
padding:1rem 1.1rem;
border-radius:1rem;
border:1px solid var(--line);
background:rgba(16,28,41,.84);
margin-bottom:1rem;
box-shadow:var(--shadow);
}
.banner.hidden{display:none}
.banner.info{border-color:rgba(61,167,255,.32);background:rgba(17,36,54,.88)}
.banner.success{border-color:rgba(61,208,140,.32);background:rgba(10,41,31,.9)}
.banner.error{border-color:rgba(255,111,111,.32);background:rgba(56,18,18,.9)}
.banner.warn{border-color:rgba(242,193,76,.32);background:rgba(53,40,13,.9)}
.banner strong{display:block;margin-bottom:.15rem}
.grid{
display:grid;
grid-template-columns:1.16fr .9fr;
gap:1rem;
}
.stack{display:grid;gap:1rem}
.card{
background:linear-gradient(180deg, rgba(16,28,41,.95), rgba(12,21,31,.94));
border:1px solid var(--line);
border-radius:1.35rem;
padding:1.1rem;
box-shadow:var(--shadow);
}
.card h2{
margin:0 0 .35rem;
font-size:1rem;
text-transform:uppercase;
letter-spacing:.1em;
color:#8bccff;
}
.hint{
color:var(--muted);
font-size:.92rem;
margin-bottom:1rem;
}
.microcopy{
color:var(--muted);
font-size:.84rem;
margin-top:.5rem;
min-height:1.2rem;
}
.form-grid{
display:grid;
grid-template-columns:repeat(2,minmax(0,1fr));
gap:.85rem;
}
label{
display:grid;
gap:.35rem;
font-weight:700;
font-size:.92rem;
}
select, button{
width:100%;
border-radius:.95rem;
border:1px solid var(--line-strong);
padding:.9rem .95rem;
font:inherit;
}
select{
background:rgba(11,21,33,.96);
color:var(--ink);
min-height:3.1rem;
}
select:focus, button:focus{
outline:2px solid rgba(61,167,255,.28);
outline-offset:2px;
}
button{
cursor:pointer;
background:linear-gradient(135deg,var(--brand) 0%,var(--brand-deep) 100%);
color:#fff;
border:none;
font-weight:800;
letter-spacing:.03em;
box-shadow:0 14px 30px rgba(17,66,102,.30);
transition:transform .12s ease, opacity .12s ease;
}
button:hover{transform:translateY(-1px)}
button.secondary{
background:rgba(18,30,43,.96);
color:var(--ink);
border:1px solid var(--line-strong);
box-shadow:none;
}
button:disabled{
opacity:.55;
cursor:not-allowed;
transform:none;
}
.actions{
display:grid;
grid-template-columns:repeat(3,minmax(0,1fr));
gap:.7rem;
margin-top:.9rem;
}
.list{
display:grid;
gap:.7rem;
max-height:30rem;
overflow:auto;
}
.item{
border:1px solid rgba(149,177,205,.14);
border-radius:1rem;
padding:.85rem .95rem;
background:rgba(8,17,27,.78);
}
.item-head{
display:flex;
justify-content:space-between;
gap:1rem;
margin-bottom:.35rem;
font-weight:700;
}
.meta{
color:var(--muted);
font-size:.85rem;
}
.bar{
height:.55rem;
background:rgba(149,177,205,.12);
border-radius:999px;
overflow:hidden;
margin-top:.7rem;
}
.bar > span{
display:block;
height:100%;
background:linear-gradient(90deg,var(--accent),var(--brand));
}
.pill{
display:inline-block;
padding:.2rem .55rem;
border-radius:999px;
font-size:.75rem;
text-transform:uppercase;
letter-spacing:.08em;
background:rgba(61,167,255,.12);
color:#9bd1ff;
}
.pill.done{background:rgba(61,208,140,.12);color:var(--success)}
.pill.error{background:rgba(255,111,111,.12);color:var(--danger)}
.pill.running{background:rgba(255,154,74,.14);color:var(--accent)}
.mini{
display:grid;
grid-template-columns:repeat(2,minmax(0,1fr));
gap:.7rem;
}
.stat{
padding:.8rem .9rem;
border-radius:1rem;
background:rgba(8,17,27,.72);
border:1px solid rgba(149,177,205,.12);
}
.stat strong{display:block;font-size:1.35rem}
.row{
display:flex;
justify-content:space-between;
gap:1rem;
align-items:center;
}
code{
font-family:"IBM Plex Mono","SFMono-Regular","Menlo",monospace;
font-size:.88em;
}
@media (max-width: 980px){
.grid,.form-grid,.actions,.mini{grid-template-columns:1fr}
.mast{align-items:flex-start;flex-direction:column}
.row{align-items:flex-start;flex-direction:column}
}
</style>
</head>
<body>
<main class="frame">
<section class="mast">
<div>
<div class="eyebrow">Atlas Recovery Plane</div>
<h1>Metis Control</h1>
<p class="sub">Build replacement node images, verify removable media on the Texas flash host, and keep image templates fresh with sentinel-driven drift tracking.</p>
</div>
<div class="badge"><strong>Default flash host:</strong> <span id="default-host">{{.State.DefaultFlashHost}}</span></div>
</section>
<section id="status-banner" class="banner hidden" aria-live="polite">
<div>
<strong id="status-title">Ready</strong>
<div id="status-text">Metis is ready.</div>
</div>
</section>
<section class="grid">
<div class="stack">
<article class="card">
<h2>Replacement Run</h2>
<p class="hint">This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.</p>
<div class="form-grid">
<label>Target node
<select id="node-select"></select>
</label>
<label>Flash host
<select id="host-select"></select>
</label>
<label style="grid-column:1 / -1">Detected removable media
<select id="device-select"></select>
</label>
</div>
<div class="microcopy" id="target-note"></div>
<div class="microcopy" id="host-note"></div>
<div class="microcopy" id="device-note"></div>
<div class="microcopy" id="artifact-note"></div>
<div class="actions">
<button class="secondary" id="refresh-devices">Refresh media</button>
<button class="secondary" id="build-only">Build image only</button>
<button id="replace-run">Build and flash</button>
</div>
</article>
<article class="card">
<h2>Live Jobs</h2>
<p class="hint">Progress updates stream from the running Metis operation. The replacement flow automatically tries to clear the stale Kubernetes node object before the card write.</p>
<div id="jobs" class="list"></div>
</article>
</div>
<div class="stack">
<article class="card">
<h2>Sentinel Watch</h2>
<p class="hint">Ariadne should hit the internal sentinel watch route on a schedule. You can also run it manually here when you want the latest template recommendations immediately.</p>
<div class="mini">
<div class="stat">
<span class="meta">Tracked nodes</span>
<strong id="snapshot-count">0</strong>
</div>
<div class="stat">
<span class="meta">Class targets</span>
<strong id="target-count">0</strong>
</div>
</div>
<div class="actions" style="grid-template-columns:1fr">
<button id="sentinel-watch">Run sentinel watch now</button>
</div>
</article>
<article class="card">
<div class="row">
<div>
<h2>Recent Changes</h2>
<p class="hint">This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.</p>
</div>
</div>
<div id="events" class="list"></div>
</article>
</div>
</section>
</main>
<script id="boot" type="application/json">{{.BootJSON}}</script>
<script>
// Initial page state, parsed from the JSON text of the #boot script element.
const boot = JSON.parse(document.getElementById('boot').textContent);
// Current dashboard state; starts as the boot payload and is replaced by refreshState().
let state = boot;
// True while a user-triggered action is in flight (maintained by setBusy()).
let busy = false;
// Signature of the last failed job shown in the banner, so renderJobs() does not re-raise it.
let lastJobAlert = '';
// Form controls.
const nodeSelect = document.getElementById('node-select');
const hostSelect = document.getElementById('host-select');
const deviceSelect = document.getElementById('device-select');
// List containers and counters.
const jobsEl = document.getElementById('jobs');
const eventsEl = document.getElementById('events');
const snapshotCountEl = document.getElementById('snapshot-count');
const targetCountEl = document.getElementById('target-count');
// Explanatory notes under the form.
const targetNoteEl = document.getElementById('target-note');
const hostNoteEl = document.getElementById('host-note');
const deviceNoteEl = document.getElementById('device-note');
const artifactNoteEl = document.getElementById('artifact-note');
// Status banner pieces.
const bannerEl = document.getElementById('status-banner');
const bannerTitleEl = document.getElementById('status-title');
const bannerTextEl = document.getElementById('status-text');
// Snapshot of every button present when this script runs; setBusy() toggles them together.
const actionButtons = Array.from(document.querySelectorAll('button'));
// Format a timestamp for display.
// Falsy input yields 'pending'; input that does not parse as a date is returned
// unchanged; anything else is rendered with the browser locale.
function fmtTime(value){
  if(!value){
    return 'pending';
  }
  const parsed = new Date(value);
  if(Number.isNaN(parsed.getTime())){
    return value;
  }
  return parsed.toLocaleString();
}
// Format a byte count using binary units (B through TiB).
// Falsy input yields '0 B'. Plain bytes and scaled values of 10 or more are shown
// without decimals; smaller scaled values keep one decimal place.
function fmtBytes(value){
  if(!value){
    return '0 B';
  }
  const units = ['B','KiB','MiB','GiB','TiB'];
  const lastUnit = units.length - 1;
  let scaled = Number(value);
  let unit = 0;
  // Step up one unit at a time until the value fits or the largest unit is reached.
  for(; scaled >= 1024 && unit < lastUnit; unit += 1){
    scaled /= 1024;
  }
  const decimals = (scaled >= 10 || unit === 0) ? 0 : 1;
  return scaled.toFixed(decimals) + ' ' + units[unit];
}
// Describe the time between two timestamps as 'Xh Ym', 'Ym Zs' or 'Zs'.
// A missing endValue means "now". Returns '' when the start is missing or either
// timestamp cannot be parsed; negative spans are clamped to zero.
function fmtDuration(startValue, endValue){
  if(!startValue){
    return '';
  }
  const startMs = new Date(startValue).getTime();
  if(isNaN(startMs)){
    return '';
  }
  const endMs = (endValue ? new Date(endValue) : new Date()).getTime();
  if(isNaN(endMs)){
    return '';
  }
  const total = Math.max(0, Math.round((endMs - startMs) / 1000));
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const seconds = total % 60;
  if(hours){
    return hours + 'h ' + minutes + 'm';
  }
  if(minutes){
    return minutes + 'm ' + seconds + 's';
  }
  return seconds + 's';
}
// Show the status banner: kind becomes the CSS modifier class next to 'banner',
// title and text fill the two banner lines.
function banner(kind, title, text){
  bannerTextEl.textContent = text;
  bannerTitleEl.textContent = title;
  bannerEl.className = 'banner '.concat(kind);
}
// Hide the status banner by switching it to the 'hidden' modifier class.
function clearBanner(){
  const hiddenClasses = 'banner hidden';
  bannerEl.className = hiddenClasses;
}
// Record the busy flag and enable or disable every captured button to match it.
function setBusy(nextBusy){
  busy = nextBusy;
  for(const button of actionButtons){
    button.disabled = nextBusy;
  }
}
// Pick the device path to preselect: state.preferred_device when set, otherwise the
// path of the first listed device, otherwise ''.
// state.devices is treated as optional, matching the (state.devices || []) guard
// used in render(), so a payload without a device list does not throw here.
function bestDevicePath(){
  if(state.preferred_device){
    return state.preferred_device;
  }
  const devices = state.devices || [];
  return devices[0] ? devices[0].path : '';
}
// Replace the options of a select element.
//   select     - the select element to repopulate
//   values     - option values, in display order
//   labeler    - optional function mapping a value to its visible label
//   emptyLabel - label of the single placeholder option used when values is empty
// The previous selection is restored when it is still among the new values.
function setOptions(select, values, labeler, emptyLabel){
  const previous = select.value;
  const addOption = (value, label)=>{
    const option = document.createElement('option');
    option.value = value;
    option.textContent = label;
    select.appendChild(option);
  };
  select.innerHTML = '';
  if(!values.length){
    // Nothing to choose from: show one placeholder with an empty value.
    addOption('', emptyLabel);
    select.value = '';
    return;
  }
  for(const value of values){
    addOption(value, labeler ? labeler(value) : value);
  }
  if(previous && values.includes(previous)){
    select.value = previous;
  }
}
// Rebuild the Live Jobs list from state.jobs and raise the error banner once per
// distinct failed job.
// Rows are assembled from DOM nodes with textContent instead of concatenated
// innerHTML, so job fields supplied by the server (message, error, node, artifact,
// status, ...) are always shown as text and never parsed as markup.
function renderJobs(){
  // Small element factory: tag name, optional class, optional text content.
  const el = (tag, className, text)=>{
    const node = document.createElement(tag);
    if(className){ node.className = className; }
    if(text !== undefined && text !== null){ node.textContent = String(text); }
    return node;
  };
  jobsEl.innerHTML = '';
  // A missing jobs list is treated as empty, like the guard in pollLoop().
  const known = state.jobs || [];
  // With no jobs, show one synthetic finished "idle" row so the panel is not blank.
  const jobs = known.length ? known : [{kind:'idle',status:'done',message:'No active or recent Metis jobs yet.',progress_pct:100,started_at:new Date().toISOString(),finished_at:new Date().toISOString()}];
  jobs.forEach((job)=>{
    const statusClass = job.status === 'error' ? 'error' : (job.status === 'done' ? 'done' : (job.status === 'running' ? 'running' : ''));
    const title = String(job.kind || 'job').toUpperCase() + (job.node ? ' · ' + job.node : '');
    const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
    const timingBits = [];
    if(job.stage){ timingBits.push('stage: ' + job.stage); }
    // Stage timing falls back to the job start when no stage start was reported.
    const stageDuration = fmtDuration(job.stage_started_at || job.started_at, job.finished_at);
    if(stageDuration){
      timingBits.push((job.status === 'running' ? 'stage elapsed ' : 'stage duration ') + stageDuration);
    }
    // The total is only listed separately when a stage start exists; otherwise it
    // would repeat the stage figure above.
    const totalDuration = fmtDuration(job.started_at, job.finished_at);
    if(totalDuration && job.stage_started_at){
      timingBits.push((job.status === 'running' ? 'total elapsed ' : 'total duration ') + totalDuration);
    }
    if(job.updated_at && job.status === 'running'){
      timingBits.push('last update ' + fmtDuration(job.updated_at, new Date().toISOString()) + ' ago');
    }
    const detailBits = [];
    if(job.written_bytes){ detailBits.push(fmtBytes(job.written_bytes) + ' / ' + fmtBytes(job.total_bytes)); }
    if(job.artifact){ detailBits.push(job.artifact); }
    if(job.error){ detailBits.push(job.error); }
    // Clamp progress to 0..100; non-numeric values count as 0.
    const pct = Math.max(0, Math.min(100, Number(job.progress_pct) || 0));

    const wrap = el('div', 'item');
    const head = el('div', 'item-head');
    head.appendChild(el('span', '', title));
    head.appendChild(el('span', statusClass ? 'pill ' + statusClass : 'pill', job.status));
    wrap.appendChild(head);
    wrap.appendChild(el('div', '', job.message || job.stage || 'queued'));
    wrap.appendChild(el('div', 'meta', started));
    wrap.appendChild(el('div', 'meta', timingBits.join(' · ')));
    wrap.appendChild(el('div', 'meta', detailBits.join(' · ')));
    const bar = el('div', 'bar');
    const fill = el('span');
    fill.style.width = pct + '%';
    bar.appendChild(fill);
    wrap.appendChild(bar);
    jobsEl.appendChild(wrap);
  });
  // Alert on the first failed job in list order, but only when its id/message
  // signature differs from the one already shown.
  const newestError = jobs.find((job)=>job.status === 'error');
  if(newestError){
    const signature = [newestError.id, newestError.error || newestError.message || newestError.stage || 'error'].join(':');
    if(signature !== lastJobAlert){
      lastJobAlert = signature;
      banner('error', 'Metis job failed', newestError.error || newestError.message || 'Check the live jobs panel for details.');
    }
  }
}
// Rebuild the Recent Changes list from state.events.
// Each row is built from DOM nodes with textContent, so event.summary and event.kind
// are shown as plain text and never parsed as markup. A missing events list is
// treated as empty.
function renderEvents(){
  eventsEl.innerHTML = '';
  (state.events || []).forEach((event)=>{
    const wrap = document.createElement('div');
    wrap.className = 'item';

    const head = document.createElement('div');
    head.className = 'item-head';
    const summary = document.createElement('span');
    summary.textContent = event.summary;
    const when = document.createElement('span');
    when.className = 'meta';
    when.textContent = fmtTime(event.time);
    head.appendChild(summary);
    head.appendChild(when);

    const kindRow = document.createElement('div');
    kindRow.className = 'meta';
    const kind = document.createElement('code');
    kind.textContent = event.kind;
    kindRow.appendChild(kind);

    wrap.appendChild(head);
    wrap.appendChild(kindRow);
    eventsEl.appendChild(wrap);
  });
}
// Redraw the whole dashboard from the current state object: the three selects, the
// explanatory notes, button enablement, the job and event lists, and the counters.
// Every list read from state (nodes, flash_hosts, devices, snapshots) is normalized
// to an array once at the top, so a payload that omits a list or sends null for it
// cannot throw partway through a redraw.
function render(){
  const nodes = state.nodes || [];
  const flashHosts = state.flash_hosts || [];
  const devices = state.devices || [];
  const snapshots = state.snapshots || [];

  // Target node select.
  const nodeNames = nodes.map((node)=>node.name).filter(Boolean);
  setOptions(nodeSelect, nodeNames, null, 'No replacement nodes available');
  if(!nodeSelect.value && nodeNames.length){
    nodeSelect.value = nodeNames[0];
  }
  const trackedNodes = Math.max(snapshots.length, flashHosts.length);
  targetNoteEl.textContent = nodeNames.length
    ? 'Only nodes with full replacement definitions appear here. Current replacement coverage: ' + nodeNames.length + ' node(s)' + (trackedNodes ? ' across ' + trackedNodes + ' tracked cluster node(s).' : '.')
    : 'No inventory-backed replacement nodes are loaded yet.';

  // Flash host select: prefer the host the state reports as selected, then the default.
  setOptions(hostSelect, flashHosts, null, 'No flash hosts available');
  if(state.selected_host && flashHosts.includes(state.selected_host)){
    hostSelect.value = state.selected_host;
  }
  if(!hostSelect.value && state.default_flash_host){
    hostSelect.value = state.default_flash_host;
  }

  // Device select: label each path with its size and model (or transport).
  const devicePaths = devices.map((device)=>device.path);
  setOptions(deviceSelect, devicePaths, (path)=>{
    const dev = devices.find((item)=>item.path === path);
    if(!dev){ return path; }
    const parts = [dev.path, fmtBytes(dev.size_bytes)];
    if(dev.model){ parts.push(dev.model); }
    else if(dev.transport){ parts.push(dev.transport); }
    return parts.join(' · ');
  }, state.device_error || 'No removable media detected');
  // Same choice bestDevicePath() makes, computed from the normalized device list.
  const preferredDevice = state.preferred_device || (devices[0] ? devices[0].path : '');
  const selectedDeviceStillExists = devicePaths.includes(deviceSelect.value);
  if(preferredDevice && (!selectedDeviceStillExists || !deviceSelect.value)){
    deviceSelect.value = preferredDevice;
  }

  // Notes under the form.
  const selectedHost = hostSelect.value || state.default_flash_host;
  hostNoteEl.textContent = 'Metis will inspect media and run the flash writer on ' + selectedHost + ' through a short-lived in-cluster worker. ' + state.default_flash_host + ' remains the default flash host.';
  if(state.device_error){
    deviceNoteEl.textContent = state.device_error;
  } else if(devices.length){
    deviceNoteEl.textContent = 'Best candidate preselected: ' + (preferredDevice || 'none');
  } else {
    deviceNoteEl.textContent = 'Insert an SD card or removable drive on the selected flash host, then refresh media.';
  }
  const artifact = (state.artifacts || {})[nodeSelect.value];
  artifactNoteEl.textContent = artifact && artifact.ref
    ? 'Latest published image: ' + artifact.ref + ' (Metis keeps the newest 3 builds in Harbor).'
    : 'Successful build-only runs publish <node>:latest into Harbor and keep the newest 3 builds per node.';

  // Button enablement: everything is off while busy; flashing also needs a node,
  // a device, and no device error.
  document.getElementById('build-only').disabled = busy || !nodeSelect.value;
  document.getElementById('refresh-devices').disabled = busy;
  document.getElementById('replace-run').disabled = busy || !nodeSelect.value || !deviceSelect.value || !!state.device_error;
  document.getElementById('sentinel-watch').disabled = busy;

  renderJobs();
  renderEvents();
  snapshotCountEl.textContent = snapshots.length;
  targetCountEl.textContent = Object.keys(state.targets || {}).length;
}
// Fetch the full dashboard state for the selected (or default) flash host, replace
// the local state with it, and redraw.
// Throws an Error carrying the response text when the request is not OK.
// Unless opts.silent is set, a reported device_error is shown as a warning banner.
async function refreshState(opts = {}){
  const host = hostSelect.value || state.default_flash_host;
  const url = '/api/state?host=' + encodeURIComponent(host);
  const response = await fetch(url);
  if(!response.ok){
    const detail = await response.text();
    throw new Error(detail || 'Could not refresh Metis state');
  }
  state = await response.json();
  render();
  if(opts.silent){
    return;
  }
  if(state.device_error){
    banner('warn', 'Flash host needs attention', state.device_error);
  }
}
// Fetch the removable media list for the selected (or default) flash host and merge
// it into the current state, clearing any earlier device error, then redraw.
// Throws an Error carrying the response text when the request is not OK.
async function refreshDevices(){
  const host = hostSelect.value || state.default_flash_host;
  const url = '/api/devices?host=' + encodeURIComponent(host);
  const response = await fetch(url);
  if(!response.ok){
    const detail = await response.text();
    throw new Error(detail || 'Could not refresh removable media');
  }
  const listing = await response.json();
  state.devices = listing.devices || [];
  state.selected_host = host;
  state.device_error = '';
  render();
}
// POST body as JSON to path.
// Returns the parsed JSON response when the response declares a JSON content type,
// otherwise an empty object. Throws an Error carrying the response text (or a
// generic message naming the path) when the request is not OK.
async function post(path, body){
  const request = {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify(body)
  };
  const response = await fetch(path, request);
  if(!response.ok){
    const detail = await response.text();
    throw new Error(detail || ('Request failed for ' + path));
  }
  const contentType = response.headers.get('content-type') || '';
  if(contentType.includes('application/json')){
    return response.json();
  }
  return {};
}
// Validate a required form value. Returns true when value is non-blank; otherwise
// shows message in a 'Missing input' error banner and returns false.
function requireValue(value, message){
  if(!stringsafe(value)){
    banner('error', 'Missing input', message);
    return false;
  }
  return true;
}
// Report whether value, once stringified and trimmed, is non-empty.
// Falsy values (null, undefined, 0, '') count as empty.
function stringsafe(value){
  const text = String(value || '');
  return text.trim().length > 0;
}
// Run one user-triggered action with shared feedback handling.
//   title   - banner title while running; also the prefix of the failure title
//   pending - banner text shown while fn is in flight
//   fn      - async function performing the work
// Buttons are disabled for the duration. A failure is reported in an error banner
// rather than rethrown, and the page is always re-enabled and redrawn afterwards.
async function runAction(title, pending, fn){
  try {
    setBusy(true);
    banner('info', title, pending);
    await fn();
  } catch (failure) {
    const detail = failure.message || String(failure);
    banner('error', title + ' failed', detail);
  } finally {
    setBusy(false);
    render();
  }
}
// "Refresh media" button: rescan removable devices on the selected flash host, reload
// the full state, then report either the host problem or how many candidates were found.
document.getElementById('refresh-devices').addEventListener('click', async ()=>{
  await runAction('Refreshing media', 'Checking removable devices on the selected flash host.', async ()=>{
    await refreshDevices();
    await refreshState({silent:true});
    if(state.device_error){
      banner('warn', 'Flash host needs attention', state.device_error);
      return;
    }
    // refreshState() replaced state with the server payload, so the device list is
    // read defensively here instead of assuming it is always an array.
    const found = (state.devices || []).length;
    banner('success', 'Media refreshed', found ? 'Detected ' + found + ' flash candidate(s).' : 'No removable media candidates are visible yet.');
  });
});
// "Build image only" button: requires a target node, queues the build job, then reloads state.
document.getElementById('build-only').addEventListener('click', async ()=>{
if(!requireValue(nodeSelect.value, 'Choose the target node image you want Metis to build first.')){
return;
}
await runAction('Starting image build', 'Queueing the node image build now.', async ()=>{
await post('/api/jobs/build', {node: nodeSelect.value});
await refreshState({silent:true});
banner('success', 'Image build queued', 'Metis started building the replacement image for ' + nodeSelect.value + '. Successful build-only runs publish ' + nodeSelect.value + ':latest to Harbor and keep the newest 3 builds.');
});
});
// "Build and flash" button: requires a node and a device and refuses to start while
// the flash host reports a device error; then queues the replace job and reloads state.
document.getElementById('replace-run').addEventListener('click', async ()=>{
if(!requireValue(nodeSelect.value, 'Choose the target node whose SD card image should be built and flashed.')){
return;
}
if(!requireValue(deviceSelect.value, 'Choose removable media before starting a build-and-flash run.')){
return;
}
if(state.device_error){
banner('error', 'Flash host unavailable', state.device_error);
return;
}
await runAction('Starting build and flash', 'Queueing the full replacement workflow now.', async ()=>{
await post('/api/jobs/replace', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
await refreshState({silent:true});
banner('success', 'Replacement workflow queued', 'Metis is building the image for ' + nodeSelect.value + ' and will flash ' + deviceSelect.value + '.');
});
});
// "Run sentinel watch now" button: triggers the watch route, then reloads state.
document.getElementById('sentinel-watch').addEventListener('click', async ()=>{
await runAction('Running sentinel watch', 'Refreshing template recommendations from the latest snapshots.', async ()=>{
await post('/api/sentinel/watch', {});
await refreshState({silent:true});
banner('success', 'Sentinel watch complete', 'Metis refreshed its template recommendations.');
});
});
// Flash host change: reload state for the new host, rescan its devices, reload state
// once more, and confirm only when no device error was reported.
hostSelect.addEventListener('change', async ()=>{
await runAction('Changing flash host', 'Loading removable media candidates for the selected flash host.', async ()=>{
await refreshState({silent:true});
await refreshDevices();
await refreshState({silent:true});
if(!state.device_error){
banner('success', 'Flash host ready', 'Loaded removable media candidates for ' + (hostSelect.value || state.default_flash_host) + '.');
}
});
});
// Changing the target node only needs a redraw (artifact note and button state).
nodeSelect.addEventListener('change', render);
// First paint from the boot payload; then hide any banner raised during that render.
render();
clearBanner();
// Kick off one device scan in the background; a failure here is deliberately ignored.
(async ()=>{
try {
await refreshDevices();
} catch (_error) {
// Initial media scan can fail if the selected host is unavailable.
}
})();
// Background refresh loop: reload state quietly, then reschedule itself.
// Polls every 2 seconds while any job is running and every 5 seconds otherwise.
async function pollLoop(){
  try {
    await refreshState({silent:true});
  } catch (_error) {
    // Keep the live dashboard calm during background polling.
  }
  const jobs = state.jobs || [];
  const hasRunningJob = jobs.some((job)=>job.status === 'running');
  const delayMs = hasRunningJob ? 2000 : 5000;
  setTimeout(pollLoop, delayMs);
}
pollLoop();
</script>
</body>
</html>`))

View File

@ -0,0 +1,104 @@
package service
import (
"encoding/json"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
)
// TestHTTPHandlersCoverServiceRoutes drives the success path of each HTTP route
// through app.Handler() with fake Kubernetes and Harbor servers installed.
func TestHTTPHandlersCoverServiceRoutes(t *testing.T) {
	kube := fakeKubeServer(t)
	harbor := fakeHarborServer(t, true)
	installKubeFactory(t, kube)

	app := newTestApp(t)
	app.settings.Namespace = "maintenance"
	app.settings.RunnerImageARM64 = "runner:arm64"
	app.settings.HarborAPIBase = harbor.URL + "/api/v2.0"
	app.settings.HarborUsername = "admin"
	app.settings.HarborPassword = "pw"
	app.settings.HarborProject = "metis"
	app.settings.HarborRegistry = "registry.example"
	app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json")
	handler := app.Handler()

	// do sends one request through the handler and returns the recorded response.
	do := func(req *http.Request) *httptest.ResponseRecorder {
		rec := httptest.NewRecorder()
		handler.ServeHTTP(rec, req)
		return rec
	}
	// asAdmin attaches the auth headers used by every authenticated request below.
	asAdmin := func(req *http.Request) *http.Request {
		req.Header.Set("X-Auth-Request-User", "brad")
		req.Header.Set("X-Auth-Request-Groups", "admin")
		return req
	}

	t.Run("health", func(t *testing.T) {
		rec := do(httptest.NewRequest(http.MethodGet, "/healthz", nil))
		if rec.Code != http.StatusOK || !strings.Contains(rec.Body.String(), `"status":"ok"`) {
			t.Fatalf("health response: %d %s", rec.Code, rec.Body.String())
		}
	})

	t.Run("devices", func(t *testing.T) {
		rec := do(asAdmin(httptest.NewRequest(http.MethodGet, "/api/devices?host=titan-22", nil)))
		if rec.Code != http.StatusOK || !strings.Contains(rec.Body.String(), `"/dev/sdz"`) {
			t.Fatalf("devices response: %d %s", rec.Code, rec.Body.String())
		}
	})

	// Both job routes accept a JSON body, answer 202 with a Job, and must reach JobDone.
	jobRoutes := []struct {
		name string
		path string
		body string
	}{
		{name: "build", path: "/api/jobs/build", body: `{"node":"titan-15"}`},
		{name: "replace", path: "/api/jobs/replace", body: `{"node":"titan-15","host":"titan-22","device":"/dev/sdz"}`},
	}
	for _, route := range jobRoutes {
		route := route
		t.Run(route.name, func(t *testing.T) {
			req := httptest.NewRequest(http.MethodPost, route.path, strings.NewReader(route.body))
			req.Header.Set("Content-Type", "application/json")
			rec := do(asAdmin(req))
			if rec.Code != http.StatusAccepted {
				t.Fatalf("%s response: %d %s", route.name, rec.Code, rec.Body.String())
			}
			var job Job
			if err := json.Unmarshal(rec.Body.Bytes(), &job); err != nil {
				t.Fatalf("decode %s job: %v", route.name, err)
			}
			waitForJobState(t, app, job.ID, JobDone)
		})
	}

	t.Run("watch", func(t *testing.T) {
		rec := do(asAdmin(httptest.NewRequest(http.MethodPost, "/api/sentinel/watch", nil)))
		if rec.Code != http.StatusOK || !strings.Contains(rec.Body.String(), `"kind":"sentinel.watch"`) {
			t.Fatalf("watch response: %d %s", rec.Code, rec.Body.String())
		}
	})

	t.Run("index", func(t *testing.T) {
		rec := do(asAdmin(httptest.NewRequest(http.MethodGet, "/", nil)))
		if rec.Code != http.StatusOK || !strings.Contains(rec.Body.String(), "<html") {
			t.Fatalf("index response: %d %s", rec.Code, rec.Body.String())
		}
	})
}

View File

@ -206,6 +206,127 @@ func TestRequestValuesJSONBody(t *testing.T) {
}
}
// TestRequestValuesFormAndAuthHelpers checks that requestValues reads a
// form-encoded body and that authorize accepts both the X-Auth-Request-* and
// the X-Forwarded-* header pairs.
func TestRequestValuesFormAndAuthHelpers(t *testing.T) {
	formReq := httptest.NewRequest(http.MethodPost, "/api/jobs/replace", strings.NewReader("node=titan-13&host=titan-20&device=/dev/sdz"))
	formReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	if values := requestValues(formReq); values["device"] != "/dev/sdz" {
		t.Fatalf("form requestValues = %#v", values)
	}

	app := newTestApp(t)
	headerSets := []struct {
		label       string
		userHeader  string
		groupHeader string
		groups      string
	}{
		{label: "authorize", userHeader: "X-Auth-Request-User", groupHeader: "X-Auth-Request-Groups", groups: "admin"},
		{label: "forwarded authorize", userHeader: "X-Forwarded-User", groupHeader: "X-Forwarded-Groups", groups: "/admin"},
	}
	for _, hs := range headerSets {
		req := httptest.NewRequest(http.MethodGet, "/", nil)
		req.Header.Set(hs.userHeader, "brad")
		req.Header.Set(hs.groupHeader, hs.groups)
		user, ok := app.authorize(req)
		if !ok || user.Name != "brad" {
			t.Fatalf("%s = %#v ok=%v", hs.label, user, ok)
		}
	}
}
// TestHTTPHandlersExerciseErrorBranches walks the failure paths of the HTTP
// routes: wrong method, malformed JSON, missing runner image, missing form
// fields, and a targets path that is a directory. It finishes with direct
// checks of authorize, splitHeaderList and requestValues.
func TestHTTPHandlersExerciseErrorBranches(t *testing.T) {
	kube := fakeKubeServer(t)
	installKubeFactory(t, kube)
	app := newTestApp(t)
	app.settings.RunnerImageARM64 = ""
	// A directory is used as the targets path so the watch route fails to write it.
	app.settings.TargetsPath = t.TempDir()
	handler := app.Handler()

	// status sends one request through the handler and returns the response code.
	status := func(req *http.Request) int {
		rec := httptest.NewRecorder()
		handler.ServeHTTP(rec, req)
		return rec.Code
	}
	// asAdmin attaches the auth headers used by the authenticated requests.
	asAdmin := func(req *http.Request) *http.Request {
		req.Header.Set("X-Auth-Request-User", "brad")
		req.Header.Set("X-Auth-Request-Groups", "admin")
		return req
	}

	if code := status(httptest.NewRequest(http.MethodPut, "/internal/sentinel/snapshot", nil)); code != http.StatusMethodNotAllowed {
		t.Fatalf("expected method not allowed, got %d", code)
	}

	malformed := httptest.NewRequest(http.MethodPost, "/internal/sentinel/snapshot", strings.NewReader("{"))
	malformed.Header.Set("Content-Type", "application/json")
	if code := status(malformed); code != http.StatusBadRequest {
		t.Fatalf("expected bad request, got %d", code)
	}

	if code := status(asAdmin(httptest.NewRequest(http.MethodGet, "/api/devices?host=titan-22", nil))); code != http.StatusBadRequest {
		t.Fatalf("expected bad request from missing runner image, got %d", code)
	}

	if code := status(asAdmin(httptest.NewRequest(http.MethodPost, "/api/jobs/build", strings.NewReader("node=")))); code != http.StatusBadRequest {
		t.Fatalf("expected build validation error, got %d", code)
	}

	if code := status(asAdmin(httptest.NewRequest(http.MethodPost, "/api/jobs/replace", strings.NewReader("node=titan-15")))); code != http.StatusBadRequest {
		t.Fatalf("expected replace validation error, got %d", code)
	}

	if code := status(asAdmin(httptest.NewRequest(http.MethodPost, "/api/sentinel/watch", nil))); code != http.StatusInternalServerError {
		t.Fatalf("expected watch error from unwritable targets path, got %d", code)
	}

	if _, ok := app.authorize(httptest.NewRequest(http.MethodGet, "/", nil)); ok {
		t.Fatal("authorize should return false for empty headers")
	}

	if got := splitHeaderList("a, /b"); len(got) != 2 || got[0] != "a" || got[1] != "/b" {
		t.Fatal("splitHeaderList failed")
	}

	jsonReq := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"node":"n1"}`))
	if values := requestValues(jsonReq); values["node"] != "n1" {
		t.Fatalf("requestValues JSON parse failed: %#v", values)
	}
}
// TestWatchHandlersReturnErrorsWhenTargetsCannotPersist points the targets path
// below a regular file, so it cannot be created, and expects both watch routes
// to answer 500.
func TestWatchHandlersReturnErrorsWhenTargetsCannotPersist(t *testing.T) {
	app := newTestApp(t)
	// A plain file where a directory is needed blocks creation of targets.json.
	blocker := filepath.Join(t.TempDir(), "blocked")
	if err := os.WriteFile(blocker, []byte("block"), 0o644); err != nil {
		t.Fatal(err)
	}
	app.settings.TargetsPath = filepath.Join(blocker, "targets.json")
	handler := app.Handler()

	routes := []struct {
		label  string
		path   string
		authed bool
	}{
		{label: "internal", path: "/internal/sentinel/watch", authed: false},
		{label: "api", path: "/api/sentinel/watch", authed: true},
	}
	for _, route := range routes {
		req := httptest.NewRequest(http.MethodPost, route.path, nil)
		if route.authed {
			req.Header.Set("X-Auth-Request-User", "brad")
			req.Header.Set("X-Auth-Request-Groups", "admin")
		}
		rec := httptest.NewRecorder()
		handler.ServeHTTP(rec, req)
		if rec.Code != http.StatusInternalServerError {
			t.Fatalf("expected %s watch error, got %d", route.label, rec.Code)
		}
	}
}
func newTestApp(t *testing.T) *App {
t.Helper()
dir := t.TempDir()

View File

@ -7,6 +7,8 @@ import (
"strings"
)
// hostNameLookup is the hostname source consulted by hostnameOr. It defaults to
// os.Hostname and is a variable so tests can substitute a stub.
var hostNameLookup = os.Hostname
// Settings configures the Metis service runtime.
type Settings struct {
BindAddr string
@ -101,7 +103,7 @@ func splitList(raw string) []string {
}
func hostnameOr(fallback string) string {
name, err := os.Hostname()
name, err := hostNameLookup()
if err != nil || strings.TrimSpace(name) == "" {
return fallback
}

View File

@ -0,0 +1,42 @@
package service
import "testing"
// TestSettingsHelpers exercises getenvDefault, getenvInt64, splitList and
// hostnameOr on their basic paths.
func TestSettingsHelpers(t *testing.T) {
	t.Setenv("METIS_SAMPLE", "value")
	if got := getenvDefault("METIS_SAMPLE", "fallback"); got != "value" {
		t.Fatalf("getenvDefault = %q", got)
	}

	// Blank the variable first so a value inherited from a developer shell or CI
	// environment cannot defeat the fallback assertion; t.Setenv restores the
	// previous value when the test ends.
	// NOTE(review): relies on getenvInt64 falling back for an empty value, as it
	// does for the unparsable value checked below — confirm against getenvInt64.
	t.Setenv("METIS_MAX_DEVICE_BYTES", "")
	if got := getenvInt64("METIS_MAX_DEVICE_BYTES", 99); got != 99 {
		t.Fatalf("getenvInt64 fallback = %d", got)
	}

	t.Setenv("METIS_MAX_DEVICE_BYTES", "abc")
	if got := getenvInt64("METIS_MAX_DEVICE_BYTES", 99); got != 99 {
		t.Fatalf("getenvInt64 invalid = %d", got)
	}

	if got := splitList("a, ,b,,c"); len(got) != 3 || got[0] != "a" || got[2] != "c" {
		t.Fatalf("splitList = %#v", got)
	}

	if got := hostnameOr("fallback"); got == "" {
		t.Fatal("hostnameOr returned empty string")
	}
}
// TestHostnameFallbackAndSplitListBranches stubs the hostname lookup to fail and
// expects hostnameOr to return its fallback, then checks that splitList returns
// nil for empty and whitespace-only input.
func TestHostnameFallbackAndSplitListBranches(t *testing.T) {
	saved := hostNameLookup
	t.Cleanup(func() { hostNameLookup = saved })
	hostNameLookup = func() (string, error) {
		return "", errStub{}
	}

	if name := hostnameOr("fallback"); name != "fallback" {
		t.Fatalf("hostnameOr fallback = %q", name)
	}

	blanks := []struct {
		label string
		raw   string
	}{
		{label: "empty", raw: ""},
		{label: "whitespace", raw: " "},
	}
	for _, blank := range blanks {
		if got := splitList(blank.raw); got != nil {
			t.Fatalf("splitList %s = %#v", blank.label, got)
		}
	}
}
// errStub is a stateless error value used by tests that need a lookup to fail.
type errStub struct{}

// Compile-time check that errStub satisfies the error interface.
var _ error = errStub{}

// Error returns the fixed message "stub".
func (errStub) Error() string {
	const msg = "stub"
	return msg
}

11
pkg/service/template.go Normal file
View File

@ -0,0 +1,11 @@
package service
import (
"embed"
"html/template"
)
// metisTemplateFS holds templates/metis.html, embedded into the binary at build time.
//
//go:embed templates/metis.html
var metisTemplateFS embed.FS
// metisPage is the page template parsed from the embedded file. template.Must
// panics during package initialization if templates/metis.html fails to parse.
var metisPage = template.Must(template.ParseFS(metisTemplateFS, "templates/metis.html"))

View File

@ -0,0 +1,717 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Metis Control</title>
<style>
:root{
--bg:#081018;
--bg-soft:#0e1722;
--panel:#101c29;
--panel-strong:#172535;
--line:rgba(149,177,205,.18);
--line-strong:rgba(149,177,205,.28);
--ink:#f3f7fb;
--muted:#9bb0c4;
--brand:#3da7ff;
--brand-deep:#1c6ca8;
--accent:#ff9a4a;
--success:#3dd08c;
--danger:#ff6f6f;
--warn:#f2c14c;
--shadow:0 24px 60px rgba(0,0,0,.35);
}
*{box-sizing:border-box}
body{
margin:0;
min-height:100vh;
font-family:"Avenir Next","Trebuchet MS","Segoe UI",sans-serif;
color:var(--ink);
background:
radial-gradient(circle at top left, rgba(61,167,255,.20), transparent 28rem),
radial-gradient(circle at top right, rgba(255,154,74,.16), transparent 26rem),
linear-gradient(180deg, #071018 0%, #0a131d 50%, #0b1622 100%);
}
.frame{
max-width:1320px;
margin:0 auto;
padding:2rem 1.25rem 3rem;
}
.mast{
display:flex;
justify-content:space-between;
align-items:flex-end;
gap:1.5rem;
margin-bottom:1rem;
}
.eyebrow{
letter-spacing:.14em;
text-transform:uppercase;
font-size:.72rem;
color:#81c6ff;
margin-bottom:.35rem;
font-weight:800;
}
h1{
margin:0;
font-size:clamp(2rem,4vw,3.4rem);
line-height:1;
}
.sub{
max-width:56rem;
color:var(--muted);
margin-top:.7rem;
font-size:1rem;
}
.badge{
display:inline-flex;
align-items:center;
gap:.45rem;
padding:.78rem 1rem;
background:rgba(16,28,41,.82);
border:1px solid var(--line);
border-radius:999px;
box-shadow:var(--shadow);
font-size:.92rem;
}
.banner{
display:flex;
align-items:flex-start;
gap:.75rem;
padding:1rem 1.1rem;
border-radius:1rem;
border:1px solid var(--line);
background:rgba(16,28,41,.84);
margin-bottom:1rem;
box-shadow:var(--shadow);
}
.banner.hidden{display:none}
.banner.info{border-color:rgba(61,167,255,.32);background:rgba(17,36,54,.88)}
.banner.success{border-color:rgba(61,208,140,.32);background:rgba(10,41,31,.9)}
.banner.error{border-color:rgba(255,111,111,.32);background:rgba(56,18,18,.9)}
.banner.warn{border-color:rgba(242,193,76,.32);background:rgba(53,40,13,.9)}
.banner strong{display:block;margin-bottom:.15rem}
.grid{
display:grid;
grid-template-columns:1.16fr .9fr;
gap:1rem;
}
.stack{display:grid;gap:1rem}
.card{
background:linear-gradient(180deg, rgba(16,28,41,.95), rgba(12,21,31,.94));
border:1px solid var(--line);
border-radius:1.35rem;
padding:1.1rem;
box-shadow:var(--shadow);
}
.card h2{
margin:0 0 .35rem;
font-size:1rem;
text-transform:uppercase;
letter-spacing:.1em;
color:#8bccff;
}
.hint{
color:var(--muted);
font-size:.92rem;
margin-bottom:1rem;
}
.microcopy{
color:var(--muted);
font-size:.84rem;
margin-top:.5rem;
min-height:1.2rem;
}
.form-grid{
display:grid;
grid-template-columns:repeat(2,minmax(0,1fr));
gap:.85rem;
}
label{
display:grid;
gap:.35rem;
font-weight:700;
font-size:.92rem;
}
select, button{
width:100%;
border-radius:.95rem;
border:1px solid var(--line-strong);
padding:.9rem .95rem;
font:inherit;
}
select{
background:rgba(11,21,33,.96);
color:var(--ink);
min-height:3.1rem;
}
select:focus, button:focus{
outline:2px solid rgba(61,167,255,.28);
outline-offset:2px;
}
button{
cursor:pointer;
background:linear-gradient(135deg,var(--brand) 0%,var(--brand-deep) 100%);
color:#fff;
border:none;
font-weight:800;
letter-spacing:.03em;
box-shadow:0 14px 30px rgba(17,66,102,.30);
transition:transform .12s ease, opacity .12s ease;
}
button:hover{transform:translateY(-1px)}
button.secondary{
background:rgba(18,30,43,.96);
color:var(--ink);
border:1px solid var(--line-strong);
box-shadow:none;
}
button:disabled{
opacity:.55;
cursor:not-allowed;
transform:none;
}
.actions{
display:grid;
grid-template-columns:repeat(3,minmax(0,1fr));
gap:.7rem;
margin-top:.9rem;
}
.list{
display:grid;
gap:.7rem;
max-height:30rem;
overflow:auto;
}
.item{
border:1px solid rgba(149,177,205,.14);
border-radius:1rem;
padding:.85rem .95rem;
background:rgba(8,17,27,.78);
}
.item-head{
display:flex;
justify-content:space-between;
gap:1rem;
margin-bottom:.35rem;
font-weight:700;
}
.meta{
color:var(--muted);
font-size:.85rem;
}
.bar{
height:.55rem;
background:rgba(149,177,205,.12);
border-radius:999px;
overflow:hidden;
margin-top:.7rem;
}
.bar > span{
display:block;
height:100%;
background:linear-gradient(90deg,var(--accent),var(--brand));
}
.pill{
display:inline-block;
padding:.2rem .55rem;
border-radius:999px;
font-size:.75rem;
text-transform:uppercase;
letter-spacing:.08em;
background:rgba(61,167,255,.12);
color:#9bd1ff;
}
.pill.done{background:rgba(61,208,140,.12);color:var(--success)}
.pill.error{background:rgba(255,111,111,.12);color:var(--danger)}
.pill.running{background:rgba(255,154,74,.14);color:var(--accent)}
.mini{
display:grid;
grid-template-columns:repeat(2,minmax(0,1fr));
gap:.7rem;
}
.stat{
padding:.8rem .9rem;
border-radius:1rem;
background:rgba(8,17,27,.72);
border:1px solid rgba(149,177,205,.12);
}
.stat strong{display:block;font-size:1.35rem}
.row{
display:flex;
justify-content:space-between;
gap:1rem;
align-items:center;
}
code{
font-family:"IBM Plex Mono","SFMono-Regular","Menlo",monospace;
font-size:.88em;
}
@media (max-width: 980px){
.grid,.form-grid,.actions,.mini{grid-template-columns:1fr}
.mast{align-items:flex-start;flex-direction:column}
.row{align-items:flex-start;flex-direction:column}
}
</style>
</head>
<body>
<main class="frame">
<section class="mast">
<div>
<div class="eyebrow">Atlas Recovery Plane</div>
<h1>Metis Control</h1>
<p class="sub">Build replacement node images, verify removable media on the Texas flash host, and keep image templates fresh with sentinel-driven drift tracking.</p>
</div>
<div class="badge"><strong>Default flash host:</strong> <span id="default-host">{{.State.DefaultFlashHost}}</span></div>
</section>
<section id="status-banner" class="banner hidden" aria-live="polite">
<div>
<strong id="status-title">Ready</strong>
<div id="status-text">Metis is ready.</div>
</div>
</section>
<section class="grid">
<div class="stack">
<article class="card">
<h2>Replacement Run</h2>
<p class="hint">This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.</p>
<div class="form-grid">
<label>Target node
<select id="node-select"></select>
</label>
<label>Flash host
<select id="host-select"></select>
</label>
<label style="grid-column:1 / -1">Detected removable media
<select id="device-select"></select>
</label>
</div>
<div class="microcopy" id="target-note"></div>
<div class="microcopy" id="host-note"></div>
<div class="microcopy" id="device-note"></div>
<div class="microcopy" id="artifact-note"></div>
<div class="actions">
<button class="secondary" id="refresh-devices">Refresh media</button>
<button class="secondary" id="build-only">Build image only</button>
<button id="replace-run">Build and flash</button>
</div>
</article>
<article class="card">
<h2>Live Jobs</h2>
<p class="hint">Progress updates stream from the running Metis operation. The replacement flow automatically tries to clear the stale Kubernetes node object before the card write.</p>
<div id="jobs" class="list"></div>
</article>
</div>
<div class="stack">
<article class="card">
<h2>Sentinel Watch</h2>
<p class="hint">Ariadne should hit the internal sentinel watch route on a schedule. You can also run it manually here when you want the latest template recommendations immediately.</p>
<div class="mini">
<div class="stat">
<span class="meta">Tracked nodes</span>
<strong id="snapshot-count">0</strong>
</div>
<div class="stat">
<span class="meta">Class targets</span>
<strong id="target-count">0</strong>
</div>
</div>
<div class="actions" style="grid-template-columns:1fr">
<button id="sentinel-watch">Run sentinel watch now</button>
</div>
</article>
<article class="card">
<div class="row">
<div>
<h2>Recent Changes</h2>
<p class="hint">This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.</p>
</div>
</div>
<div id="events" class="list"></div>
</article>
</div>
</section>
</main>
<script id="boot" type="application/json">{{.BootJSON}}</script>
<script>
// Initial page state, parsed from the JSON text the template renders into the #boot script element.
const boot = JSON.parse(document.getElementById('boot').textContent);
// Current dashboard state; starts as the boot payload.
let state = boot;
// Busy flag for user-triggered actions; starts cleared.
let busy = false;
// Signature of the last job alert shown; starts empty.
let lastJobAlert = '';
// Form controls.
const nodeSelect = document.getElementById('node-select');
const hostSelect = document.getElementById('host-select');
const deviceSelect = document.getElementById('device-select');
// List containers and counters.
const jobsEl = document.getElementById('jobs');
const eventsEl = document.getElementById('events');
const snapshotCountEl = document.getElementById('snapshot-count');
const targetCountEl = document.getElementById('target-count');
// Explanatory notes under the form.
const targetNoteEl = document.getElementById('target-note');
const hostNoteEl = document.getElementById('host-note');
const deviceNoteEl = document.getElementById('device-note');
const artifactNoteEl = document.getElementById('artifact-note');
// Status banner pieces.
const bannerEl = document.getElementById('status-banner');
const bannerTitleEl = document.getElementById('status-title');
const bannerTextEl = document.getElementById('status-text');
// Snapshot of every button present when this script runs.
const actionButtons = Array.from(document.querySelectorAll('button'));
/**
 * Formats a timestamp for display.
 * Returns 'pending' for an empty value, the raw value when it does not parse
 * as a date, and the locale-formatted date otherwise.
 */
function fmtTime(value){
  if(!value){
    return 'pending';
  }
  const parsed = new Date(value);
  if(Number.isNaN(parsed.getTime())){
    return value;
  }
  return parsed.toLocaleString();
}
/**
 * Renders a byte count using binary units (B through TiB).
 * Falsy input yields '0 B'. Values below 10 in any unit above bytes keep one
 * decimal place; everything else is rounded to a whole number.
 */
function fmtBytes(value){
  if(!value){
    return '0 B';
  }
  const units = ['B','KiB','MiB','GiB','TiB'];
  let scaled = Number(value);
  let unit = 0;
  for(; scaled >= 1024 && unit < units.length - 1; unit += 1){
    scaled /= 1024;
  }
  const digits = (scaled >= 10 || unit === 0) ? 0 : 1;
  return scaled.toFixed(digits) + ' ' + units[unit];
}
/**
 * Describes the time between two timestamps as 'Xh Ym', 'Xm Ys' or 'Xs'.
 * A missing endValue means "now". Returns '' when the start is missing or
 * either timestamp does not parse; negative spans are clamped to zero.
 */
function fmtDuration(startValue, endValue){
  if(!startValue){
    return '';
  }
  const startMs = new Date(startValue).getTime();
  if(Number.isNaN(startMs)){
    return '';
  }
  const endMs = (endValue ? new Date(endValue) : new Date()).getTime();
  if(Number.isNaN(endMs)){
    return '';
  }
  const total = Math.max(0, Math.round((endMs - startMs) / 1000));
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const seconds = total % 60;
  if(hours){
    return hours + 'h ' + minutes + 'm';
  }
  if(minutes){
    return minutes + 'm ' + seconds + 's';
  }
  return seconds + 's';
}
// Shows the status banner. kind is appended to the 'banner' class name
// (callers pass 'info', 'success', 'warn' or 'error'); title and text fill the
// banner's two text elements.
function banner(kind, title, text){
bannerEl.className = 'banner ' + kind;
bannerTitleEl.textContent = title;
bannerTextEl.textContent = text;
}
// Hides the status banner by switching it to the 'hidden' class; its text is left untouched.
function clearBanner(){
bannerEl.className = 'banner hidden';
}
/**
 * Records whether an action is in flight and enables or disables every
 * button on the page to match.
 */
function setBusy(nextBusy){
  busy = nextBusy;
  for(const button of actionButtons){
    button.disabled = nextBusy;
  }
}
/**
 * Picks the device path to preselect: the server's preferred device when one
 * is reported, otherwise the first detected device, otherwise ''.
 * A missing or null device list is treated as empty, matching how render()
 * guards state.devices, so the call never throws.
 */
function bestDevicePath(){
  if(state.preferred_device){
    return state.preferred_device;
  }
  const devices = state.devices || [];
  return devices[0] ? devices[0].path : '';
}
/**
 * Replaces the options of a select element.
 * values become the option values; labeler, when given, maps a value to its
 * visible label. With no values a single empty-valued option labelled
 * emptyLabel is shown. The previous selection is restored when it is still
 * among the new values.
 */
function setOptions(select, values, labeler, emptyLabel){
  const previous = select.value;
  const addOption = (value, label)=>{
    const option = document.createElement('option');
    option.value = value;
    option.textContent = label;
    select.appendChild(option);
  };
  select.innerHTML = '';
  if(values.length === 0){
    addOption('', emptyLabel);
    select.value = '';
    return;
  }
  for(const value of values){
    addOption(value, labeler ? labeler(value) : value);
  }
  if(previous && values.includes(previous)){
    select.value = previous;
  }
}
/**
 * Rebuilds the Live Jobs panel from state.jobs and raises the error banner
 * once per distinct failed job.
 *
 * Job fields (message, error, artifact, node, ...) are written with
 * textContent rather than concatenated into innerHTML, so text containing
 * markup is displayed literally instead of being parsed as HTML. A missing
 * or null job list is treated as empty and shows the idle placeholder.
 */
function renderJobs(){
  // Creates an element with an optional class and literal (unparsed) text.
  const el = (tag, className, text)=>{
    const node = document.createElement(tag);
    if(className){ node.className = className; }
    if(text !== undefined){ node.textContent = text; }
    return node;
  };
  jobsEl.innerHTML = '';
  const known = state.jobs || [];
  const jobs = known.length ? known : [{kind:'idle',status:'done',message:'No active or recent Metis jobs yet.',progress_pct:100,started_at:new Date().toISOString(),finished_at:new Date().toISOString()}];
  jobs.forEach((job)=>{
    const statusClass = job.status === 'error' ? 'error' : (job.status === 'done' ? 'done' : (job.status === 'running' ? 'running' : ''));
    const title = String(job.kind || '').toUpperCase() + (job.node ? ' · ' + job.node : '');
    const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
    const timingBits = [];
    if(job.stage){ timingBits.push('stage: ' + job.stage); }
    const stageDuration = fmtDuration(job.stage_started_at || job.started_at, job.finished_at);
    if(stageDuration){
      timingBits.push((job.status === 'running' ? 'stage elapsed ' : 'stage duration ') + stageDuration);
    }
    const totalDuration = fmtDuration(job.started_at, job.finished_at);
    // The total is only shown next to a separate stage timer; otherwise it would repeat the stage line.
    if(totalDuration && job.stage_started_at){
      timingBits.push((job.status === 'running' ? 'total elapsed ' : 'total duration ') + totalDuration);
    }
    if(job.updated_at && job.status === 'running'){
      timingBits.push('last update ' + fmtDuration(job.updated_at, new Date().toISOString()) + ' ago');
    }
    const detailBits = [];
    if(job.written_bytes){ detailBits.push(fmtBytes(job.written_bytes) + ' / ' + fmtBytes(job.total_bytes)); }
    if(job.artifact){ detailBits.push(job.artifact); }
    if(job.error){ detailBits.push(job.error); }

    const wrap = el('div', 'item');
    const head = el('div', 'item-head');
    head.appendChild(el('span', '', title));
    head.appendChild(el('span', 'pill ' + statusClass, job.status));
    wrap.appendChild(head);
    wrap.appendChild(el('div', '', job.message || job.stage || 'queued'));
    wrap.appendChild(el('div', 'meta', started));
    wrap.appendChild(el('div', 'meta', timingBits.join(' · ')));
    wrap.appendChild(el('div', 'meta', detailBits.join(' · ')));
    const bar = el('div', 'bar');
    const fill = el('span');
    // Clamp to 0..100 so a bad progress value cannot overflow the bar.
    fill.style.width = Math.max(0, Math.min(100, job.progress_pct || 0)) + '%';
    bar.appendChild(fill);
    wrap.appendChild(bar);
    jobsEl.appendChild(wrap);
  });
  const newestError = jobs.find((job)=>job.status === 'error');
  if(newestError){
    // The signature changes when a different job fails or the same job reports a new error.
    const signature = [newestError.id, newestError.error || newestError.message || newestError.stage || 'error'].join(':');
    if(signature !== lastJobAlert){
      lastJobAlert = signature;
      banner('error', 'Metis job failed', newestError.error || newestError.message || 'Check the live jobs panel for details.');
    }
  }
}
/**
 * Rebuilds the Recent Changes list from state.events.
 *
 * Event summary and kind are written with textContent rather than
 * concatenated into innerHTML, so text containing markup is displayed
 * literally instead of being parsed as HTML. A missing or null event list
 * renders as an empty panel.
 */
function renderEvents(){
  eventsEl.innerHTML = '';
  (state.events || []).forEach((event)=>{
    const wrap = document.createElement('div');
    wrap.className = 'item';

    const head = document.createElement('div');
    head.className = 'item-head';
    const summary = document.createElement('span');
    summary.textContent = event.summary;
    const when = document.createElement('span');
    when.className = 'meta';
    when.textContent = fmtTime(event.time);
    head.appendChild(summary);
    head.appendChild(when);

    const kindRow = document.createElement('div');
    kindRow.className = 'meta';
    const kind = document.createElement('code');
    kind.textContent = event.kind;
    kindRow.appendChild(kind);

    wrap.appendChild(head);
    wrap.appendChild(kindRow);
    eventsEl.appendChild(wrap);
  });
}
/**
 * Redraws the whole dashboard from the current state: the node, host and
 * device selectors, the explanatory notes, button availability, the jobs and
 * events panels, and the Sentinel Watch counters.
 *
 * Every list in state (nodes, flash_hosts, devices, snapshots) is read
 * through a null-safe local, so a payload that omits a list or sends null
 * for it renders as empty instead of throwing partway through.
 */
function render(){
  const nodes = state.nodes || [];
  const flashHosts = state.flash_hosts || [];
  const devices = state.devices || [];
  const snapshots = state.snapshots || [];

  const nodeNames = nodes.map((node)=>node.name).filter(Boolean);
  setOptions(nodeSelect, nodeNames, null, 'No replacement nodes available');
  if(!nodeSelect.value && nodeNames.length){
    nodeSelect.value = nodeNames[0];
  }
  const trackedNodes = Math.max(snapshots.length, flashHosts.length);
  targetNoteEl.textContent = nodeNames.length
    ? 'Only nodes with full replacement definitions appear here. Current replacement coverage: ' + nodeNames.length + ' node(s)' + (trackedNodes ? ' across ' + trackedNodes + ' tracked cluster node(s).' : '.')
    : 'No inventory-backed replacement nodes are loaded yet.';

  setOptions(hostSelect, flashHosts, null, 'No flash hosts available');
  if(state.selected_host && flashHosts.includes(state.selected_host)){
    hostSelect.value = state.selected_host;
  }
  if(!hostSelect.value && state.default_flash_host){
    hostSelect.value = state.default_flash_host;
  }

  const devicePaths = devices.map((device)=>device.path);
  setOptions(deviceSelect, devicePaths, (path)=>{
    const dev = devices.find((item)=>item.path === path);
    if(!dev){ return path; }
    const parts = [dev.path, fmtBytes(dev.size_bytes)];
    if(dev.model){ parts.push(dev.model); }
    else if(dev.transport){ parts.push(dev.transport); }
    return parts.join(' · ');
  }, state.device_error || 'No removable media detected');
  // Same choice bestDevicePath() makes, computed from the null-safe list.
  const preferredDevice = state.preferred_device || (devices[0] ? devices[0].path : '');
  const selectedDeviceStillExists = devicePaths.includes(deviceSelect.value);
  if(preferredDevice && (!selectedDeviceStillExists || !deviceSelect.value)){
    deviceSelect.value = preferredDevice;
  }

  const selectedHost = hostSelect.value || state.default_flash_host;
  hostNoteEl.textContent = 'Metis will inspect media and run the flash writer on ' + selectedHost + ' through a short-lived in-cluster worker. ' + state.default_flash_host + ' remains the default flash host.';
  if(state.device_error){
    deviceNoteEl.textContent = state.device_error;
  } else if(devices.length){
    deviceNoteEl.textContent = 'Best candidate preselected: ' + (preferredDevice || 'none');
  } else {
    deviceNoteEl.textContent = 'Insert an SD card or removable drive on the selected flash host, then refresh media.';
  }

  const artifact = (state.artifacts || {})[nodeSelect.value];
  artifactNoteEl.textContent = artifact && artifact.ref
    ? 'Latest published image: ' + artifact.ref + ' (Metis keeps the newest 3 builds in Harbor).'
    : 'Successful build-only runs publish <node>:latest into Harbor and keep the newest 3 builds per node.';

  // Buttons stay disabled while an action runs or while their required inputs are missing.
  document.getElementById('build-only').disabled = busy || !nodeSelect.value;
  document.getElementById('refresh-devices').disabled = busy;
  document.getElementById('replace-run').disabled = busy || !nodeSelect.value || !deviceSelect.value || !!state.device_error;
  document.getElementById('sentinel-watch').disabled = busy;

  renderJobs();
  renderEvents();
  snapshotCountEl.textContent = snapshots.length;
  targetCountEl.textContent = Object.keys(state.targets || {}).length;
}
/**
 * Reloads the full dashboard state for the selected flash host and re-renders.
 * Throws an Error carrying the response text when the request fails. Unless
 * opts.silent is set, a reported device_error is surfaced as a warning banner.
 */
async function refreshState(opts = {}){
  const host = hostSelect.value || state.default_flash_host;
  const url = '/api/state?host=' + encodeURIComponent(host);
  const resp = await fetch(url);
  if(!resp.ok){
    const detail = await resp.text();
    throw new Error(detail || 'Could not refresh Metis state');
  }
  state = await resp.json();
  render();
  const shouldWarn = !opts.silent && state.device_error;
  if(shouldWarn){
    banner('warn', 'Flash host needs attention', state.device_error);
  }
}
/**
 * Rescans removable media on the selected flash host.
 * On success it stores the device list, records the host as selected, clears
 * any previous device error, and re-renders. Throws an Error carrying the
 * response text when the request fails.
 */
async function refreshDevices(){
  const host = hostSelect.value || state.default_flash_host;
  const url = '/api/devices?host=' + encodeURIComponent(host);
  const resp = await fetch(url);
  if(!resp.ok){
    const detail = await resp.text();
    throw new Error(detail || 'Could not refresh removable media');
  }
  const payload = await resp.json();
  Object.assign(state, {
    devices: payload.devices || [],
    selected_host: host,
    device_error: ''
  });
  render();
}
/**
 * Sends body as JSON in a POST to path.
 * Returns the parsed JSON reply when the response declares a JSON content
 * type, and an empty object otherwise. Throws an Error carrying the response
 * text (or a generic message) on a non-OK status.
 */
async function post(path, body){
  const request = {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify(body)
  };
  const resp = await fetch(path, request);
  if(!resp.ok){
    const detail = await resp.text();
    throw new Error(detail || ('Request failed for ' + path));
  }
  const contentType = resp.headers.get('content-type') || '';
  if(contentType.includes('application/json')){
    return resp.json();
  }
  return {};
}
/**
 * Guards an action on a required input.
 * Returns true when value is non-blank; otherwise shows message in an error
 * banner and returns false.
 */
function requireValue(value, message){
  const present = stringsafe(value);
  if(!present){
    banner('error', 'Missing input', message);
  }
  return present;
}
/**
 * Reports whether value has visible content: falsy values and
 * whitespace-only strings count as empty.
 */
function stringsafe(value){
  const text = String(value || '');
  return text.trim().length > 0;
}
// Runs one user-triggered action with shared UI feedback.
// title labels the banner, pending is the in-progress text, and fn is the
// async work. Buttons are disabled for the duration. Any error from fn is
// caught and shown as a "<title> failed" banner rather than rethrown.
async function runAction(title, pending, fn){
try {
setBusy(true);
banner('info', title, pending);
await fn();
} catch (error) {
banner('error', title + ' failed', error.message || String(error));
} finally {
// Always re-enable the buttons, then re-render so per-button rules in render() are reapplied.
setBusy(false);
render();
}
}
// "Refresh media": rescan devices, reload state, then report either the host problem or the scan result.
document.getElementById('refresh-devices').addEventListener('click', async ()=>{
await runAction('Refreshing media', 'Checking removable devices on the selected flash host.', async ()=>{
await refreshDevices();
await refreshState({silent:true});
if(state.device_error){
banner('warn', 'Flash host needs attention', state.device_error);
return;
}
banner('success', 'Media refreshed', state.devices.length ? 'Detected ' + state.devices.length + ' flash candidate(s).' : 'No removable media candidates are visible yet.');
});
});
// "Build image only": needs a target node; queues a build job for it.
document.getElementById('build-only').addEventListener('click', async ()=>{
if(!requireValue(nodeSelect.value, 'Choose the target node image you want Metis to build first.')){
return;
}
await runAction('Starting image build', 'Queueing the node image build now.', async ()=>{
await post('/api/jobs/build', {node: nodeSelect.value});
await refreshState({silent:true});
banner('success', 'Image build queued', 'Metis started building the replacement image for ' + nodeSelect.value + '. Successful build-only runs publish ' + nodeSelect.value + ':latest to Harbor and keep the newest 3 builds.');
});
});
// "Build and flash": needs a target node, a device, and no outstanding device error on the flash host.
document.getElementById('replace-run').addEventListener('click', async ()=>{
if(!requireValue(nodeSelect.value, 'Choose the target node whose SD card image should be built and flashed.')){
return;
}
if(!requireValue(deviceSelect.value, 'Choose removable media before starting a build-and-flash run.')){
return;
}
if(state.device_error){
banner('error', 'Flash host unavailable', state.device_error);
return;
}
await runAction('Starting build and flash', 'Queueing the full replacement workflow now.', async ()=>{
await post('/api/jobs/replace', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
await refreshState({silent:true});
banner('success', 'Replacement workflow queued', 'Metis is building the image for ' + nodeSelect.value + ' and will flash ' + deviceSelect.value + '.');
});
});
// "Run sentinel watch now": triggers the watch route, then reloads state.
document.getElementById('sentinel-watch').addEventListener('click', async ()=>{
await runAction('Running sentinel watch', 'Refreshing template recommendations from the latest snapshots.', async ()=>{
await post('/api/sentinel/watch', {});
await refreshState({silent:true});
banner('success', 'Sentinel watch complete', 'Metis refreshed its template recommendations.');
});
});
// Changing the flash host reloads state for that host, rescans its media, then reloads state again.
hostSelect.addEventListener('change', async ()=>{
await runAction('Changing flash host', 'Loading removable media candidates for the selected flash host.', async ()=>{
await refreshState({silent:true});
await refreshDevices();
await refreshState({silent:true});
if(!state.device_error){
banner('success', 'Flash host ready', 'Loaded removable media candidates for ' + (hostSelect.value || state.default_flash_host) + '.');
}
});
});
// Changing the target node only needs a redraw, which updates the artifact note and button state.
nodeSelect.addEventListener('change', render);
// First paint from the boot payload, with the banner hidden.
render();
clearBanner();
// Kick off an initial media scan in the background; a failure here is deliberately ignored.
(async ()=>{
try {
await refreshDevices();
} catch (_error) {
// Initial media scan can fail if the selected host is unavailable.
}
})();
/**
 * Background refresh loop: reloads state quietly, then reschedules itself.
 * Polls every 2 seconds while any job is running and every 5 seconds
 * otherwise. Refresh failures are swallowed so the loop keeps going.
 */
async function pollLoop(){
  try {
    await refreshState({silent:true});
  } catch (_error) {
    // Keep the live dashboard calm during background polling.
  }
  const jobs = state.jobs || [];
  const delayMs = jobs.some((job)=>job.status === 'running') ? 2000 : 5000;
  setTimeout(pollLoop, delayMs);
}
pollLoop();
</script>
</body>
</html>

View File

@ -0,0 +1,236 @@
package service
import (
"encoding/json"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
)
// TestRefreshDevicesAndReplacementWorkflow scans devices on a flash host,
// checks the cached list and preferred device, then runs a replace job to
// completion against the fake Kubernetes and Harbor servers and verifies the
// published artifact reference.
func TestRefreshDevicesAndReplacementWorkflow(t *testing.T) {
	const (
		flashHost  = "titan-22"
		targetNode = "titan-15"
		cardPath   = "/dev/sdz"
	)

	kube := fakeKubeServer(t)
	harbor := fakeHarborServer(t, true)
	app := newTestApp(t)
	cfg := &app.settings
	cfg.Namespace = "maintenance"
	cfg.RunnerImageARM64 = "runner:arm64"
	cfg.HarborAPIBase = harbor.URL + "/api/v2.0"
	cfg.HarborUsername = "admin"
	cfg.HarborPassword = "pw"
	cfg.HarborProject = "metis"
	cfg.HarborRegistry = "registry.example"
	cfg.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json")
	installKubeFactory(t, kube)

	found, err := app.RefreshDevices(flashHost)
	if err != nil {
		t.Fatalf("RefreshDevices: %v", err)
	}
	if len(found) < 2 || found[0].Path != cardPath {
		t.Fatalf("unexpected devices: %+v", found)
	}

	cached, err := app.ListDevices(flashHost)
	if err != nil || len(cached) != len(found) {
		t.Fatalf("ListDevices cache mismatch: %+v err=%v", cached, err)
	}

	if preferred := app.State(flashHost).PreferredDevice; preferred != cardPath {
		t.Fatalf("expected preferred device /dev/sdz, got %q", preferred)
	}

	job, err := app.Replace(targetNode, flashHost, cardPath)
	if err != nil {
		t.Fatalf("Replace: %v", err)
	}
	waitForJobState(t, app, job.ID, JobDone)
	if final := app.job(job.ID); final == nil || final.Status != JobDone {
		t.Fatalf("replace job did not finish successfully: %#v", final)
	}
	if ref := app.artifacts()[targetNode].Ref; ref != "registry.example/metis/titan-15:latest" {
		t.Fatalf("artifact not recorded: %q", ref)
	}
}
// TestRemotePodStateAndLogsHelpers reads a pod's state and logs through the
// fake Kubernetes server and checks the fields the helpers decode.
func TestRemotePodStateAndLogsHelpers(t *testing.T) {
	const pod = "metis-build-test"

	kube := fakeKubeServer(t)
	installKubeFactory(t, kube)
	app := newTestApp(t)
	app.settings.Namespace = "maintenance"

	client, err := kubeClientFactory()
	if err != nil {
		t.Fatalf("kube client: %v", err)
	}

	state, err := app.remotePodState(client, pod)
	if err != nil {
		t.Fatalf("remotePodState: %v", err)
	}
	switch {
	case state.Name != pod,
		state.Reason != "Completed",
		!strings.Contains(state.Message, "build"):
		t.Fatalf("unexpected pod state: %#v", state)
	}

	logs, err := app.remotePodLogs(client, pod)
	if err == nil && strings.Contains(logs, "build logs") {
		return
	}
	t.Fatalf("remotePodLogs: logs=%q err=%v", logs, err)
}
// TestHarborProjectCreationAndPrune checks the artifact repository name, then
// exercises project creation and artifact pruning against a fake Harbor that
// reports no existing project.
func TestHarborProjectCreationAndPrune(t *testing.T) {
	harbor := fakeHarborServer(t, false)
	settings := Settings{
		HarborAPIBase:  harbor.URL + "/api/v2.0",
		HarborUsername: "admin",
		HarborPassword: "pw",
		HarborProject:  "metis",
		HarborRegistry: "registry.example",
	}
	app := &App{settings: settings, metrics: NewMetrics()}

	repo := app.artifactRepo("titan-15")
	if repo != "registry.example/metis/titan-15" {
		t.Fatalf("artifactRepo = %q", repo)
	}

	err := app.ensureHarborProject()
	if err != nil {
		t.Fatalf("ensureHarborProject: %v", err)
	}

	err = app.pruneHarborArtifacts("titan-15", 1)
	if err != nil {
		t.Fatalf("pruneHarborArtifacts: %v", err)
	}
}
// TestKubeJSONAndDeleteRequests issues one JSON GET and one DELETE through a
// kubeClient pointed at the fake Kubernetes server.
func TestKubeJSONAndDeleteRequests(t *testing.T) {
	kube := fakeKubeServer(t)
	client := kubeClientFactoryForURL(kube.URL, kube.Client())

	var nodes map[string]any
	err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &nodes)
	if err != nil {
		t.Fatalf("jsonRequest: %v", err)
	}

	err = client.deleteRequest("/api/v1/nodes/titan-15")
	if err != nil {
		t.Fatalf("deleteRequest: %v", err)
	}
}
// TestBuildStageAndArchiveHelpers checks that the remote artifact note for a
// stored artifact is its recorded reference.
func TestBuildStageAndArchiveHelpers(t *testing.T) {
	const want = "registry.example/metis/titan-15:latest"

	got := remoteArtifactNoteForTest(t)
	if got == want {
		return
	}
	t.Fatalf("remoteArtifactNote = %q", got)
}
// waitForJobState polls the job with the given id every 10ms for up to five
// seconds. It returns once the job reaches want, and fails the test when the
// job enters JobError first or the deadline passes.
func waitForJobState(t *testing.T, app *App, id string, want JobStatus) {
	t.Helper()

	const pollEvery = 10 * time.Millisecond
	for stop := time.Now().Add(5 * time.Second); time.Now().Before(stop); time.Sleep(pollEvery) {
		current := app.job(id)
		if current == nil {
			continue
		}
		// want is checked first, so waiting for JobError itself still returns cleanly.
		switch current.Status {
		case want:
			return
		case JobError:
			t.Fatalf("job %s failed: %s", id, current.Error)
		}
	}
	t.Fatalf("job %s never reached state %s", id, want)
}
// installKubeFactory points the package-level kubeClientFactory at srv for
// the duration of the test and restores the previous factory on cleanup.
func installKubeFactory(t *testing.T, srv *httptest.Server) {
	t.Helper()

	previous := kubeClientFactory
	t.Cleanup(func() { kubeClientFactory = previous })

	kubeClientFactory = func() (*kubeClient, error) {
		client := &kubeClient{
			baseURL: srv.URL,
			token:   "tok",
			client:  srv.Client(),
		}
		return client, nil
	}
}
// kubeClientFactoryForURL builds a kubeClient that talks to baseURL through
// the given HTTP client, using a fixed test token.
func kubeClientFactoryForURL(baseURL string, client *http.Client) *kubeClient {
	kc := new(kubeClient)
	kc.baseURL = baseURL
	kc.token = "tok"
	kc.client = client
	return kc
}
// fakeKubeServer starts an in-process HTTP server that imitates the small
// part of the Kubernetes API these tests use:
//
//   - GET  /api/v1/nodes      returns one arm64 worker node, "titan-22"
//   - POST .../pods           returns 201 Created
//   - DELETE .../pods/<name>  returns 200 OK
//   - DELETE .../nodes/<name> returns 200 OK
//   - GET  .../pods/<name>/log returns fixed log text
//   - GET  .../pods/<name>    returns a Succeeded pod whose status message is
//     a JSON payload chosen by the pod name ("devices", "build" or "flash")
//
// Anything else is a 404. The server is shut down automatically when the
// calling test finishes, so callers need not close it themselves.
func fakeKubeServer(t *testing.T) *httptest.Server {
	t.Helper()
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
			_ = json.NewEncoder(w).Encode(map[string]any{
				"items": []any{
					map[string]any{
						"metadata": map[string]any{
							"name": "titan-22",
							"labels": map[string]string{
								"kubernetes.io/arch":             "arm64",
								"hardware":                       "rpi5",
								"node-role.kubernetes.io/worker": "true",
							},
						},
						"spec": map[string]any{"unschedulable": false},
					},
				},
			})
		case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"):
			w.WriteHeader(http.StatusCreated)
		case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/pods/"):
			w.WriteHeader(http.StatusOK)
		case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/nodes/"):
			w.WriteHeader(http.StatusOK)
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/") && strings.HasSuffix(r.URL.Path, "/log"):
			_, _ = w.Write([]byte("build logs from kubelet"))
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"):
			podName := filepath.Base(strings.TrimSuffix(r.URL.Path, "/log"))
			// The pod's result travels in status.message as a JSON string.
			message := `{}`
			switch {
			case strings.Contains(podName, "devices"):
				message = `{"devices":[{"name":"sdz","path":"/dev/sdz","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000},{"name":"tmp","path":"hosttmp:///tmp","model":"Host /tmp","transport":"test","type":"file","note":"Test-only host write target under /tmp","size_bytes":1}]}`
			case strings.Contains(podName, "build"):
				message = `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}`
			case strings.Contains(podName, "flash"):
				message = `{"dest_path":"/tmp/metis-flash-test/titan-15.img"}`
			}
			_ = json.NewEncoder(w).Encode(map[string]any{
				"metadata": map[string]any{"name": podName},
				"status": map[string]any{
					"phase":   "Succeeded",
					"message": message,
					"reason":  "Completed",
				},
			})
		default:
			http.NotFound(w, r)
		}
	}))
	// Without this the listener and its goroutines outlive every test that calls the helper.
	t.Cleanup(srv.Close)
	return srv
}
// fakeHarborServer starts an in-process HTTP server that imitates the Harbor
// API endpoints these tests use:
//
//   - GET  /api/v2.0/projects...  lists the "metis" project when
//     projectExists is true, and an empty list otherwise
//   - POST /api/v2.0/projects     returns 201 Created
//   - GET  .../artifacts          lists two artifacts with fixed digests and
//     push times
//   - DELETE .../artifacts/<ref>  returns 202 Accepted
//
// Anything else is a 404. The server is shut down automatically when the
// calling test finishes, so callers need not close it themselves.
func fakeHarborServer(t *testing.T, projectExists bool) *httptest.Server {
	t.Helper()
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"):
			if projectExists {
				_ = json.NewEncoder(w).Encode([]map[string]string{{"name": "metis"}})
				return
			}
			_ = json.NewEncoder(w).Encode([]map[string]string{})
		case r.Method == http.MethodPost && r.URL.Path == "/api/v2.0/projects":
			w.WriteHeader(http.StatusCreated)
		case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"):
			_ = json.NewEncoder(w).Encode([]map[string]any{
				{"digest": "sha256:aaa", "push_time": "2026-04-01T10:00:00Z"},
				{"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"},
			})
		case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"):
			w.WriteHeader(http.StatusAccepted)
		default:
			http.NotFound(w, r)
		}
	}))
	// Without this the listener and its goroutines outlive every test that calls the helper.
	t.Cleanup(srv.Close)
	return srv
}
func remoteArtifactNoteForTest(t *testing.T) string {
t.Helper()
app := &App{
settings: Settings{HarborRegistry: "registry.example", HarborProject: "metis"},
artifactStore: map[string]ArtifactSummary{
"titan-15": {Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"},
},
}
return app.remoteArtifactNote("titan-15")
}

32
pkg/util/run_test.go Normal file
View File

@ -0,0 +1,32 @@
package util
import (
"strings"
"testing"
)
// TestRunSucceeds checks that Run reports no error for a command that exits 0.
func TestRunSucceeds(t *testing.T) {
	err := Run("sh", "-c", "exit 0")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
}
// TestRunLoggedReturnsCombinedOutput checks that RunLogged returns stdout and
// stderr together, in the order the command wrote them.
func TestRunLoggedReturnsCombinedOutput(t *testing.T) {
	const want = "helloworld"

	out, err := RunLogged("sh", "-c", "printf 'hello'; printf 'world' >&2")
	if err != nil {
		t.Fatalf("RunLogged: %v", err)
	}
	if out != want {
		t.Fatalf("RunLogged output = %q", out)
	}
}
// TestRunLoggedWrapsFailures checks that a failing command yields an error
// mentioning both the failure and the command's stderr output.
func TestRunLoggedWrapsFailures(t *testing.T) {
	_, err := RunLogged("sh", "-c", "printf boom >&2; exit 7")
	if err == nil {
		t.Fatal("expected error")
	}
	text := err.Error()
	for _, fragment := range []string{"failed", "boom"} {
		if !strings.Contains(text, fragment) {
			t.Fatalf("unexpected error: %v", err)
		}
	}
}

View File

@ -0,0 +1,41 @@
package writer
import (
"context"
"os"
"path/filepath"
"testing"
)
// TestWriteImageWithProgressBranches covers a successful write into a
// destination directory that does not exist yet, the progress callback, the
// isDevicePath helper, and the empty-destination and missing-source errors.
func TestWriteImageWithProgressBranches(t *testing.T) {
	const payload = "writer-test"
	ctx := context.Background()
	dir := t.TempDir()

	src := filepath.Join(dir, "src.img")
	if err := os.WriteFile(src, []byte(payload), 0o644); err != nil {
		t.Fatal(err)
	}
	dest := filepath.Join(dir, "out", "dest.img")

	calls := 0
	progress := func(written, total int64) {
		calls++
		if written == 0 || total == 0 {
			t.Fatalf("unexpected progress: %d/%d", written, total)
		}
	}
	if err := WriteImageWithProgress(ctx, src, dest, progress); err != nil {
		t.Fatalf("WriteImageWithProgress: %v", err)
	}
	if calls == 0 {
		t.Fatal("expected progress callback")
	}

	got, err := os.ReadFile(dest)
	if err != nil || string(got) != payload {
		t.Fatalf("write result = %q err=%v", got, err)
	}

	if !isDevicePath("/dev/sdz") || isDevicePath(dest) {
		t.Fatal("isDevicePath helper failed")
	}

	if err := WriteImageWithProgress(ctx, src, "", nil); err == nil {
		t.Fatal("expected empty destination error")
	}
	missing := filepath.Join(dir, "missing")
	if err := WriteImageWithProgress(ctx, missing, dest, nil); err == nil {
		t.Fatal("expected missing source error")
	}
}

View File

@ -26,3 +26,54 @@ func TestWriteImageCopiesFile(t *testing.T) {
t.Fatalf("expected %q got %q", string(content), string(got))
}
}
// TestWriteImageWithProgressAndCancel checks that the last progress callback
// reports the full source size, and that a write under an already-cancelled
// context fails.
func TestWriteImageWithProgressAndCancel(t *testing.T) {
	const payload = "metis-progress"
	dir := t.TempDir()
	src := filepath.Join(dir, "src.img")
	dest := filepath.Join(dir, "dest.img")
	if err := os.WriteFile(src, []byte(payload), 0o644); err != nil {
		t.Fatalf("write src: %v", err)
	}

	var seen []int64
	record := func(written, total int64) {
		seen = append(seen, written)
		if total <= 0 {
			t.Fatalf("unexpected total: %d", total)
		}
	}
	if err := WriteImageWithProgress(context.Background(), src, dest, record); err != nil {
		t.Fatalf("WriteImageWithProgress: %v", err)
	}
	if len(seen) == 0 || seen[len(seen)-1] != int64(len(payload)) {
		t.Fatalf("unexpected progress callbacks: %#v", seen)
	}

	// Cancel before the call so the write starts with a dead context.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	err := WriteImageWithProgress(ctx, src, filepath.Join(dir, "cancelled.img"), nil)
	if err == nil {
		t.Fatal("expected cancel error")
	}
}
// TestIsDevicePath checks that a /dev path is classified as a device and a
// regular file path is not.
func TestIsDevicePath(t *testing.T) {
	device := isDevicePath("/dev/sdz")
	if !device {
		t.Fatal("expected /dev/sdz to be a device path")
	}
	regular := isDevicePath("/tmp/image.img")
	if regular {
		t.Fatal("did not expect regular file path to be treated as device")
	}
}
// TestWriteImageErrorBranches checks that an empty destination is rejected
// even when the source is missing too, and that writing into a destination
// directory that does not exist yet succeeds.
func TestWriteImageErrorBranches(t *testing.T) {
	ctx := context.Background()

	err := WriteImageWithProgress(ctx, "missing-src", "", nil)
	if err == nil {
		t.Fatal("expected empty destination error before source lookup")
	}

	dir := t.TempDir()
	src := filepath.Join(dir, "src.img")
	if werr := os.WriteFile(src, []byte("data"), 0o644); werr != nil {
		t.Fatal(werr)
	}

	nested := filepath.Join(dir, "missing", "dest.img")
	if err = WriteImageWithProgress(ctx, src, nested, nil); err != nil {
		t.Fatalf("WriteImageWithProgress nested path: %v", err)
	}
}

View File

@ -5,7 +5,6 @@ from __future__ import annotations
import json
import os
import sys
import urllib.request
import xml.etree.ElementTree as ET
@ -40,7 +39,6 @@ def _load_junit(path: str) -> dict[str, int]:
except ValueError:
return 0
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
@ -66,16 +64,8 @@ def _load_exit_code(path: str) -> int | None:
return None
try:
return int(raw)
except ValueError:
raise RuntimeError(f"invalid test exit code {raw!r} in {path}")
def _read_http(url: str) -> str:
try:
with urllib.request.urlopen(url, timeout=10) as resp:
return resp.read().decode("utf-8", errors="replace")
except Exception:
return ""
except ValueError as exc:
raise RuntimeError(f"invalid test exit code {raw!r} in {path}") from exc
def _post_text(url: str, payload: str) -> None:
@ -90,26 +80,6 @@ def _post_text(url: str, payload: str) -> None:
raise RuntimeError(f"metrics push failed status={resp.status}")
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics")
if not text:
return 0.0
for line in text.splitlines():
if not line.startswith(metric + "{"):
continue
if any(f'{k}="{v}"' not in line for k, v in labels.items()):
continue
parts = line.split()
if len(parts) < 2:
continue
try:
return float(parts[1])
except ValueError:
return 0.0
return 0.0
def main() -> int:
coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json")
junit_path = os.getenv("JUNIT_XML", "build/junit.xml")
@ -121,6 +91,7 @@ def main() -> int:
branch = os.getenv("BRANCH_NAME", "")
build_number = os.getenv("BUILD_NUMBER", "")
commit = os.getenv("GIT_COMMIT", "")
strict = os.getenv("METRICS_STRICT", "") == "1"
if not os.path.exists(coverage_path):
raise RuntimeError(f"missing coverage file {coverage_path}")
@ -141,32 +112,14 @@ def main() -> int:
):
outcome = "failed"
job_name = "platform-quality-ci"
ok_count = _fetch_existing_counter(
pushgateway_url,
"platform_quality_gate_runs_total",
{"job": job_name, "suite": suite, "status": "ok"},
)
failed_count = _fetch_existing_counter(
pushgateway_url,
"platform_quality_gate_runs_total",
{"job": job_name, "suite": suite, "status": "failed"},
)
if outcome == "ok":
ok_count += 1
else:
failed_count += 1
labels = {
"job": "platform-quality-ci",
"suite": suite,
"branch": branch,
"build_number": build_number,
"commit": commit,
}
payload_lines = [
"# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
"# TYPE metis_quality_gate_tests_total gauge",
f'metis_quality_gate_tests_total{{suite="{suite}",result="total"}} {totals["tests"]}',
f'metis_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
@ -182,7 +135,13 @@ def main() -> int:
f"metis_quality_gate_build_info{_label_str(labels)} 1",
]
payload = "\n".join(payload_lines) + "\n"
_post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload)
try:
_post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{labels['job']}/suite/{suite}", payload)
except Exception as exc:
print(f"metrics push failed: {exc}")
if strict:
raise
print(
json.dumps(
@ -196,8 +155,6 @@ def main() -> int:
"tests_skipped": totals["skipped"],
"coverage_percent": round(coverage, 3),
"test_exit_code": test_exit_code,
"ok_counter": ok_count,
"failed_counter": failed_count,
},
indent=2,
)
@ -206,8 +163,4 @@ def main() -> int:
if __name__ == "__main__":
try:
raise SystemExit(main())
except Exception as exc:
print(f"metrics push failed: {exc}")
raise
raise SystemExit(main())

58
testing/config_test.go Normal file
View File

@ -0,0 +1,58 @@
package testing_test
import (
"testing"
"metis/pkg/config"
"metis/pkg/inventory"
)
// TestConfigBuildLabelsAndTaints builds a node config from a one-class,
// one-node inventory and checks that class defaults and node overrides are
// merged: labels from both, class taints before node taints, one fstab entry
// per Longhorn disk, and the node's k3s role.
func TestConfigBuildLabelsAndTaints(t *testing.T) {
	class := inventory.NodeClass{
		Name:          "c1",
		Arch:          "arm64",
		OS:            "linux",
		Image:         "file:///tmp/base.img",
		DefaultLabels: map[string]string{"a": "1"},
		DefaultTaints: []string{"t1"},
	}
	node := inventory.NodeSpec{
		Name:          "n1",
		Class:         "c1",
		Hostname:      "n1",
		IP:            "1.1.1.1",
		K3sRole:       "agent",
		Labels:        map[string]string{"b": "2"},
		Taints:        []string{"t2"},
		LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/d1", UUID: "uuid-1", FS: "ext4"}},
		SSHUser:       "ubuntu",
		SSHAuthorized: []string{"key"},
	}
	inv := inventory.Inventory{
		Classes: []inventory.NodeClass{class},
		Nodes:   []inventory.NodeSpec{node},
	}

	cfg, err := config.Build(&inv, "n1")
	if err != nil {
		t.Fatalf("Build: %v", err)
	}

	// expect fails the test with "<what> = <got>, want <want>" on a mismatch.
	expect := func(what, got, want string) {
		t.Helper()
		if got != want {
			t.Fatalf("%s = %q, want %q", what, got, want)
		}
	}

	expect("label a", cfg.Labels["a"], "1")
	expect("label b", cfg.Labels["b"], "2")
	if len(cfg.Taints) != 2 || cfg.Taints[0] != "t1" || cfg.Taints[1] != "t2" {
		t.Fatalf("taints = %#v", cfg.Taints)
	}
	if n := len(cfg.Fstab); n != 1 {
		t.Fatalf("fstab entries = %d, want 1", n)
	}
	expect("fstab mountpoint", cfg.Fstab[0].Mountpoint, "/mnt/d1")
	expect("fstab uuid", cfg.Fstab[0].UUID, "uuid-1")
	expect("k3s role", cfg.K3s.Role, "agent")
}

View File

@ -0,0 +1,41 @@
{
"target_percent": 95,
"files": {
"metis/cmd/metis-sentinel/main.go": 0,
"metis/cmd/metis/config_cmd.go": 0,
"metis/cmd/metis/facts_cmd.go": 0,
"metis/cmd/metis/image_cmd.go": 0,
"metis/cmd/metis/inject_cmd.go": 0,
"metis/cmd/metis/main.go": 0,
"metis/cmd/metis/remote_cmd.go": 4.3,
"metis/cmd/metis/serve_cmd.go": 0,
"metis/pkg/config/config.go": 0,
"metis/pkg/facts/aggregate.go": 92,
"metis/pkg/facts/load.go": 80,
"metis/pkg/facts/recommend.go": 100,
"metis/pkg/facts/targets.go": 95,
"metis/pkg/image/download.go": 64.9,
"metis/pkg/image/rootfs.go": 41.1,
"metis/pkg/inject/inject.go": 0,
"metis/pkg/inventory/types.go": 75.4,
"metis/pkg/mount/mount.go": 0,
"metis/pkg/plan/burn.go": 0,
"metis/pkg/plan/image_build.go": 0,
"metis/pkg/plan/inject.go": 60.5,
"metis/pkg/plan/plan.go": 68.4,
"metis/pkg/secrets/vault.go": 70.8,
"metis/pkg/sentinel/collector.go": 0,
"metis/pkg/service/app.go": 66.6,
"metis/pkg/service/app_helpers.go": 40,
"metis/pkg/service/artifacts.go": 28.5,
"metis/pkg/service/cluster.go": 5,
"metis/pkg/service/harbor.go": 0,
"metis/pkg/service/metrics.go": 85.8,
"metis/pkg/service/remote.go": 0,
"metis/pkg/service/remote_helpers.go": 24.2,
"metis/pkg/service/server.go": 53.9,
"metis/pkg/service/settings.go": 0,
"metis/pkg/util/run.go": 0,
"metis/pkg/writer/writer.go": 70
}
}

250
testing/gate_test.go Normal file
View File

@ -0,0 +1,250 @@
package testing_test
import (
"bufio"
"encoding/json"
"fmt"
"go/ast"
"go/parser"
"go/token"
"math"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"testing"
)
// coveragePolicy is the decoded form of the JSON coverage policy file read by
// loadCoveragePolicy.
type coveragePolicy struct {
	// TargetPercent is the coverage percentage every listed file is expected
	// to reach; loadCoveragePolicy substitutes 95 when the field is zero.
	TargetPercent float64 `json:"target_percent"`
	// Files maps a file path, as it appears in the coverage profile, to the
	// minimum coverage percentage accepted for that file.
	Files map[string]float64 `json:"files"`
}
// TestSourceFileLineLimit fails when any .go, .py, or .sh file under the cmd,
// pkg, scripts, or testing directories is longer than 500 lines. Offenders are
// reported as "relative/path:lineCount" in sorted order.
func TestSourceFileLineLimit(t *testing.T) {
	const maxLines = 500
	repo := repoRoot(t)
	var tooLong []string

	// inspect records path as an offender when it is a tracked source file
	// whose line count exceeds the limit.
	inspect := func(path string, entry os.DirEntry) error {
		if entry.IsDir() {
			return nil
		}
		ext := filepath.Ext(path)
		if ext != ".go" && ext != ".py" && ext != ".sh" {
			return nil
		}
		n, err := countLines(path)
		if err != nil {
			return err
		}
		if n > maxLines {
			tooLong = append(tooLong, fmt.Sprintf("%s:%d", rel(repo, path), n))
		}
		return nil
	}

	for _, sub := range []string{"cmd", "pkg", "scripts", "testing"} {
		walkSourceFiles(t, filepath.Join(repo, sub), inspect)
	}
	if len(tooLong) == 0 {
		return
	}
	sort.Strings(tooLong)
	t.Fatalf("source files exceed 500 LOC: %s", strings.Join(tooLong, ", "))
}
// TestExportedDocs parses every non-test Go file under cmd/ and pkg/ and fails
// when an exported function, method, or type has no doc comment accepted by
// hasUsefulDoc. Offenders are reported as "relative/path:Name" in sorted order.
func TestExportedDocs(t *testing.T) {
	root := repoRoot(t)
	var missing []string
	fset := token.NewFileSet()
	walkSourceFiles(t, root, func(path string, info os.DirEntry) error {
		if info.IsDir() || filepath.Ext(path) != ".go" || strings.HasSuffix(path, "_test.go") {
			return nil
		}
		relPath := rel(root, path)
		if !strings.HasPrefix(relPath, "cmd/") && !strings.HasPrefix(relPath, "pkg/") {
			return nil
		}
		file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
		if err != nil {
			return err
		}
		for _, decl := range file.Decls {
			switch d := decl.(type) {
			case *ast.FuncDecl:
				if d.Name.IsExported() && !hasUsefulDoc(d.Doc, d.Name.Name) {
					missing = append(missing, fmt.Sprintf("%s:%s", relPath, d.Name.Name))
				}
			case *ast.GenDecl:
				for _, spec := range d.Specs {
					s, ok := spec.(*ast.TypeSpec)
					if !ok || !s.Name.IsExported() {
						continue
					}
					// Inside a parenthesized type group the comment written
					// above a type is attached to the TypeSpec, not to the
					// enclosing GenDecl, so prefer it and fall back to the
					// declaration-level comment.
					doc := s.Doc
					if doc == nil {
						doc = d.Doc
					}
					if !hasUsefulDoc(doc, s.Name.Name) {
						missing = append(missing, fmt.Sprintf("%s:%s", relPath, s.Name.Name))
					}
				}
			}
		}
		return nil
	})
	if len(missing) > 0 {
		sort.Strings(missing)
		t.Fatalf("exported declarations without useful docs: %s", strings.Join(missing, ", "))
	}
}
// TestGoFmtAndVet runs `gofmt -l` over cmd, pkg, and testing, and `go vet ./...`,
// both from the repository root. It fails if either tool exits with an error or
// if gofmt lists any file.
func TestGoFmtAndVet(t *testing.T) {
	repo := repoRoot(t)

	// runInRepo executes the tool from the repository root and returns its
	// stdout and stderr together.
	runInRepo := func(name string, args ...string) ([]byte, error) {
		cmd := exec.Command(name, args...)
		cmd.Dir = repo
		return cmd.CombinedOutput()
	}

	fmtOut, fmtErr := runInRepo("gofmt", "-l", "cmd", "pkg", "testing")
	if fmtErr != nil {
		t.Fatalf("gofmt check failed: %v\n%s", fmtErr, fmtOut)
	}
	if unformatted := strings.TrimSpace(string(fmtOut)); unformatted != "" {
		t.Fatalf("gofmt -l reported files:\n%s", unformatted)
	}

	if vetOut, vetErr := runInRepo("go", "vet", "./..."); vetErr != nil {
		t.Fatalf("go vet failed: %v\n%s", vetErr, vetOut)
	}
}
// TestCoveragePolicy compares per-file statement coverage read from
// build/coverage.out with the policy in testing/coverage_policy.json.
//
// When the profile does not exist it is generated by running `go test ./...`
// from the repository root. A file listed in the policy is a regression when
// it is absent from the profile or its coverage is more than 0.05 points below
// its recorded minimum. If there are no regressions, the test still fails for
// any listed file below the policy's target percentage.
func TestCoveragePolicy(t *testing.T) {
	root := repoRoot(t)
	coveragePath := filepath.Join(root, "build", "coverage.out")
	if _, err := os.Stat(coveragePath); err != nil {
		// The build directory is not guaranteed to exist on a clean checkout,
		// and the coverage run cannot write its profile into a missing
		// directory, so create it before invoking go test.
		if mkErr := os.MkdirAll(filepath.Dir(coveragePath), 0o755); mkErr != nil {
			t.Fatalf("create coverage directory: %v", mkErr)
		}
		cmd := exec.Command("go", "test", "./...", "-coverprofile=build/coverage.out")
		cmd.Dir = root
		out, runErr := cmd.CombinedOutput()
		if runErr != nil {
			t.Fatalf("root coverage run failed: %v\n%s", runErr, out)
		}
	}
	policyPath := filepath.Join(root, "testing", "coverage_policy.json")
	policy := loadCoveragePolicy(t, policyPath)
	actual := readCoverageProfile(t, coveragePath)
	var regressions []string
	var phased []string
	// floor is the per-file minimum; the name avoids shadowing the min builtin.
	for file, floor := range policy.Files {
		got, ok := actual[file]
		if !ok {
			regressions = append(regressions, fmt.Sprintf("%s missing from coverage", file))
			continue
		}
		// The 0.05 allowance absorbs the one-decimal rounding of measured values.
		if got+0.05 < floor {
			regressions = append(regressions, fmt.Sprintf("%s %.1f < %.1f", file, got, floor))
		}
		if got < policy.TargetPercent {
			phased = append(phased, fmt.Sprintf("%s=%.1f", file, got))
		}
	}
	if len(regressions) > 0 {
		sort.Strings(regressions)
		t.Fatalf("coverage regressed: %s", strings.Join(regressions, ", "))
	}
	if len(phased) > 0 {
		sort.Strings(phased)
		t.Fatalf("coverage below target %.1f%%: %s", policy.TargetPercent, strings.Join(phased, ", "))
	}
}
// countLines returns the number of lines in the file at path. A final line
// without a trailing newline is counted; an empty file has zero lines.
//
// It returns an error when the file cannot be opened or read. On a read error
// the count of lines seen so far is returned alongside the error.
func countLines(path string) (int, error) {
	// maxLineBytes raises the scanner's per-line limit well above the 64 KiB
	// default so that a single very long line (for example generated or
	// embedded data) is counted instead of aborting with bufio.ErrTooLong.
	const maxLineBytes = 16 << 20

	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	s.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)
	count := 0
	for s.Scan() {
		count++
	}
	return count, s.Err()
}
// rel returns path expressed relative to root, using forward slashes. When no
// relative form can be computed, path is returned unchanged.
func rel(root, path string) string {
	if relative, err := filepath.Rel(root, path); err == nil {
		return filepath.ToSlash(relative)
	}
	return path
}
// hasUsefulDoc reports whether comment is an acceptable doc comment for the
// declaration called name.
//
// A comment is accepted when it has at least four words and either mentions
// name (case-insensitively) or is more than twelve characters longer than
// name. A nil or empty comment is never accepted.
func hasUsefulDoc(comment *ast.CommentGroup, name string) bool {
	if comment == nil {
		return false
	}
	text := strings.TrimSpace(comment.Text())
	// An empty comment has zero fields, so this also rejects blank text.
	if len(strings.Fields(text)) < 4 {
		return false
	}
	// Match the whole identifier: matching only its first letter would accept
	// almost any comment. The empty-name guard avoids treating every comment
	// as a mention.
	if name != "" && strings.Contains(strings.ToLower(text), strings.ToLower(name)) {
		return true
	}
	return len(text) > len(name)+12
}
// loadCoveragePolicy reads and decodes the JSON policy file at path, failing
// the test immediately if the file cannot be read or parsed. A zero target is
// replaced with 95 and a missing file map with an empty one, so callers can
// use the result without nil checks.
func loadCoveragePolicy(t *testing.T, path string) coveragePolicy {
	t.Helper()

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatal(readErr)
	}

	loaded := coveragePolicy{}
	if decodeErr := json.Unmarshal(raw, &loaded); decodeErr != nil {
		t.Fatal(decodeErr)
	}

	// Fill in defaults for fields the policy file left out.
	if loaded.Files == nil {
		loaded.Files = map[string]float64{}
	}
	if loaded.TargetPercent == 0 {
		loaded.TargetPercent = 95
	}
	return loaded
}
// readCoverageProfile parses the Go coverage profile at path and returns, for
// each file in it, the percentage of statements executed at least once,
// rounded to one decimal place.
//
// Blank lines, the "mode:" header, and lines that do not have exactly three
// fields or whose numeric fields fail to parse are ignored. Files whose blocks
// add up to zero statements are left out. The test fails immediately if the
// profile cannot be opened or read.
func readCoverageProfile(t *testing.T, path string) map[string]float64 {
	t.Helper()

	profile, openErr := os.Open(path)
	if openErr != nil {
		t.Fatal(openErr)
	}
	defer profile.Close()

	type tally struct{ covered, total int }
	perFile := map[string]tally{}

	scanner := bufio.NewScanner(profile)
	for scanner.Scan() {
		// Each block line looks like "file:start,end numStatements hitCount".
		fields := strings.Fields(scanner.Text())
		if len(fields) != 3 || strings.HasPrefix(fields[0], "mode:") {
			continue
		}
		numStmts, stmtErr := strconv.Atoi(fields[1])
		if stmtErr != nil {
			continue
		}
		hits, hitErr := strconv.Atoi(fields[2])
		if hitErr != nil {
			continue
		}
		name, _, _ := strings.Cut(fields[0], ":")
		current := perFile[name]
		current.total += numStmts
		if hits > 0 {
			current.covered += numStmts
		}
		perFile[name] = current
	}
	if scanErr := scanner.Err(); scanErr != nil {
		t.Fatal(scanErr)
	}

	percentages := map[string]float64{}
	for name, sum := range perFile {
		if sum.total != 0 {
			percentages[name] = math.Round((float64(sum.covered)/float64(sum.total))*1000) / 10
		}
	}
	return percentages
}

9
testing/go.mod Normal file
View File

@ -0,0 +1,9 @@
module metis-testing
go 1.23.1
require metis v0.0.0
require gopkg.in/yaml.v3 v3.0.1 // indirect
replace metis => ..

4
testing/go.sum Normal file
View File

@ -0,0 +1,4 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

46
testing/helpers_test.go Normal file
View File

@ -0,0 +1,46 @@
package testing_test
import (
"io/fs"
"os"
"path/filepath"
"runtime"
"testing"
)
// repoRoot returns the parent of the directory that contains this source file,
// which is the repository root when the file lives in the testing directory.
// The test fails if the runtime cannot report the caller's file.
func repoRoot(t *testing.T) string {
	t.Helper()
	_, thisFile, _, resolved := runtime.Caller(0)
	if !resolved {
		t.Fatal("could not resolve testing module location")
	}
	testingDir := filepath.Dir(thisFile)
	return filepath.Clean(filepath.Join(testingDir, ".."))
}
// walkSourceFiles walks the tree rooted at root and calls fn for every entry,
// directories included, except that directories named after VCS metadata,
// virtual environments, build output, or tool caches are skipped together with
// their contents. Any error from the walk or from fn fails the test.
func walkSourceFiles(t *testing.T, root string, fn func(path string, info fs.DirEntry) error) {
	t.Helper()

	// skipped lists directory names that are never descended into.
	skipped := map[string]bool{
		".git":          true,
		".venv":         true,
		".venv-ci":      true,
		"build":         true,
		"tmp":           true,
		"artifacts":     true,
		".pytest_cache": true,
		".ruff_cache":   true,
	}

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() && skipped[entry.Name()] {
			return filepath.SkipDir
		}
		return fn(path, entry)
	}
	if err := filepath.WalkDir(root, visit); err != nil {
		t.Fatal(err)
	}
}
// readFile returns the full contents of the file at path, failing the test
// immediately if it cannot be read.
func readFile(t *testing.T, path string) []byte {
	t.Helper()
	contents, err := os.ReadFile(path)
	if err == nil {
		return contents
	}
	t.Fatal(err)
	return nil
}

47
testing/image_test.go Normal file
View File

@ -0,0 +1,47 @@
package testing_test
import (
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
"metis/pkg/image"
)
// TestDownloadFileURL checks that image.Download copies a local file addressed
// through a file:// URL to the destination path unchanged.
func TestDownloadFileURL(t *testing.T) {
	const payload = "hello"
	dir := t.TempDir()
	srcPath := filepath.Join(dir, "src.bin")
	destPath := filepath.Join(dir, "dest.bin")

	if err := os.WriteFile(srcPath, []byte(payload), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := image.Download("file://"+srcPath, destPath); err != nil {
		t.Fatalf("Download: %v", err)
	}

	copied, err := os.ReadFile(destPath)
	if err != nil {
		t.Fatal(err)
	}
	if string(copied) != payload {
		t.Fatalf("downloaded content = %q", copied)
	}
}
// TestChecksumHelpers checks that image.VerifyChecksum accepts a matching
// "sha256:<hex>" digest and a matching "md5:<hex>" digest for the same file.
func TestChecksumHelpers(t *testing.T) {
	content := []byte("abc")
	target := filepath.Join(t.TempDir(), "file.bin")
	if err := os.WriteFile(target, content, 0o644); err != nil {
		t.Fatal(err)
	}

	// verify checks the file against a digest given as "<algorithm>:<hex>".
	verify := func(algorithm string, digest []byte) error {
		return image.VerifyChecksum(target, algorithm+":"+hex.EncodeToString(digest))
	}

	shaDigest := sha256.Sum256(content)
	if err := verify("sha256", shaDigest[:]); err != nil {
		t.Fatalf("VerifyChecksum sha256: %v", err)
	}
	md5Digest := md5.Sum(content)
	if err := verify("md5", md5Digest[:]); err != nil {
		t.Fatalf("VerifyChecksum md5: %v", err)
	}
}

28
testing/inject_test.go Normal file
View File

@ -0,0 +1,28 @@
package testing_test
import (
"path/filepath"
"testing"
"metis/pkg/inject"
)
// TestInjectWrite checks that Injector.Write places a file flagged RootFS
// under RootPath and an unflagged file under BootPath, with the given content.
func TestInjectWrite(t *testing.T) {
	base := t.TempDir()
	bootDir, rootDir := filepath.Join(base, "boot"), filepath.Join(base, "root")
	injector := inject.Injector{BootPath: bootDir, RootPath: rootDir}

	err := injector.Write([]inject.FileSpec{
		{Path: "config.txt", Content: []byte("bootcfg"), Mode: 0o644, RootFS: false},
		{Path: "etc/hostname", Content: []byte("node"), Mode: 0o644, RootFS: true},
	})
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	bootFile := readFile(t, filepath.Join(bootDir, "config.txt"))
	if string(bootFile) != "bootcfg" {
		t.Fatalf("boot file = %q", bootFile)
	}
	rootFile := readFile(t, filepath.Join(rootDir, "etc/hostname"))
	if string(rootFile) != "node" {
		t.Fatalf("root file = %q", rootFile)
	}
}

56
testing/inventory_test.go Normal file
View File

@ -0,0 +1,56 @@
package testing_test
import (
"os"
"path/filepath"
"testing"
"metis/pkg/inventory"
)
// TestLoadAndFindNode writes a one-class, one-node inventory file, loads it
// with inventory.Load, and checks that FindNode returns the node together with
// its class.
func TestLoadAndFindNode(t *testing.T) {
	invPath := filepath.Join(t.TempDir(), "inventory.yaml")
	// The fixture is written to disk because inventory.Load takes a file path.
	if err := os.WriteFile(invPath, []byte(`
classes:
- name: rpi5
arch: arm64
os: ubuntu
image: file:///tmp/base.img
checksum: sha256:deadbeef
default_labels:
hardware: rpi5
nodes:
- name: titan-04
class: rpi5
hostname: titan-04
ip: 192.168.22.30
k3s_role: agent
`), 0o644); err != nil {
		t.Fatal(err)
	}
	inv, err := inventory.Load(invPath)
	if err != nil {
		t.Fatalf("Load: %v", err)
	}
	node, class, err := inv.FindNode("titan-04")
	if err != nil {
		t.Fatalf("FindNode: %v", err)
	}
	// One field from the node and one from the class are enough to show both
	// halves of the lookup resolved.
	if node.Hostname != "titan-04" {
		t.Fatalf("hostname = %q", node.Hostname)
	}
	if class.Arch != "arm64" {
		t.Fatalf("arch = %q", class.Arch)
	}
}
// TestFindNodeMissing checks that looking up an unknown node in an empty
// inventory returns an error and nil node and class values.
func TestFindNodeMissing(t *testing.T) {
	var empty inventory.Inventory
	node, class, err := empty.FindNode("missing")
	if err == nil {
		t.Fatal("expected missing node error")
	}
	if node == nil && class == nil {
		return
	}
	t.Fatalf("unexpected node/class: %#v %#v", node, class)
}

121
testing/plan_cli_test.go Normal file
View File

@ -0,0 +1,121 @@
package testing_test
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
func TestPlanCLIProducesActions(t *testing.T) {
root := repoRoot(t)
dummy := filepath.Join(t.TempDir(), "dummy.img")
if err := os.WriteFile(dummy, []byte("dummy"), 0o644); err != nil {
t.Fatal(err)
}
checksum := sha256.Sum256(readFile(t, dummy))
invPath := filepath.Join(t.TempDir(), "inv.yaml")
inv := fmt.Sprintf(`{
"classes": [
{
"name": "test-class",
"arch": "arm64",
"os": "testos",
"image": "file://%s",
"checksum": "sha256:%s",
"default_labels": {"role": "worker"}
}
],
"nodes": [
{
"name": "node-a",
"class": "test-class",
"hostname": "node-a",
"ip": "10.0.0.10",
"k3s_role": "agent"
}
]
}`,
dummy,
hex.EncodeToString(checksum[:]),
)
if err := os.WriteFile(invPath, []byte(inv), 0o644); err != nil {
t.Fatal(err)
}
cmd := exec.Command("go", "run", "./cmd/metis", "plan", "--inventory", invPath, "--node", "node-a", "--device", "/dev/sdz", "--cache", filepath.Join(t.TempDir(), "cache"))
cmd.Dir = root
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("plan command failed: %v\n%s", err, out)
}
var plan struct {
Node string `json:"node"`
Actions []struct {
Type string `json:"type"`
} `json:"actions"`
}
if err := json.Unmarshal(out, &plan); err != nil {
t.Fatalf("decode plan: %v\n%s", err, out)
}
if plan.Node != "node-a" {
t.Fatalf("node = %q", plan.Node)
}
for _, action := range plan.Actions {
if action.Type == "fetch" {
return
}
}
t.Fatal("expected a fetch action")
}
func TestBurnDryRunPrintsPlan(t *testing.T) {
root := repoRoot(t)
dummy := filepath.Join(t.TempDir(), "dummy.img")
if err := os.WriteFile(dummy, []byte("dummy"), 0o644); err != nil {
t.Fatal(err)
}
checksum := sha256.Sum256(readFile(t, dummy))
invPath := filepath.Join(t.TempDir(), "inv.yaml")
inv := fmt.Sprintf(`{
"classes": [
{
"name": "test-class",
"arch": "arm64",
"os": "testos",
"image": "file://%s",
"checksum": "sha256:%s",
"default_labels": {"role": "worker"}
}
],
"nodes": [
{
"name": "node-a",
"class": "test-class",
"hostname": "node-a",
"ip": "10.0.0.10",
"k3s_role": "agent"
}
]
}`,
dummy,
hex.EncodeToString(checksum[:]),
)
if err := os.WriteFile(invPath, []byte(inv), 0o644); err != nil {
t.Fatal(err)
}
cmd := exec.Command("go", "run", "./cmd/metis", "burn", "--inventory", invPath, "--node", "node-a", "--device", "/dev/sdz", "--cache", filepath.Join(t.TempDir(), "cache"))
cmd.Dir = root
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("burn command failed: %v\n%s", err, out)
}
got := string(out)
if !strings.Contains(got, "Dry run") && !strings.Contains(got, "Plan for") {
t.Fatalf("unexpected output: %s", got)
}
}

View File

@ -1 +0,0 @@
# pytest package marker

View File

@ -1,36 +0,0 @@
from metis.pkg import inventory, config
def test_config_build_labels_and_taints():
    """Build the config for node n1 and check merged labels, taints, fstab and role."""
    node_class = inventory.NodeClass(
        Name="c1",
        Arch="arm64",
        OS="linux",
        Image="file:///tmp/base.img",
        DefaultLabels={"a": "1"},
        DefaultTaints=["t1"],
    )
    disk = inventory.LonghornDisk(Mountpoint="/mnt/d1", UUID="uuid-1", FS="ext4")
    node = inventory.NodeSpec(
        Name="n1",
        Class="c1",
        Hostname="n1",
        IP="1.1.1.1",
        K3sRole="agent",
        Labels={"b": "2"},
        Taints=["t2"],
        LonghornDisks=[disk],
        SSHUser="ubuntu",
        SSHAuthorized=["key"],
    )
    inv = inventory.Inventory(classes=[node_class], Nodes=[node])

    cfg, err = config.Build(inv, "n1")

    assert err is None
    # Class defaults come first, node-specific values are merged after them.
    assert cfg.Labels == {"a": "1", "b": "2"}
    assert cfg.Taints == ["t1", "t2"]
    first_mount = cfg.Fstab[0]
    assert first_mount.Mountpoint == "/mnt/d1"
    assert first_mount.UUID == "uuid-1"
    assert cfg.K3s.Role == "agent"

View File

@ -1,30 +0,0 @@
import hashlib
from pathlib import Path
from metis.pkg import image
def test_download_file_url(tmp_path):
    """Download a local file through a file:// URL and compare the bytes."""
    payload = b"hello"
    src = tmp_path / "src.bin"
    dest = tmp_path / "dest.bin"
    src.write_bytes(payload)

    image.Download(f"file://{src}", dest)

    assert dest.read_bytes() == payload
def test_checksum_ok(tmp_path):
    """A matching sha256 digest must verify without raising."""
    content = b"abc"
    target = tmp_path / "file.bin"
    target.write_bytes(content)
    digest = hashlib.sha256(content).hexdigest()
    image.VerifyChecksum(target, "sha256:" + digest)
def test_checksum_bad(tmp_path):
    """A digest that does not match the file must make verification raise."""
    target = tmp_path / "file.bin"
    target.write_bytes(b"abc")
    wrong = "sha256:deadbeef"
    try:
        image.VerifyChecksum(target, wrong)
    except Exception:
        # Any exception counts as the expected rejection.
        pass
    else:
        raise AssertionError("expected checksum failure")

View File

@ -1,16 +0,0 @@
from pathlib import Path
from metis.pkg.inject import Injector, FileSpec
def test_inject_write(tmp_path):
    """Write one boot file and one rootfs file and check where each lands."""
    boot_dir = tmp_path / "boot"
    root_dir = tmp_path / "root"
    injector = Injector(BootPath=str(boot_dir), RootPath=str(root_dir))

    boot_spec = FileSpec(Path="config.txt", Content=b"bootcfg", Mode=0o644, RootFS=False)
    root_spec = FileSpec(Path="etc/hostname", Content=b"node", Mode=0o644, RootFS=True)
    injector.Write([boot_spec, root_spec])

    boot_bytes = (boot_dir / "config.txt").read_bytes()
    root_bytes = (root_dir / "etc/hostname").read_bytes()
    assert boot_bytes == b"bootcfg"
    assert root_bytes == b"node"

View File

@ -1,46 +0,0 @@
import json
import tempfile
from pathlib import Path
import pytest
from metis.pkg import inventory
def test_load_and_find_node():
    """Load a one-node inventory from disk and look the node up by name."""
    node_class = {
        "name": "rpi5",
        "arch": "arm64",
        "os": "ubuntu",
        "image": "file:///tmp/base.img",
        "checksum": "sha256:deadbeef",
        "default_labels": {"hardware": "rpi5"},
    }
    node_entry = {
        "name": "titan-04",
        "class": "rpi5",
        "hostname": "titan-04",
        "ip": "192.168.22.30",
        "k3s_role": "agent",
    }
    data = {"classes": [node_class], "nodes": [node_entry]}

    with tempfile.TemporaryDirectory() as tmp:
        # The inventory is JSON written under a .yaml name.
        path = Path(tmp) / "inv.yaml"
        path.write_text(json.dumps(data))
        inv = inventory.Load(path)
        node, cls, err = inv.FindNode("titan-04")
        assert err is None
        assert node.Hostname == "titan-04"
        assert cls.Arch == "arm64"
def test_find_node_missing():
    """Looking up an unknown node yields an error and no node or class."""
    empty = inventory.Inventory(classes=[], nodes=[])
    result = empty.FindNode("missing")
    node, cls, err = result
    assert err is not None
    assert node is None
    assert cls is None

View File

@ -1,75 +0,0 @@
import json
import subprocess
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[1]
def write_inv(tmpdir, image_path, checksum):
    """Write a single-class, single-node inventory into tmpdir.

    The image is referenced as ``file://<image_path>`` with the given checksum.
    The inventory is serialized as JSON into ``inv.yaml`` and its path returned.
    """
    node_class = {
        "name": "test-class",
        "arch": "arm64",
        "os": "testos",
        "image": f"file://{image_path}",
        "checksum": checksum,
        "default_labels": {"role": "worker"},
    }
    node = {
        "name": "node-a",
        "class": "test-class",
        "hostname": "node-a",
        "ip": "10.0.0.10",
        "k3s_role": "agent",
    }
    inv_path = Path(tmpdir) / "inv.yaml"
    inv_path.write_text(json.dumps({"classes": [node_class], "nodes": [node]}))
    return inv_path
def test_plan_output_contains_actions(tmp_path):
    """Run the plan subcommand and check the JSON plan names node-a and a fetch action."""
    import hashlib

    dummy = tmp_path / "dummy.img"
    dummy.write_bytes(b"dummy")
    digest = hashlib.sha256(dummy.read_bytes()).hexdigest()
    inv_path = write_inv(tmp_path, dummy, "sha256:" + digest)
    cache_dir = tmp_path / "cache"

    cmd = ["go", "run", "./cmd/metis", "plan"]
    cmd += ["--inventory", str(inv_path)]
    cmd += ["--node", "node-a"]
    cmd += ["--device", "/dev/sdz"]
    cmd += ["--cache", str(cache_dir)]
    plan = json.loads(subprocess.check_output(cmd, cwd=ROOT))

    assert plan["node"] == "node-a"
    action_types = [action["type"] for action in plan["actions"]]
    assert "fetch" in action_types
def test_burn_dry_run(tmp_path):
    """Run the burn subcommand and check that stdout announces a dry run or a plan."""
    import hashlib

    dummy = tmp_path / "dummy.img"
    dummy.write_bytes(b"dummy")
    digest = hashlib.sha256(dummy.read_bytes()).hexdigest()
    inv_path = write_inv(tmp_path, dummy, "sha256:" + digest)
    cache_dir = tmp_path / "cache"

    flags = {
        "--inventory": str(inv_path),
        "--node": "node-a",
        "--device": "/dev/sdz",
        "--cache": str(cache_dir),
    }
    cmd = ["go", "run", "./cmd/metis", "burn"]
    for flag, value in flags.items():
        cmd.extend([flag, value])

    out = subprocess.check_output(cmd, cwd=ROOT, text=True)
    announced = "Dry run" in out or "Plan for" in out
    assert announced