diff --git a/Jenkinsfile b/Jenkinsfile index 9cd61da..8ccbfc4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -154,6 +154,18 @@ spec: } } + stage('Quality gate') { + steps { + container('tester') { + sh ''' + set -eu + cd testing + go test -v ./... + ''' + } + } + } + stage('Prep toolchain') { steps { container('builder') { diff --git a/cmd/metis-sentinel/coverage_more_test.go b/cmd/metis-sentinel/coverage_more_test.go new file mode 100644 index 0000000..fb62886 --- /dev/null +++ b/cmd/metis-sentinel/coverage_more_test.go @@ -0,0 +1,38 @@ +package main + +import ( + "os" + "path/filepath" + "testing" + + "metis/pkg/sentinel" +) + +func TestSentinelNsenterAndErrorBranches(t *testing.T) { + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("nsenter", `while [[ "${1:-}" != "--" ]]; do shift; done +shift +exec "$@"`) + write("hostname", `printf 'titan-13\n'`) + write("uname", `printf '6.6.63\n'`) + write("k3s", `printf 'v1.31.5+k3s1\n'`) + write("containerd", `printf '1.7.99\n'`) + write("cat", `printf 'PRETTY_NAME="Metis OS"\n'`) + write("dpkg-query", `printf '1.0.0\n'`) + write("rpm", `printf '1.0.0\n'`) + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) + t.Setenv("METIS_SENTINEL_NSENTER", "1") + snap := sentinel.Collect() + if snap.Hostname != "titan-13" || snap.OSImage != "Metis OS" { + t.Fatalf("Collect via nsenter = %#v", snap) + } + if err := pushSnapshot("http://127.0.0.1:1", snap); err == nil { + t.Fatal("expected pushSnapshot error") + } +} diff --git a/cmd/metis-sentinel/main.go b/cmd/metis-sentinel/main.go index bbcd49a..911518a 100644 --- a/cmd/metis-sentinel/main.go +++ b/cmd/metis-sentinel/main.go @@ -14,6 +14,8 @@ import ( "metis/pkg/sentinel" ) +var fatalf = log.Fatalf + func main() { interval := 
time.Duration(getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300)) * time.Second pushURL := os.Getenv("METIS_SENTINEL_PUSH_URL") @@ -24,7 +26,7 @@ func main() { enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") if err := enc.Encode(snap); err != nil { - log.Fatalf("encode: %v", err) + fatalf("encode: %v", err) } if out := os.Getenv("METIS_SENTINEL_OUT"); out != "" { writeHistory(out, snap) diff --git a/cmd/metis-sentinel/main_test.go b/cmd/metis-sentinel/main_test.go new file mode 100644 index 0000000..71ab7d3 --- /dev/null +++ b/cmd/metis-sentinel/main_test.go @@ -0,0 +1,115 @@ +package main + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "metis/pkg/sentinel" +) + +func TestSentinelMainWritesHistoryAndPushesSnapshot(t *testing.T) { /* Calls main() with the METIS_SENTINEL_* variables set and fake host tools first on PATH, then checks that exactly one history file mentioning titan-13 exists and that the push server was hit. */ + fakeDir := fakeSentinelCommands(t) + t.Setenv("PATH", fakeDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + historyDir := filepath.Join(t.TempDir(), "history") + pushed := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + pushed = true + if r.Method != http.MethodPost { + t.Errorf("expected POST, got %s", r.Method) /* Errorf, not Fatalf: this handler runs on a server goroutine, and FailNow may only be called from the goroutine running the test. */ + } + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + t.Setenv("METIS_SENTINEL_RUN_ONCE", "1") + t.Setenv("METIS_SENTINEL_OUT", historyDir) + t.Setenv("METIS_SENTINEL_PUSH_URL", srv.URL) + t.Setenv("METIS_SENTINEL_INTERVAL_SEC", "1") + + main() + + entries, err := os.ReadDir(historyDir) + if err != nil { + t.Fatalf("ReadDir history: %v", err) + } + if len(entries) != 1 { + t.Fatalf("expected one history entry, got %d", len(entries)) + } + data, err := os.ReadFile(filepath.Join(historyDir, entries[0].Name())) + if err != nil { + t.Fatalf("ReadFile history: %v", err) + } + if !strings.Contains(string(data), `"hostname": "titan-13"`) { + t.Fatalf("history file missing snapshot data: %s", data) + } + if !pushed { + t.Fatal("expected pushSnapshot to POST to server") + } +} + +func 
TestSentinelHelpers(t *testing.T) { /* Covers the getenvInt fallback and override, writeHistory producing one file, and pushSnapshot sending a POST whose body decodes as JSON. */ + if got := getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300); got != 300 { + t.Fatalf("getenvInt fallback = %d", got) + } + t.Setenv("METIS_SENTINEL_INTERVAL_SEC", "5") + if got := getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300); got != 5 { + t.Fatalf("getenvInt = %d", got) + } + + dir := t.TempDir() + snap := &sentinel.Snapshot{Hostname: "titan-13", Kernel: "6.6.63"} + writeHistory(dir, snap) + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + if len(entries) != 1 { + t.Fatalf("expected one file, got %d", len(entries)) + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + t.Errorf("expected POST, got %s", r.Method) /* Errorf, not Fatalf: this handler runs on a server goroutine, and FailNow may only be called from the goroutine running the test. */ + } + var payload map[string]any + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + t.Errorf("decode push body: %v", err) /* Errorf for the same reason; the handler still answers so the client call returns normally. */ + } + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + if err := pushSnapshot(srv.URL, snap); err != nil { + t.Fatalf("pushSnapshot: %v", err) + } +} + +func fakeSentinelCommands(t *testing.T) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("hostname", `printf 'titan-13\n'`) + write("uname", `printf '6.6.63\n'`) + write("k3s", `printf 'v1.31.5+k3s1\n'`) + write("containerd", `printf '1.7.99\n'`) + write("cat", `printf 'PRETTY_NAME="Metis OS"\n'`) + write("dpkg-query", `case "${@: -1}" in + containerd) printf '1.7.99\n' ;; + k3s) printf 'v1.31.5+k3s1\n' ;; + nvidia-container-toolkit) printf '1.16.2\n' ;; + linux-image-raspi) printf '6.6.63\n' ;; + *) printf '1.0.0\n' ;; +esac`) + write("rpm", `printf '1.0.0\n'`) + return dir +} diff --git a/cmd/metis/config_cmd.go b/cmd/metis/config_cmd.go index f998bfc..46ab6d6 100644 --- 
a/cmd/metis/config_cmd.go +++ b/cmd/metis/config_cmd.go @@ -1,28 +1,27 @@ package main import ( - "encoding/json" - "flag" - "log" - "os" + "encoding/json" + "flag" + "os" - "metis/pkg/config" + "metis/pkg/config" ) func configCmd(args []string) { - fs := flag.NewFlagSet("config", flag.ExitOnError) - invPath := fs.String("inventory", "inventory.yaml", "inventory file") - node := fs.String("node", "", "target node") - fs.Parse(args) - if *node == "" { - log.Fatalf("--node is required") - } - inv := loadInventory(*invPath) - cfg, err := config.Build(inv, *node) - if err != nil { - log.Fatalf("config build: %v", err) - } - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - _ = enc.Encode(cfg) + fs := flag.NewFlagSet("config", flag.ExitOnError) + invPath := fs.String("inventory", "inventory.yaml", "inventory file") + node := fs.String("node", "", "target node") + fs.Parse(args) + if *node == "" { + fatalf("--node is required") + } + inv := loadInventory(*invPath) + cfg, err := config.Build(inv, *node) + if err != nil { + fatalf("config build: %v", err) + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + _ = enc.Encode(cfg) } diff --git a/cmd/metis/facts_cmd.go b/cmd/metis/facts_cmd.go index edf0b3d..fb05da7 100644 --- a/cmd/metis/facts_cmd.go +++ b/cmd/metis/facts_cmd.go @@ -4,7 +4,6 @@ import ( "encoding/json" "flag" "fmt" - "log" "os" "metis/pkg/facts" @@ -18,11 +17,11 @@ func factsCmd(args []string) { fs.Parse(args) inv, err := inventory.Load(*invPath) if err != nil { - log.Fatalf("load inventory: %v", err) + fatalf("load inventory: %v", err) } snaps, err := facts.LoadDir(*dir) if err != nil { - log.Fatalf("load snapshots: %v", err) + fatalf("load snapshots: %v", err) } sum := facts.Aggregate(inv, snaps) enc := json.NewEncoder(os.Stdout) diff --git a/cmd/metis/gate_test.go b/cmd/metis/gate_test.go new file mode 100644 index 0000000..f76d8f7 --- /dev/null +++ b/cmd/metis/gate_test.go @@ -0,0 +1,244 @@ +package main + +import ( + "net/http" + 
"os" + "path/filepath" + "strings" + "testing" +) + +func TestMainDispatchAllCommands(t *testing.T) { + root := t.TempDir() + invPath, baseImage := writeTestInventory(t, root) + snapDir := filepath.Join(root, "snapshots") + if err := os.MkdirAll(snapDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(snapDir, "snap.json"), []byte(`{"hostname":"titan-15","kernel":"6.6.63","package_sample":{"containerd":"1.7"}}`), 0o644); err != nil { + t.Fatal(err) + } + + fakeTools := fakeCommandDir(t, map[string]string{ + "lsblk": `cat <<'JSON' +{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro SD","tran":"usb","type":"disk"}]} +JSON`, + "sfdisk": `cat <<'JSON' +{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}} +JSON`, + "debugfs": `if [[ "${1:-}" == "-w" ]]; then + cp "${3:-}" "${4:-}.commands" + exit 0 +fi +if [[ "${1:-}" == "-R" ]]; then + state="${3:-}.commands" + set -- $2 + case "${1:-}" in + stat) + mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)" + mode="${mode: -4}" + printf 'Mode: %s\n' "${mode}" + exit 0 + ;; + dump) + local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)" + cat "${local_path}" > "${3:-}" + exit 0 + ;; + esac +fi +exit 0`, + "xz": `case "${1:-}" in + -T0) cp "${@: -1}" "${@: -1}.xz" ;; + -dc) cat "${2:-}" ;; +esac +exit 0`, + "oras": `case "${1:-}" in + login|tag) exit 0 ;; + push) exit 0 ;; + pull) + outdir="${@: -1}" + cp "` + baseImage + `" "${outdir}/titan-15.img" + exit 0 + ;; +esac +exit 0`, + }) + t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH")) + t.Setenv("METIS_INVENTORY_PATH", invPath) + t.Setenv("METIS_DATA_DIR", filepath.Join(root, "data")) + listenAndServe = func(addr string, _ http.Handler) error { return nil } + t.Cleanup(func() { listenAndServe = httpListenAndServe }) + + 
callMain := func(args ...string) { + oldArgs := os.Args + os.Args = append([]string{"metis"}, args...) + defer func() { os.Args = oldArgs }() + main() + } + + callMain("config", "--inventory", invPath, "--node", "titan-15") + callMain("facts", "--inventory", invPath, "--snapshots", snapDir) + callMain("plan", "--inventory", invPath, "--node", "titan-15") + callMain("burn", "--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz") + callMain("image", "--inventory", invPath, "--node", "titan-15", "--output", filepath.Join(root, "out.img")) + callMain("inject", "--inventory", invPath, "--node", "titan-15", "--boot", filepath.Join(root, "boot"), "--root", filepath.Join(root, "root")) + callMain("serve", "--bind", ":0") + callMain("remote-devices", "--host-tmp-dir", filepath.Join(root, "host-tmp")) + callMain("remote-build", "--inventory", invPath, "--node", "titan-15", "--artifact-ref", "registry.example/metis/titan-15", "--build-tag", "build-1", "--work-dir", filepath.Join(root, "build"), "--cache", filepath.Join(root, "cache"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw") + callMain("remote-flash", "--node", "titan-15", "--device", filepath.Join(root, "flash.img"), "--artifact-ref", "registry.example/metis/titan-15", "--work-dir", filepath.Join(root, "flash"), "--host-tmp-dir", filepath.Join(root, "host-tmp"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw") +} + +func TestMainAndCommandFatalBranches(t *testing.T) { + trap := func() { + fatalf = func(format string, args ...any) { + panic("fatal: " + format) + } + exit = func(code int) { + panic("exit") + } + t.Cleanup(func() { + fatalf = httpLogFatalf + exit = httpExit + }) + } + trap() + + mustPanic := func(fn func()) { + t.Helper() + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic") + } + }() + fn() + } + + mustPanic(func() { + oldArgs := os.Args + os.Args = []string{"metis"} + 
defer func() { os.Args = oldArgs }() + main() + }) + mustPanic(func() { + oldArgs := os.Args + os.Args = []string{"metis", "bogus"} + defer func() { os.Args = oldArgs }() + main() + }) + mustPanic(func() { configCmd(nil) }) + mustPanic(func() { planCmd([]string{"--inventory", "/nope", "--node", "titan-15"}) }) + mustPanic(func() { burnCmd([]string{"--inventory", "/nope", "--node", "titan-15", "--device", "/dev/sdz"}) }) + mustPanic(func() { imageCmd(nil) }) + mustPanic(func() { injectCmd(nil) }) + mustPanic(func() { factsCmd([]string{"--inventory", "/nope", "--snapshots", "/nope"}) }) + mustPanic(func() { serveCmd([]string{"--bind", ":0"}) }) + mustPanic(func() { remoteBuildCmd([]string{"--node", "n1"}) }) + mustPanic(func() { remoteFlashCmd([]string{"--node", "n1"}) }) +} + +func TestRemoteCommandHelpers(t *testing.T) { + if !hasMountedChildren([]struct { + Mountpoint string `json:"mountpoint"` + }{{Mountpoint: "/mnt"}}) { + t.Fatal("hasMountedChildren should detect a mount point") + } + if got := humanHostPath("/host-tmp/metis-flash"); got != "/tmp/metis-flash" { + t.Fatalf("humanHostPath = %q", got) + } + t.Setenv("METIS_REMOTE_SAMPLE", "value") + if got := getenvOr("METIS_REMOTE_SAMPLE", "fallback"); got != "value" { + t.Fatalf("getenvOr = %q", got) + } + + dir := t.TempDir() + base := filepath.Join(dir, "base.img") + if err := os.WriteFile(base, []byte("artifact"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(base+".meta", []byte(`{"meta":true}`), 0o644); err != nil { + t.Fatal(err) + } + fakeTools := fakeCommandDir(t, map[string]string{ + "oras": `case "${1:-}" in + login|tag) exit 0 ;; + push) exit 0 ;; + pull) + outdir="${@: -1}" + cp "` + base + `" "${outdir}/artifact.img" + exit 0 + ;; +esac +exit 0`, + "lsblk": `cat <<'JSON' +{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro 
SD","tran":"usb","type":"disk","children":[{"mountpoint":""}]},{"name":"sdy","path":"/dev/sdy","rm":true,"hotplug":true,"size":"64000000000","model":"SSD","tran":"usb","type":"disk","children":[{"mountpoint":"/mnt"}]}]} +JSON`, + }) + t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH")) + + if err := orasLogin("registry.example", "", ""); err == nil { + t.Fatal("expected orasLogin to reject missing creds") + } + if err := orasLogin("registry.example", "u", "p"); err != nil { + t.Fatalf("orasLogin: %v", err) + } + if _, _, err := orasPushInvocation("r", filepath.Join(dir, "one", "a.img"), filepath.Join(dir, "two", "b.meta")); err == nil { + t.Fatal("expected orasPushInvocation mismatch error") + } + pushDir := filepath.Join(dir, "push") + if err := os.MkdirAll(pushDir, 0o755); err != nil { + t.Fatal(err) + } + img := filepath.Join(pushDir, "a.img") + meta := filepath.Join(pushDir, "a.meta") + if err := os.WriteFile(img, []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(meta, []byte(`{}`), 0o644); err != nil { + t.Fatal(err) + } + if _, args, err := orasPushInvocation("ref", img, meta); err != nil || len(args) == 0 { + t.Fatalf("orasPushInvocation success = %#v %v", args, err) + } + if err := orasPush("ref", img, meta); err != nil { + t.Fatalf("orasPush: %v", err) + } + if err := orasTag("ref", "latest"); err != nil { + t.Fatalf("orasTag: %v", err) + } + pullDir := filepath.Join(dir, "pull") + if err := os.MkdirAll(pullDir, 0o755); err != nil { + t.Fatal(err) + } + if err := orasPull("ref", pullDir); err != nil { + t.Fatalf("orasPull: %v", err) + } + artifact, compressed, err := resolvePulledArtifact(pullDir) + if err != nil || compressed || !strings.HasSuffix(artifact, ".img") { + t.Fatalf("resolvePulledArtifact raw = %q compressed=%v err=%v", artifact, compressed, err) + } + if err := os.WriteFile(filepath.Join(pullDir, "artifact.img.xz"), []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + artifact, compressed, 
err = resolvePulledArtifact(pullDir) + if err != nil || !compressed || !strings.HasSuffix(artifact, ".img.xz") { + t.Fatalf("resolvePulledArtifact xz = %q compressed=%v err=%v", artifact, compressed, err) + } + if _, _, err := resolvePulledArtifact(filepath.Join(dir, "missing")); err == nil { + t.Fatal("expected resolvePulledArtifact error") + } + devices, err := localFlashDevices(40000000000, filepath.Join(dir, "host-tmp")) + if err != nil { + t.Fatalf("localFlashDevices: %v", err) + } + if len(devices) == 0 || devices[0].Path != "/dev/sdz" { + t.Fatalf("localFlashDevices = %#v", devices) + } + writeStructuredResult(map[string]any{"ok": true}) +} + +var ( + httpLogFatalf = fatalf + httpExit = exit + httpListenAndServe = listenAndServe +) diff --git a/cmd/metis/image_cmd.go b/cmd/metis/image_cmd.go index 2d91b76..db4a373 100644 --- a/cmd/metis/image_cmd.go +++ b/cmd/metis/image_cmd.go @@ -4,7 +4,6 @@ import ( "context" "flag" "fmt" - "log" "os" "path/filepath" @@ -19,7 +18,7 @@ func imageCmd(args []string) { cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir") fs.Parse(args) if *node == "" { - log.Fatalf("--node is required") + fatalf("--node is required") } inv := loadInventory(*invPath) @@ -29,7 +28,7 @@ func imageCmd(args []string) { } if err := plan.BuildImageFile(context.Background(), inv, *node, *cache, targetOutput); err != nil { - log.Fatalf("build image: %v", err) + fatalf("build image: %v", err) } fmt.Printf("Wrote %s\n", targetOutput) diff --git a/cmd/metis/inject_cmd.go b/cmd/metis/inject_cmd.go index 92cfc64..b86e65d 100644 --- a/cmd/metis/inject_cmd.go +++ b/cmd/metis/inject_cmd.go @@ -2,7 +2,6 @@ package main import ( "flag" - "log" "metis/pkg/plan" ) @@ -15,13 +14,13 @@ func injectCmd(args []string) { root := fs.String("root", "", "mounted root path") fs.Parse(args) if *node == "" { - log.Fatalf("--node is required") + fatalf("--node is required") } if *boot == "" && *root == "" { - log.Fatalf("--boot or --root 
is required") + fatalf("--boot or --root is required") } inv := loadInventory(*invPath) if err := plan.Inject(inv, *node, *boot, *root); err != nil { - log.Fatalf("inject: %v", err) + fatalf("inject: %v", err) } } diff --git a/cmd/metis/main.go b/cmd/metis/main.go index c65a9c3..8cac6c3 100644 --- a/cmd/metis/main.go +++ b/cmd/metis/main.go @@ -12,10 +12,15 @@ import ( "metis/pkg/plan" ) +var ( + fatalf = log.Fatalf + exit = os.Exit +) + func main() { if len(os.Args) < 2 { usage() - os.Exit(1) + exit(1) } switch os.Args[1] { case "plan": @@ -40,7 +45,7 @@ func main() { remoteFlashCmd(os.Args[2:]) default: usage() - os.Exit(1) + exit(1) } } @@ -51,7 +56,7 @@ func usage() { func loadInventory(path string) *inventory.Inventory { inv, err := inventory.Load(path) if err != nil { - log.Fatalf("load inventory: %v", err) + fatalf("load inventory: %v", err) } return inv } @@ -66,7 +71,7 @@ func planCmd(args []string) { root := fs.String("root", "", "mounted root path for injection (optional)") fs.Parse(args) if *node == "" { - log.Fatalf("--node is required") + fatalf("--node is required") } inv := loadInventory(*invPath) if *boot != "" { @@ -77,7 +82,7 @@ func planCmd(args []string) { } p, err := plan.Build(inv, *node, *device, *cache) if err != nil { - log.Fatalf("build plan: %v", err) + fatalf("build plan: %v", err) } enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") @@ -96,7 +101,7 @@ func burnCmd(args []string) { confirm := fs.Bool("yes", false, "actually write to device") fs.Parse(args) if *node == "" || *device == "" { - log.Fatalf("--node and --device are required") + fatalf("--node and --device are required") } inv := loadInventory(*invPath) if *boot != "" { @@ -110,7 +115,7 @@ func burnCmd(args []string) { } p, err := plan.Execute(inv, *node, *device, *cache, *confirm) if err != nil { - log.Fatalf("burn: %v", err) + fatalf("burn: %v", err) } fmt.Printf("Plan for %s to %s:\n", p.Node, p.Device) for _, a := range p.Actions { diff --git 
a/cmd/metis/main_test.go b/cmd/metis/main_test.go new file mode 100644 index 0000000..418ba53 --- /dev/null +++ b/cmd/metis/main_test.go @@ -0,0 +1,291 @@ +package main + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestUsageWritesSupportedCommands(t *testing.T) { + stdout, stderr := captureStreams(t, func() { + usage() + }) + if stdout != "" { + t.Fatalf("usage wrote stdout: %q", stdout) + } + if !strings.Contains(stderr, "remote-flash") || !strings.Contains(stderr, "plan") { + t.Fatalf("usage output missing commands: %q", stderr) + } +} + +func TestConfigFactsPlanBurnImageInjectAndServeCommands(t *testing.T) { + root := t.TempDir() + invPath, baseImage := writeTestInventory(t, root) + t.Setenv("METIS_INVENTORY_PATH", invPath) + t.Setenv("METIS_DATA_DIR", filepath.Join(root, "data")) + t.Setenv("METIS_BOOT_PATH", "") + t.Setenv("METIS_ROOT_PATH", "") + + stdout, _ := captureStreams(t, func() { + configCmd([]string{"--inventory", invPath, "--node", "titan-15"}) + }) + if !strings.Contains(stdout, `"hostname": "titan-15"`) { + t.Fatalf("config output missing hostname: %s", stdout) + } + + snapDir := filepath.Join(root, "snapshots") + if err := os.MkdirAll(snapDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(snapDir, "snap.json"), []byte(`{"hostname":"titan-15","kernel":"6.6.63","package_sample":{"containerd":"1.7"}}`), 0o644); err != nil { + t.Fatal(err) + } + stdout, _ = captureStreams(t, func() { + factsCmd([]string{"--inventory", invPath, "--snapshots", snapDir}) + }) + if !strings.Contains(stdout, `"class": "rpi4"`) { + t.Fatalf("facts output missing class summary: %s", stdout) + } + + stdout, _ = captureStreams(t, func() { + planCmd([]string{"--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz", "--cache", filepath.Join(root, "cache")}) + }) + if !strings.Contains(stdout, `"node": "titan-15"`) || !strings.Contains(stdout, 
`"actions"`) { + t.Fatalf("plan output missing plan JSON: %s", stdout) + } + + rootTools := fakeCommandDir(t, map[string]string{ + "sfdisk": `cat <<'JSON' +{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}} +JSON`, + "debugfs": `if [[ "${1:-}" == "-w" ]]; then + cp "${3:-}" "${4:-}.commands" + exit 0 +fi +if [[ "${1:-}" == "-R" ]]; then + state="${3:-}.commands" + set -- $2 + case "${1:-}" in + stat) + mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)" + mode="${mode: -4}" + printf 'Mode: %s\n' "${mode}" + exit 0 + ;; + dump) + local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)" + cat "${local_path}" > "${3:-}" + exit 0 + ;; + esac +fi +exit 0`, + }) + t.Setenv("PATH", rootTools+string(os.PathListSeparator)+os.Getenv("PATH")) + stdout, _ = captureStreams(t, func() { + imageCmd([]string{"--inventory", invPath, "--node", "titan-15", "--output", filepath.Join(root, "out.img"), "--cache", filepath.Join(root, "cache")}) + }) + if !strings.Contains(stdout, "Wrote ") { + t.Fatalf("image output missing write confirmation: %s", stdout) + } + + stdout, _ = captureStreams(t, func() { + burnCmd([]string{"--inventory", invPath, "--node", "titan-15", "--device", "/dev/sdz", "--cache", filepath.Join(root, "cache")}) + }) + if !strings.Contains(stdout, "Plan for titan-15 to /dev/sdz") { + t.Fatalf("burn output missing plan header: %s", stdout) + } + + bootDir := filepath.Join(root, "boot") + rootDir := filepath.Join(root, "root") + if err := os.MkdirAll(bootDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(rootDir, 0o755); err != nil { + t.Fatal(err) + } + captureStreams(t, func() { + injectCmd([]string{"--inventory", invPath, "--node", "titan-15", "--boot", bootDir, "--root", rootDir}) + }) + if _, err := os.Stat(filepath.Join(rootDir, "etc/metis/node.json")); err != nil { + t.Fatalf("injectCmd did not write root 
file: %v", err) + } + + listenAndServe = func(addr string, handler http.Handler) error { + if addr != ":0" { + t.Fatalf("unexpected bind addr: %s", addr) + } + return nil + } + t.Cleanup(func() { listenAndServe = http.ListenAndServe }) + t.Setenv("METIS_BIND_ADDR", ":0") + t.Setenv("METIS_INVENTORY_PATH", invPath) + serveCmd([]string{"--bind", ":0"}) + _ = baseImage +} + +func TestMainDispatchesConfig(t *testing.T) { + root := t.TempDir() + invPath, _ := writeTestInventory(t, root) + oldArgs := os.Args + os.Args = []string{"metis", "config", "--inventory", invPath, "--node", "titan-15"} + t.Cleanup(func() { os.Args = oldArgs }) + main() +} + +func TestRemoteCommandsAndHelpers(t *testing.T) { + root := t.TempDir() + invPath, baseImage := writeTestInventory(t, root) + fakeTools := fakeCommandDir(t, map[string]string{ + "lsblk": `cat <<'JSON' +{"blockdevices":[{"name":"sdz","path":"/dev/sdz","rm":true,"hotplug":true,"size":"32000000000","model":"Micro SD","tran":"usb","type":"disk"}]} +JSON`, + "sfdisk": `cat <<'JSON' +{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}} +JSON`, + "debugfs": `if [[ "${1:-}" == "-w" ]]; then + cp "${3:-}" "${4:-}.commands" + exit 0 +fi +if [[ "${1:-}" == "-R" ]]; then + state="${3:-}.commands" + set -- $2 + case "${1:-}" in + stat) + mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)" + mode="${mode: -4}" + printf 'Mode: %s\n' "${mode}" + exit 0 + ;; + dump) + local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)" + cat "${local_path}" > "${3:-}" + exit 0 + ;; + esac +fi +exit 0`, + "xz": `dest="${@: -1}"; cp "$dest" "$dest.xz"`, + "oras": `case "${1:-}" in + login|tag) exit 0 ;; + push) exit 0 ;; + pull) + outdir="${@: -1}" + cp "` + baseImage + `" "${outdir}/titan-15.img" + exit 0 + ;; +esac +exit 0`, + }) + t.Setenv("PATH", fakeTools+string(os.PathListSeparator)+os.Getenv("PATH")) + + stdout, 
_ := captureStreams(t, func() { + remoteDevicesCmd([]string{"--max-device-bytes", "40000000000", "--host-tmp-dir", filepath.Join(root, "host-tmp")}) + }) + if !strings.Contains(stdout, `"path":"/dev/sdz"`) { + t.Fatalf("remoteDevicesCmd output missing device: %s", stdout) + } + + stdout, _ = captureStreams(t, func() { + remoteBuildCmd([]string{"--inventory", invPath, "--node", "titan-15", "--artifact-ref", "registry.example/metis/titan-15", "--build-tag", "build-1", "--work-dir", filepath.Join(root, "build"), "--cache", filepath.Join(root, "cache"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw"}) + }) + if !strings.Contains(stdout, `"build_tag":"build-1"`) { + t.Fatalf("remoteBuildCmd output missing build tag: %s", stdout) + } + + stdout, _ = captureStreams(t, func() { + remoteFlashCmd([]string{"--node", "titan-15", "--device", "hosttmp:///tmp", "--artifact-ref", "registry.example/metis/titan-15", "--work-dir", filepath.Join(root, "flash"), "--host-tmp-dir", filepath.Join(root, "host-tmp"), "--harbor-registry", "registry.example", "--harbor-username", "admin", "--harbor-password", "pw"}) + }) + if !strings.Contains(stdout, `"dest_path"`) { + t.Fatalf("remoteFlashCmd output missing dest_path: %s", stdout) + } +} + +/* captureStreams runs fn with os.Stdout and os.Stderr replaced by pipes and returns what fn wrote to each (stdout first, then stderr). + Each pipe is drained on its own goroutine, so fn cannot block when one stream fills its pipe buffer while the other is still open. + The original streams are restored when captureStreams returns, including when fn panics. */ +func captureStreams(t *testing.T, fn func()) (string, string) { + t.Helper() + oldStdout, oldStderr := os.Stdout, os.Stderr + stdoutR, stdoutW, err := os.Pipe() + if err != nil { + t.Fatalf("stdout pipe: %v", err) + } + stderrR, stderrW, err := os.Pipe() + if err != nil { + t.Fatalf("stderr pipe: %v", err) + } + os.Stdout, os.Stderr = stdoutW, stderrW + defer func() { os.Stdout, os.Stderr = oldStdout, oldStderr }() + drain := func(r *os.File) <-chan string { /* reads r to EOF in the background, closes it, and delivers the captured text once */ + ch := make(chan string, 1) + go func() { + defer r.Close() + var buf bytes.Buffer + _, _ = io.Copy(&buf, r) + ch <- buf.String() + }() + return ch + } + outCh, errCh := drain(stdoutR), drain(stderrR) + fn() + _ = stdoutW.Close() /* closing the write ends is what lets both drains reach EOF */ + _ = stderrW.Close() + return <-outCh, 
<-errCh +} + +func writeTestInventory(t *testing.T, root string) (string, string) { + t.Helper() + baseImage := filepath.Join(root, "base.img") + if err := os.WriteFile(baseImage, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + invPath := filepath.Join(root, "inventory.yaml") + inv := `classes: + - name: rpi4 + arch: arm64 + os: armbian + image: file://` + baseImage + ` + checksum: sha256:` + sha256SumHex(t, make([]byte, 4096)) + ` + k3s_version: v1.31.5+k3s1 +nodes: + - name: titan-15 + class: rpi4 + hostname: titan-15 + ip: 192.168.22.43 + k3s_role: agent + k3s_url: https://192.168.22.7:6443 + k3s_token: token + ssh_user: atlas + ssh_authorized_keys: + - ssh-ed25519 AAA +` + if err := os.WriteFile(invPath, []byte(inv), 0o644); err != nil { + t.Fatal(err) + } + return invPath, baseImage +} + +func sha256SumHex(t *testing.T, data []byte) string { + t.Helper() + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]) +} + +func fakeCommandDir(t *testing.T, scripts map[string]string) string { + t.Helper() + dir := t.TempDir() + for name, body := range scripts { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + return dir +} diff --git a/cmd/metis/remote_cmd.go b/cmd/metis/remote_cmd.go index aa7dd6c..9445281 100644 --- a/cmd/metis/remote_cmd.go +++ b/cmd/metis/remote_cmd.go @@ -5,7 +5,6 @@ import ( "encoding/json" "flag" "fmt" - "log" "os" "os/exec" "path/filepath" @@ -27,7 +26,7 @@ func remoteDevicesCmd(args []string) { devices, err := localFlashDevices(*maxBytes, *hostTmpDir) if err != nil { - log.Fatalf("remote devices: %v", err) + fatalf("remote devices: %v", err) } sort.Slice(devices, func(i, j int) bool { left := localDeviceScore(devices[i]) @@ -56,24 +55,24 @@ func remoteBuildCmd(args []string) { harborPassword := fs.String("harbor-password", getenvOr("METIS_HARBOR_PASSWORD", ""), "harbor password") 
fs.Parse(args) if *node == "" || *artifactRef == "" || *buildTag == "" { - log.Fatalf("--node, --artifact-ref, and --build-tag are required") + fatalf("--node, --artifact-ref, and --build-tag are required") } if err := os.MkdirAll(*workDir, 0o755); err != nil { - log.Fatalf("mkdir workdir: %v", err) + fatalf("mkdir workdir: %v", err) } output := filepath.Join(*workDir, fmt.Sprintf("%s.img", *node)) inv := loadInventory(*invPath) if err := plan.BuildImageFile(context.Background(), inv, *node, *cacheDir, output); err != nil { - log.Fatalf("build image: %v", err) + fatalf("build image: %v", err) } if err := exec.Command("xz", "-T0", "-z", "-f", output).Run(); err != nil { - log.Fatalf("xz compress: %v", err) + fatalf("xz compress: %v", err) } compressedPath := output + ".xz" info, err := os.Stat(compressedPath) if err != nil { - log.Fatalf("stat compressed image: %v", err) + fatalf("stat compressed image: %v", err) } metadataPath := filepath.Join(*workDir, "metadata.json") @@ -88,20 +87,20 @@ func remoteBuildCmd(args []string) { } metaBytes, err := json.MarshalIndent(meta, "", " ") if err != nil { - log.Fatalf("encode metadata: %v", err) + fatalf("encode metadata: %v", err) } if err := os.WriteFile(metadataPath, metaBytes, 0o644); err != nil { - log.Fatalf("write metadata: %v", err) + fatalf("write metadata: %v", err) } if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil { - log.Fatalf("oras login: %v", err) + fatalf("oras login: %v", err) } taggedRef := fmt.Sprintf("%s:%s", *artifactRef, *buildTag) if err := orasPush(taggedRef, compressedPath, metadataPath); err != nil { - log.Fatalf("oras push: %v", err) + fatalf("oras push: %v", err) } if err := orasTag(taggedRef, "latest"); err != nil { - log.Fatalf("oras tag latest: %v", err) + fatalf("oras tag latest: %v", err) } summary := service.ArtifactSummary{ @@ -128,40 +127,40 @@ func remoteFlashCmd(args []string) { hostTmpDir := fs.String("host-tmp-dir", "/host-tmp/metis-flash-test", 
"mounted host tmp dir for test writes") fs.Parse(args) if *node == "" || *device == "" || *artifactRef == "" { - log.Fatalf("--node, --device, and --artifact-ref are required") + fatalf("--node, --device, and --artifact-ref are required") } if err := os.MkdirAll(*workDir, 0o755); err != nil { - log.Fatalf("mkdir workdir: %v", err) + fatalf("mkdir workdir: %v", err) } if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil { - log.Fatalf("oras login: %v", err) + fatalf("oras login: %v", err) } if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil { - log.Fatalf("oras pull: %v", err) + fatalf("oras pull: %v", err) } imagePath, compressed, err := resolvePulledArtifact(*workDir) if err != nil { - log.Fatalf("resolve artifact: %v", err) + fatalf("resolve artifact: %v", err) } rawImage := imagePath if compressed { rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node)) cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage)) if out, err := cmd.CombinedOutput(); err != nil { - log.Fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out))) + fatalf("xz stream decompress: %v: %s", err, strings.TrimSpace(string(out))) } } destPath := *device if strings.HasPrefix(destPath, "hosttmp://") { if err := os.MkdirAll(*hostTmpDir, 0o755); err != nil { - log.Fatalf("mkdir host tmp dir: %v", err) + fatalf("mkdir host tmp dir: %v", err) } destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node)) } if err := writer.WriteImage(context.Background(), rawImage, destPath); err != nil { - log.Fatalf("write image: %v", err) + fatalf("write image: %v", err) } _ = exec.Command("sync").Run() if strings.HasPrefix(destPath, "/dev/") { @@ -170,7 +169,7 @@ func remoteFlashCmd(args []string) { info, err := os.Stat(destPath) if err != nil { - log.Fatalf("stat destination: %v", err) + fatalf("stat destination: %v", err) } writeStructuredResult(map[string]any{ "node": *node, @@ -183,10 
+182,10 @@ func remoteFlashCmd(args []string) { func writeStructuredResult(payload any) { data, err := json.Marshal(payload) if err != nil { - log.Fatalf("encode result: %v", err) + fatalf("encode result: %v", err) } if _, err := os.Stdout.Write(append(data, '\n')); err != nil { - log.Fatalf("write stdout result: %v", err) + fatalf("write stdout result: %v", err) } // Keep the result available in pod status so Metis does not depend on the // kubelet log endpoint for successful worker runs. diff --git a/cmd/metis/serve_cmd.go b/cmd/metis/serve_cmd.go index 0997475..31e00bd 100644 --- a/cmd/metis/serve_cmd.go +++ b/cmd/metis/serve_cmd.go @@ -8,6 +8,8 @@ import ( "metis/pkg/service" ) +var listenAndServe = http.ListenAndServe + func serveCmd(args []string) { fs := flag.NewFlagSet("serve", flag.ExitOnError) bindAddr := fs.String("bind", "", "override bind address") @@ -19,10 +21,10 @@ func serveCmd(args []string) { } app, err := service.NewApp(settings) if err != nil { - log.Fatalf("init service: %v", err) + fatalf("init service: %v", err) } log.Printf("metis listening on %s", settings.BindAddr) - if err := http.ListenAndServe(settings.BindAddr, app.Handler()); err != nil { - log.Fatalf("serve: %v", err) + if err := listenAndServe(settings.BindAddr, app.Handler()); err != nil { + fatalf("serve: %v", err) } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go new file mode 100644 index 0000000..216918e --- /dev/null +++ b/pkg/config/config_test.go @@ -0,0 +1,54 @@ +package config + +import ( + "testing" + + "metis/pkg/inventory" +) + +func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) { + inv := inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file:///tmp/base.img", + K3sVersion: "v1.30.0+k3s1", + DefaultLabels: map[string]string{"role": "worker"}, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "n1", + Class: "c1", + Hostname: "n1", + IP: "1.1.1.1", + K3sRole: "server", + 
K3sVersion: "v1.31.5+k3s1", + SSHUser: "atlas", + SSHAuthorized: []string{"key"}, + LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/data", UUID: "uuid-1"}}, + }}, + } + cfg, err := Build(&inv, "n1") + if err != nil { + t.Fatalf("Build: %v", err) + } + if got, want := cfg.K3s.Version, "v1.31.5+k3s1"; got != want { + t.Fatalf("k3s version = %q, want %q", got, want) + } + if got := cfg.Fstab[0].FS; got != "ext4" { + t.Fatalf("expected default filesystem ext4, got %q", got) + } + if got := cfg.Labels["role"]; got != "worker" { + t.Fatalf("label merge lost default label: %q", got) + } + if cfg.K3s.Role != "server" { + t.Fatalf("expected server role, got %q", cfg.K3s.Role) + } +} + +func TestBuildReturnsErrorForMissingNode(t *testing.T) { + inv := inventory.Inventory{} + if _, err := Build(&inv, "missing"); err == nil { + t.Fatal("expected missing node error") + } +} diff --git a/pkg/config/coverage_more_test.go b/pkg/config/coverage_more_test.go new file mode 100644 index 0000000..95b8599 --- /dev/null +++ b/pkg/config/coverage_more_test.go @@ -0,0 +1,51 @@ +package config + +import ( + "testing" + + "metis/pkg/inventory" +) + +func TestBuildBranches(t *testing.T) { + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "rpi4", + Arch: "arm64", + OS: "armbian", + Image: "file:///tmp/base.img", + K3sVersion: "v1.31.5+k3s1", + DefaultLabels: map[string]string{"a": "class", "b": "class"}, + DefaultTaints: []string{"class-taint"}, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "titan-15", + Class: "rpi4", + Hostname: "titan-15", + IP: "192.168.22.43", + K3sRole: "agent", + K3sVersion: "v1.31.5+k3s2", + K3sURL: "https://192.168.22.7:6443", + K3sToken: "token", + Labels: map[string]string{"c": "node"}, + Taints: []string{"node-taint"}, + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + LonghornDisks: []inventory.LonghornDisk{{UUID: "u1", Mountpoint: "/var/lib/longhorn"}}, + }}, + } + cfg, err := Build(inv, "titan-15") + if err != 
nil { + t.Fatalf("Build: %v", err) + } + if cfg.K3s.Version != "v1.31.5+k3s2" || len(cfg.Fstab) != 1 || cfg.Fstab[0].FS != "ext4" { + t.Fatalf("unexpected config: %#v", cfg) + } + if _, err := Build(&inventory.Inventory{}, "missing"); err == nil { + t.Fatal("expected Build to fail for missing node") + } + + inv.Nodes[0].Hostname = "" + if _, err := Build(inv, "titan-15"); err == nil { + t.Fatal("expected Build to fail without hostname") + } +} diff --git a/pkg/facts/aggregate.go b/pkg/facts/aggregate.go index 3f15ada..976c769 100644 --- a/pkg/facts/aggregate.go +++ b/pkg/facts/aggregate.go @@ -6,13 +6,13 @@ import ( // ClassSummary captures aggregated sentinel facts per class. type ClassSummary struct { - Class string `json:"class"` - Nodes []string `json:"nodes"` - Kernels map[string]int `json:"kernels,omitempty"` - OSImages map[string]int `json:"os_images,omitempty"` - Containerd map[string]int `json:"containerd,omitempty"` - K3sVersions map[string]int `json:"k3s_versions,omitempty"` - PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count + Class string `json:"class"` + Nodes []string `json:"nodes"` + Kernels map[string]int `json:"kernels,omitempty"` + OSImages map[string]int `json:"os_images,omitempty"` + Containerd map[string]int `json:"containerd,omitempty"` + K3sVersions map[string]int `json:"k3s_versions,omitempty"` + PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count } // Aggregate groups snapshots by inventory class and tallies version drift. 
diff --git a/pkg/facts/aggregate_test.go b/pkg/facts/aggregate_test.go index fc71279..316ad10 100644 --- a/pkg/facts/aggregate_test.go +++ b/pkg/facts/aggregate_test.go @@ -31,3 +31,32 @@ func TestAggregateGroupsByClass(t *testing.T) { t.Fatalf("package stats not tallied: %#v", c1.PackageStats) } } + +func TestAggregateKeepsUnknownHostnames(t *testing.T) { + sum := Aggregate(nil, []Snapshot{{Hostname: "ghost", Kernel: "k"}}) + if sum["unknown"].Nodes[0] != "ghost" { + t.Fatalf("unexpected unknown aggregate: %#v", sum["unknown"]) + } +} + +func TestChooseTargetsHandlesTiesAndEmptyValues(t *testing.T) { + sum := &ClassSummary{ + Kernels: map[string]int{"k1": 2, "k2": 2}, + OSImages: map[string]int{ + "img": 1, + }, + PackageStats: map[string]map[string]int{ + "p": {"": 3, "1": 1}, + }, + } + targets := ChooseTargets(sum) + if targets.Kernel != "" { + t.Fatalf("expected kernel tie to return empty, got %q", targets.Kernel) + } + if targets.OSImage != "img" { + t.Fatalf("expected OS image img, got %q", targets.OSImage) + } + if _, ok := targets.Packages["p"]; ok { + t.Fatalf("expected empty package version to be skipped: %+v", targets.Packages) + } +} diff --git a/pkg/facts/coverage_more_test.go b/pkg/facts/coverage_more_test.go new file mode 100644 index 0000000..ca1a37a --- /dev/null +++ b/pkg/facts/coverage_more_test.go @@ -0,0 +1,52 @@ +package facts + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "metis/pkg/inventory" +) + +func TestAggregateAndLoadBranches(t *testing.T) { + dir := t.TempDir() + nested := filepath.Join(dir, "nested") + if err := os.MkdirAll(nested, 0o755); err != nil { + t.Fatal(err) + } + snapPath := filepath.Join(dir, "one.json") + data, _ := json.Marshal(Snapshot{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"p": "1"}}) + if err := os.WriteFile(snapPath, data, 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nested, "two.json"), data, 0o644); err != nil { + t.Fatal(err) + } + 
snaps, err := LoadDir(dir) + if err != nil || len(snaps) != 2 { + t.Fatalf("LoadDir = %#v err=%v", snaps, err) + } + if _, err := LoadDir(filepath.Join(dir, "missing")); err == nil { + t.Fatal("expected LoadDir to fail for missing dir") + } + sums := Aggregate(nil, snaps) + if sums["unknown"] == nil || len(sums["unknown"].Nodes) != 2 { + t.Fatalf("Aggregate unknown = %#v", sums) + } + + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{Name: "rpi4"}}, + Nodes: []inventory.NodeSpec{{Name: "n1", Class: "rpi4"}}, + } + sums = Aggregate(inv, snaps) + if sums["rpi4"] == nil { + t.Fatalf("expected class summary: %#v", sums) + } + if got := ChooseTargets(&ClassSummary{Kernels: map[string]int{"a": 1, "b": 1}}); got.Kernel != "" { + t.Fatalf("tie should clear target: %#v", got) + } + if got := ChooseTargets(nil); got.Packages == nil { + t.Fatal("ChooseTargets should return initialized package map") + } +} diff --git a/pkg/facts/load_test.go b/pkg/facts/load_test.go index 0652409..2771db5 100644 --- a/pkg/facts/load_test.go +++ b/pkg/facts/load_test.go @@ -20,3 +20,34 @@ func TestLoadDirReadsSnapshots(t *testing.T) { t.Fatalf("unexpected snapshot: %+v", got) } } + +func TestLoadDirRejectsInvalidJSON(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "broken.json"), []byte(`{"hostname":`), 0o644); err != nil { + t.Fatal(err) + } + if _, err := LoadDir(dir); err == nil { + t.Fatal("expected JSON parse error") + } +} + +func TestLoadDirReadsNestedDirectoriesAndMissingDir(t *testing.T) { + dir := t.TempDir() + nested := filepath.Join(dir, "nested") + if err := os.MkdirAll(nested, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nested, "snap.json"), []byte(`{"hostname":"n2"}`), 0o644); err != nil { + t.Fatal(err) + } + got, err := LoadDir(dir) + if err != nil { + t.Fatalf("LoadDir nested: %v", err) + } + if len(got) != 1 || got[0].Hostname != "n2" { + t.Fatalf("unexpected nested snapshots: %+v", got) + } + 
if _, err := LoadDir(filepath.Join(dir, "missing")); err == nil { + t.Fatal("expected missing dir error") + } +} diff --git a/pkg/facts/targets_test.go b/pkg/facts/targets_test.go index 4bc94cd..32187ab 100644 --- a/pkg/facts/targets_test.go +++ b/pkg/facts/targets_test.go @@ -24,3 +24,10 @@ func TestChooseTargetsPicksMostCommon(t *testing.T) { t.Fatalf("package target wrong: %+v", tg.Packages) } } + +func TestChooseTargetsHandlesNilSummary(t *testing.T) { + tg := ChooseTargets(nil) + if tg.Kernel != "" || len(tg.Packages) != 0 { + t.Fatalf("expected zero targets, got %+v", tg) + } +} diff --git a/pkg/facts/types.go b/pkg/facts/types.go index 6086ce5..a5dbb8d 100644 --- a/pkg/facts/types.go +++ b/pkg/facts/types.go @@ -6,10 +6,10 @@ type ClassFacts struct { Kernel string `json:"kernel,omitempty"` K3sVersion string `json:"k3s_version,omitempty"` Containerd string `json:"containerd,omitempty"` - Packages map[string]string `json:"packages,omitempty"` // name -> version - DropIns map[string]string `json:"dropins,omitempty"` // path -> content - Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value - CGroupConfig map[string]string `json:"cgroup_config,omitempty"`// key -> value + Packages map[string]string `json:"packages,omitempty"` // name -> version + DropIns map[string]string `json:"dropins,omitempty"` // path -> content + Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value + CGroupConfig map[string]string `json:"cgroup_config,omitempty"` // key -> value Notes string `json:"notes,omitempty"` } diff --git a/pkg/image/coverage_more_test.go b/pkg/image/coverage_more_test.go new file mode 100644 index 0000000..26df4b0 --- /dev/null +++ b/pkg/image/coverage_more_test.go @@ -0,0 +1,63 @@ +package image + +import ( + "archive/zip" + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "testing" +) + +func TestDownloadAndVerifyXZAndZIPBranches(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + if err := 
os.WriteFile(src, []byte("xz-contents"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "xz"), []byte("#!/usr/bin/env bash\nset -eu\nif [[ ${1:-} == -dc ]]; then\n cat \"$2\"\n exit 0\nfi\ncp \"${@: -1}\" \"${@: -1}.xz\"\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) + if err := os.WriteFile(src+".xz", []byte("xz-contents"), 0o644); err != nil { + t.Fatal(err) + } + + zipPath := filepath.Join(dir, "archive.zip") + zf, err := os.Create(zipPath) + if err != nil { + t.Fatal(err) + } + zw := zip.NewWriter(zf) + w, err := zw.Create("image.img") + if err != nil { + t.Fatal(err) + } + if _, err := w.Write([]byte("zip-contents")); err != nil { + t.Fatal(err) + } + if err := zw.Close(); err != nil { + t.Fatal(err) + } + if err := zf.Close(); err != nil { + t.Fatal(err) + } + + checksum := sha256.Sum256([]byte("xz-contents")) + if _, err := DownloadAndVerify("file://"+src+".xz", filepath.Join(dir, "out-xz.img"), "sha256:"+hex.EncodeToString(checksum[:])); err != nil { + t.Fatalf("DownloadAndVerify xz: %v", err) + } + if _, err := DownloadAndVerify("file://"+zipPath, filepath.Join(dir, "out-zip.img"), ""); err != nil { + t.Fatalf("DownloadAndVerify zip: %v", err) + } +} + +func TestDownloadAndVerifyErrorBranches(t *testing.T) { + dir := t.TempDir() + if err := VerifyChecksum(filepath.Join(dir, "missing"), "bogus"); err == nil { + t.Fatal("expected invalid checksum format error") + } + if _, err := DownloadAndVerify("file://"+filepath.Join(dir, "missing.img"), filepath.Join(dir, "out.img"), "sha256:deadbeef"); err == nil { + t.Fatal("expected missing source error") + } +} diff --git a/pkg/image/download_test.go b/pkg/image/download_test.go index d0357ae..0f77825 100644 --- a/pkg/image/download_test.go +++ b/pkg/image/download_test.go @@ -5,6 +5,8 @@ import ( "crypto/md5" "crypto/sha256" "encoding/hex" + "net/http" + "net/http/httptest" "os" "os/exec" "path/filepath" @@ 
-173,6 +175,81 @@ func TestVerifyChecksumAcceptsMD5(t *testing.T) { } } +func TestDownloadAndVerifyUsesHTTPAndCachedFile(t *testing.T) { + body := []byte("metis-http-test") + sum := sha256.Sum256(body) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/image.img" { + http.NotFound(w, r) + return + } + _, _ = w.Write(body) + })) + defer srv.Close() + + dir := t.TempDir() + dest := filepath.Join(dir, "image.img") + path, err := DownloadAndVerify(srv.URL+"/image.img", dest, "sha256:"+hex.EncodeToString(sum[:])) + if err != nil { + t.Fatalf("DownloadAndVerify: %v", err) + } + if path != dest { + t.Fatalf("path = %q, want %q", path, dest) + } + if got, _ := os.ReadFile(dest); string(got) != string(body) { + t.Fatalf("downloaded body = %q", string(got)) + } + + if err := os.WriteFile(dest, body, 0o644); err != nil { + t.Fatal(err) + } + if _, err := DownloadAndVerify(srv.URL+"/image.img", dest, "sha256:"+hex.EncodeToString(sum[:])); err != nil { + t.Fatalf("cached DownloadAndVerify: %v", err) + } +} + +func TestDownloadAndVerifyRejectsBadChecksum(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + if err := os.WriteFile(src, []byte("bad"), 0o644); err != nil { + t.Fatal(err) + } + if _, err := DownloadAndVerify("file://"+src, filepath.Join(dir, "dest.img"), "sha256:deadbeef"); err == nil { + t.Fatal("expected checksum mismatch") + } +} + +func TestDownloadAndVerifyRawAndErrorBranches(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + if err := os.WriteFile(src, []byte("raw"), 0o644); err != nil { + t.Fatal(err) + } + dest := filepath.Join(dir, "dest.img") + if _, err := DownloadAndVerify("file://"+src, dest, ""); err != nil { + t.Fatalf("DownloadAndVerify raw: %v", err) + } + if err := VerifyChecksum(dest, "bogus"); err == nil { + t.Fatal("expected invalid checksum format error") + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, 
r *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + })) + defer srv.Close() + if err := downloadRaw(srv.URL, filepath.Join(dir, "bad.img")); err == nil { + t.Fatal("expected HTTP error from downloadRaw") + } + + archive := filepath.Join(dir, "empty.zip") + if err := writeTestZIP(archive, map[string]string{}); err != nil { + t.Fatalf("writeTestZIP: %v", err) + } + if err := decompressZIP(archive, filepath.Join(dir, "out.img")); err == nil { + t.Fatal("expected empty zip error") + } +} + func writeTestZIP(path string, files map[string]string) error { out, err := os.Create(path) if err != nil { diff --git a/pkg/image/rootfs_test.go b/pkg/image/rootfs_test.go index d22dc25..2452470 100644 --- a/pkg/image/rootfs_test.go +++ b/pkg/image/rootfs_test.go @@ -2,55 +2,70 @@ package image import ( "os" - "os/exec" "path/filepath" "testing" "metis/pkg/inject" ) -func TestWriteExt4Files(t *testing.T) { - if _, err := exec.LookPath("mkfs.ext4"); err != nil { - t.Skip("mkfs.ext4 not available") - } - if _, err := exec.LookPath("debugfs"); err != nil { - t.Skip("debugfs not available") - } +func TestInjectRootFSWithFakes(t *testing.T) { + scripts := fakeRootfsCommands(t, true) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) - workDir := t.TempDir() - fsPath := filepath.Join(workDir, "root.ext4") - f, err := os.Create(fsPath) - if err != nil { + imagePath := filepath.Join(t.TempDir(), "image.img") + if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil { t.Fatal(err) } - if err := f.Truncate(32 * 1024 * 1024); err != nil { - t.Fatal(err) - } - if err := f.Close(); err != nil { - t.Fatal(err) - } - - cmd := exec.Command("mkfs.ext4", "-F", fsPath) - if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("mkfs.ext4: %v: %s", err, string(out)) - } - files := []inject.FileSpec{ - { - Path: "etc/metis/firstboot.env", - Content: []byte("METIS_HOSTNAME='titan-13'\n"), - Mode: 0o600, - RootFS: true, - }, - { - Path: 
"usr/local/sbin/test.sh", - Content: []byte("#!/usr/bin/env bash\nexit 0\n"), - Mode: 0o755, - RootFS: true, - }, + {Path: "etc/metis/firstboot.env", Content: []byte("METIS_HOSTNAME='titan-13'\n"), Mode: 0o600, RootFS: true}, + {Path: "usr/local/sbin/test.sh", Content: []byte("#!/usr/bin/env bash\nexit 0\n"), Mode: 0o755, RootFS: true}, } - if err := writeExt4Files(fsPath, files); err != nil { - t.Fatalf("writeExt4Files: %v", err) + if err := InjectRootFS(imagePath, files); err != nil { + t.Fatalf("InjectRootFS: %v", err) + } +} + +func TestInjectRootFSSkipsBootOnlyFiles(t *testing.T) { + imagePath := filepath.Join(t.TempDir(), "image.img") + if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + if err := InjectRootFS(imagePath, []inject.FileSpec{{Path: "user-data", Content: []byte("boot"), RootFS: false}}); err != nil { + t.Fatalf("InjectRootFS boot-only: %v", err) + } +} + +func TestFindLinuxPartitionAndTypeChecks(t *testing.T) { + scripts := fakeRootfsCommands(t, true) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) + imagePath := filepath.Join(t.TempDir(), "image.img") + if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + part, sector, err := findLinuxPartition(imagePath) + if err != nil { + t.Fatalf("findLinuxPartition: %v", err) + } + if sector != 512 || part.Start != 1 || part.Size != 2 { + t.Fatalf("unexpected partition info: %+v sector=%d", part, sector) + } + if !isLinuxPartitionType("83") || !isLinuxPartitionType("8300") || !isLinuxPartitionType("0fc63daf-8483-4772-8e79-3d69d8477de4") { + t.Fatal("expected linux partition types to match") + } + if isLinuxPartitionType("ef") { + t.Fatal("did not expect non-linux type to match") + } +} + +func TestFindLinuxPartitionReturnsErrorWhenNoLinuxPartitionExists(t *testing.T) { + scripts := fakeRootfsCommands(t, false) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) + imagePath 
:= filepath.Join(t.TempDir(), "image.img") + if err := os.WriteFile(imagePath, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + if _, _, err := findLinuxPartition(imagePath); err == nil { + t.Fatal("expected error without Linux partition") } } @@ -66,3 +81,63 @@ func TestParentDirs(t *testing.T) { } } } + +func TestRootfsErrorBranches(t *testing.T) { + part := partitionTablePart{Start: 1, Size: 2} + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + dst := filepath.Join(dir, "dst.img") + if err := os.WriteFile(src, make([]byte, 512), 0o644); err != nil { + t.Fatal(err) + } + if err := extractPartition(src, dst, part, 512); err == nil { + t.Fatal("expected extractPartition to fail on short source image") + } + if err := os.WriteFile(dst, make([]byte, 512), 0o644); err != nil { + t.Fatal(err) + } + if err := replacePartition(src, dst, part, 512); err == nil { + t.Fatal("expected replacePartition size mismatch") + } +} + +func fakeRootfsCommands(t *testing.T, includeLinux bool) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + partitions := `{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"ef"}]}}` + if includeLinux { + partitions = `{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}}` + } + write("sfdisk", "cat <<'JSON'\n"+partitions+"\nJSON") + write("debugfs", `if [[ "${1:-}" == "-w" ]]; then + exit 0 +fi +if [[ "${1:-}" == "-R" ]]; then + set -- $2 + case "${1:-}" in + stat) + case "${2:-}" in + /etc/metis/firstboot.env) printf 'Mode: 0600\n' ;; + /usr/local/sbin/test.sh) printf 'Mode: 0755\n' ;; + esac + exit 0 + ;; + dump) + dest="${3:-}" + case "${2:-}" in + /etc/metis/firstboot.env) printf 
"METIS_HOSTNAME='titan-13'\n" > "${dest}" ;; + /usr/local/sbin/test.sh) printf '#!/usr/bin/env bash\nexit 0\n' > "${dest}" ;; + esac + exit 0 + ;; + esac +fi +exit 0`) + return dir +} diff --git a/pkg/inject/coverage_more_test.go b/pkg/inject/coverage_more_test.go new file mode 100644 index 0000000..1bf45d9 --- /dev/null +++ b/pkg/inject/coverage_more_test.go @@ -0,0 +1,36 @@ +package inject + +import ( + "os" + "path/filepath" + "testing" +) + +func TestInjectorWriteBranches(t *testing.T) { + dir := t.TempDir() + boot := filepath.Join(dir, "boot") + root := filepath.Join(dir, "root") + inj := &Injector{BootPath: boot, RootPath: root} + files := []FileSpec{ + {Path: "boot.txt", Content: []byte("boot"), Mode: 0o644, RootFS: false}, + {Path: "root.txt", Content: []byte("root"), Mode: 0o600, RootFS: true}, + } + if err := inj.Write(files); err != nil { + t.Fatalf("Write: %v", err) + } + if got, err := os.ReadFile(filepath.Join(boot, "boot.txt")); err != nil || string(got) != "boot" { + t.Fatalf("boot write = %q err=%v", got, err) + } + if got, err := os.ReadFile(filepath.Join(root, "root.txt")); err != nil || string(got) != "root" { + t.Fatalf("root write = %q err=%v", got, err) + } + + block := filepath.Join(dir, "blocked") + if err := os.WriteFile(block, []byte("file"), 0o644); err != nil { + t.Fatal(err) + } + inj = &Injector{BootPath: block} + if err := inj.Write([]FileSpec{{Path: "x", Content: []byte("x")}}); err == nil { + t.Fatal("expected mkdir error") + } +} diff --git a/pkg/inject/inject.go b/pkg/inject/inject.go index 01f991c..933464d 100644 --- a/pkg/inject/inject.go +++ b/pkg/inject/inject.go @@ -1,38 +1,40 @@ package inject import ( - "fmt" - "os" - "path/filepath" + "fmt" + "os" + "path/filepath" ) // Injector writes node config into a mounted image (boot/root paths supplied by caller). type Injector struct { - BootPath string - RootPath string + BootPath string + RootPath string } // FileSpec describes a file to write. 
type FileSpec struct { - Path string - Content []byte - Mode os.FileMode - RootFS bool // if true, write under root path; else boot path + Path string + Content []byte + Mode os.FileMode + RootFS bool // if true, write under root path; else boot path } +// Write materializes the requested files under the boot or root mount because +// the burn flow needs a single place to stage config fragments before sync. func (i *Injector) Write(files []FileSpec) error { - for _, f := range files { - base := i.BootPath - if f.RootFS { - base = i.RootPath - } - target := filepath.Join(base, f.Path) - if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { - return fmt.Errorf("mkdir %s: %w", filepath.Dir(target), err) - } - if err := os.WriteFile(target, f.Content, f.Mode); err != nil { - return fmt.Errorf("write %s: %w", target, err) - } - } - return nil + for _, f := range files { + base := i.BootPath + if f.RootFS { + base = i.RootPath + } + target := filepath.Join(base, f.Path) + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", filepath.Dir(target), err) + } + if err := os.WriteFile(target, f.Content, f.Mode); err != nil { + return fmt.Errorf("write %s: %w", target, err) + } + } + return nil } diff --git a/pkg/inject/inject_test.go b/pkg/inject/inject_test.go new file mode 100644 index 0000000..fc9300a --- /dev/null +++ b/pkg/inject/inject_test.go @@ -0,0 +1,41 @@ +package inject + +import ( + "os" + "path/filepath" + "testing" +) + +func TestWriteTargetsBootAndRootMounts(t *testing.T) { + dir := t.TempDir() + boot := filepath.Join(dir, "boot") + root := filepath.Join(dir, "root") + inj := Injector{BootPath: boot, RootPath: root} + + files := []FileSpec{ + {Path: "boot.txt", Content: []byte("boot"), Mode: 0o644, RootFS: false}, + {Path: "root.txt", Content: []byte("root"), Mode: 0o600, RootFS: true}, + } + if err := inj.Write(files); err != nil { + t.Fatalf("Write: %v", err) + } + if got, err := 
os.ReadFile(filepath.Join(boot, "boot.txt")); err != nil || string(got) != "boot" { + t.Fatalf("boot file = %q, err=%v", string(got), err) + } + if got, err := os.ReadFile(filepath.Join(root, "root.txt")); err != nil || string(got) != "root" { + t.Fatalf("root file = %q, err=%v", string(got), err) + } +} + +func TestWriteReturnsFilesystemErrors(t *testing.T) { + dir := t.TempDir() + boot := filepath.Join(dir, "boot") + rootFile := filepath.Join(dir, "root-file") + if err := os.WriteFile(rootFile, []byte("not a dir"), 0o644); err != nil { + t.Fatal(err) + } + inj := Injector{BootPath: boot, RootPath: rootFile} + if err := inj.Write([]FileSpec{{Path: "root.txt", Content: []byte("root"), RootFS: true}}); err == nil { + t.Fatal("expected write error for root path file") + } +} diff --git a/pkg/inventory/coverage_more_test.go b/pkg/inventory/coverage_more_test.go new file mode 100644 index 0000000..08dcdf6 --- /dev/null +++ b/pkg/inventory/coverage_more_test.go @@ -0,0 +1,37 @@ +package inventory + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadAndFindNodeBranches(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "inventory.yaml") + if err := os.WriteFile(path, []byte(` +classes: + - name: ${CLASS} + arch: arm64 + os: armbian + image: file:///tmp/base.img +nodes: + - name: node1 + class: ${CLASS} + hostname: node1 + k3s_role: agent +`), 0o644); err != nil { + t.Fatal(err) + } + t.Setenv("CLASS", "rpi4") + inv, err := Load(path) + if err != nil { + t.Fatalf("Load: %v", err) + } + if _, _, err := inv.FindNode("missing"); err == nil { + t.Fatal("expected missing node error") + } + if _, cls, err := inv.FindNode("node1"); err != nil || cls == nil { + t.Fatalf("expected class lookup for node1, got class=%#v err=%v", cls, err) + } +} diff --git a/pkg/inventory/types_test.go b/pkg/inventory/types_test.go index 1949c4b..18831d3 100644 --- a/pkg/inventory/types_test.go +++ b/pkg/inventory/types_test.go @@ -41,3 +41,26 @@ nodes: t.Fatalf("token 
not expanded: %q", node.K3sToken) } } + +func TestFindNodeReturnsClassMissingError(t *testing.T) { + inv := &Inventory{ + Nodes: []NodeSpec{{Name: "n1", Class: "missing"}}, + } + node, class, err := inv.FindNode("n1") + if err == nil { + t.Fatal("expected class missing error") + } + if node == nil || class != nil { + t.Fatalf("unexpected node/class: %#v %#v", node, class) + } +} + +func TestLoadRejectsInvalidYAML(t *testing.T) { + invPath := filepath.Join(t.TempDir(), "inventory.yaml") + if err := os.WriteFile(invPath, []byte("classes: ["), 0o644); err != nil { + t.Fatal(err) + } + if _, err := Load(invPath); err == nil { + t.Fatal("expected parse inventory error") + } +} diff --git a/pkg/mount/coverage_more_test.go b/pkg/mount/coverage_more_test.go new file mode 100644 index 0000000..65d1f0b --- /dev/null +++ b/pkg/mount/coverage_more_test.go @@ -0,0 +1,49 @@ +package mount + +import ( + "os" + "path/filepath" + "testing" +) + +func TestSetupAndTeardownWithFakeCommands(t *testing.T) { + dir := t.TempDir() + scripts := filepath.Join(dir, "bin") + if err := os.MkdirAll(scripts, 0o755); err != nil { + t.Fatal(err) + } + write := func(name, body string) { + path := filepath.Join(scripts, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("losetup", `printf '/dev/loop9\n'`) + write("mount", `exit 0`) + write("umount", `exit 0`) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) + + image := filepath.Join(dir, "image.img") + if err := os.WriteFile(image, make([]byte, 1024), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(image+"p1", []byte(""), 0o644); err != nil { + t.Fatal(err) + } + if got := partitionPath(image, 1); got != image+"p1" { + t.Fatalf("partitionPath existing = %q", got) + } + m, err := Setup(image) + if err != nil { + t.Fatalf("Setup: %v", err) + } + if m.LoopDevice != "/dev/loop9" || m.BootPath == "" 
|| m.RootPath == "" { + t.Fatalf("unexpected mount: %#v", m) + } + if err := Teardown(m); err != nil { + t.Fatalf("Teardown: %v", err) + } + if got := partitionPath("/dev/loop9", 2); got != "/dev/loop92" && got != "/dev/loop9p2" { + t.Fatalf("partitionPath /dev = %q", got) + } +} diff --git a/pkg/mount/mount_test.go b/pkg/mount/mount_test.go new file mode 100644 index 0000000..d9a372c --- /dev/null +++ b/pkg/mount/mount_test.go @@ -0,0 +1,67 @@ +package mount + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestSetupAndTeardownUseHelperCommands(t *testing.T) { + scripts := fakeCommandDir(t) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) + image := filepath.Join(t.TempDir(), "disk.img") + if err := os.WriteFile(image, []byte("image"), 0o644); err != nil { + t.Fatal(err) + } + + m, err := Setup(image) + if err != nil { + t.Fatalf("Setup: %v", err) + } + if m.LoopDevice != "/dev/loop9" { + t.Fatalf("loop device = %q", m.LoopDevice) + } + if !strings.Contains(m.BootPath, "metis-boot-") || !strings.Contains(m.RootPath, "metis-root-") { + t.Fatalf("unexpected mount paths: %+v", m) + } + if err := Teardown(m); err != nil { + t.Fatalf("Teardown: %v", err) + } +} + +func TestPartitionPathFallsBackToNumberSuffix(t *testing.T) { + if got := partitionPath("/dev/loop0", 2); got != "/dev/loop02" { + t.Fatalf("partitionPath = %q", got) + } +} + +func TestTeardownNilAndDirectDeviceSetup(t *testing.T) { + if err := Teardown(nil); err != nil { + t.Fatalf("Teardown(nil): %v", err) + } + scripts := fakeCommandDir(t) + t.Setenv("PATH", scripts+string(os.PathListSeparator)+os.Getenv("PATH")) + if _, err := Setup("/dev/loop9"); err != nil { + t.Fatalf("Setup direct device: %v", err) + } +} + +func fakeCommandDir(t *testing.T) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != 
nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("losetup", `if [[ "${1:-}" == "-Pf" && "${2:-}" == "--show" ]]; then + printf '/dev/loop9\n' + exit 0 +fi +exit 0`) + write("mount", `exit 0`) + write("umount", `exit 0`) + return dir +} diff --git a/pkg/plan/coverage_more_test.go b/pkg/plan/coverage_more_test.go new file mode 100644 index 0000000..8b7da00 --- /dev/null +++ b/pkg/plan/coverage_more_test.go @@ -0,0 +1,136 @@ +package plan + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "metis/pkg/inventory" +) + +func TestPlanBuildFilesAndExecuteBranches(t *testing.T) { + dir := t.TempDir() + base := filepath.Join(dir, "base.img") + baseContent := []byte("image") + if err := os.WriteFile(base, baseContent, 0o644); err != nil { + t.Fatal(err) + } + sum := sha256.Sum256(baseContent) + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "rpi4", + Arch: "arm64", + OS: "armbian", + Image: "file://" + base, + Checksum: "sha256:" + hex.EncodeToString(sum[:]), + DefaultLabels: map[string]string{ + "node-role.kubernetes.io/worker": "true", + }, + BootOverlay: filepath.Join(dir, "boot-overlay"), + RootOverlay: filepath.Join(dir, "root-overlay"), + }}, + Nodes: []inventory.NodeSpec{{ + Name: "titan-15", + Class: "rpi4", + Hostname: "titan-15", + IP: "192.168.22.43", + K3sRole: "agent", + K3sURL: "https://192.168.22.7:6443", + K3sToken: "token", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/var/lib/longhorn", UUID: "u1"}}, + }}, + } + if err := os.MkdirAll(inv.Classes[0].BootOverlay, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(inv.Classes[0].RootOverlay, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(inv.Classes[0].BootOverlay, "boot.txt"), []byte("boot"), 0o644); err != nil { + t.Fatal(err) + } + if err := 
os.WriteFile(filepath.Join(inv.Classes[0].RootOverlay, "root.txt"), []byte("root"), 0o600); err != nil { + t.Fatal(err) + } + + if _, err := Build(inv, "missing", "", dir); err == nil { + t.Fatal("expected Build to fail for missing node") + } + p, err := Build(inv, "titan-15", "", dir) + if err != nil { + t.Fatalf("Build: %v", err) + } + if p.Device != "/dev/sdX" || !strings.Contains(strings.Join(actionDetails(p.Actions), " "), "Inject hostname/network/k3s config") { + t.Fatalf("unexpected plan: %#v", p) + } + if got := cacheName("foo.img.xz"); got != "foo.img" { + t.Fatalf("cacheName = %q", got) + } + + files, err := Files(inv, "titan-15") + if err != nil { + t.Fatalf("Files: %v", err) + } + if len(files) == 0 { + t.Fatal("expected files") + } + if got := cloudInitUserData(nil, nil); got != "" { + t.Fatalf("cloudInitUserData nil = %q", got) + } + if got := allowK3sNodeLabel("agent", "node-role.kubernetes.io/master"); got { + t.Fatal("agent should reject node-role labels") + } + + // Inject is a thin wrapper around maybeInject; exercise both the no-op and + // path-setting branches. 
+ if err := Inject(inv, "titan-15", "", ""); err != nil { + t.Fatalf("Inject noop: %v", err) + } + boot := filepath.Join(dir, "boot") + root := filepath.Join(dir, "root") + if err := os.MkdirAll(boot, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(root, 0o755); err != nil { + t.Fatal(err) + } + if err := Inject(inv, "titan-15", boot, root); err != nil { + t.Fatalf("Inject with paths: %v", err) + } + + cacheDir := filepath.Join(dir, "cache") + output := filepath.Join(dir, "output.img") + t.Setenv("PATH", dir) + if err := BuildImageFile(context.Background(), inv, "titan-15", cacheDir, output); err == nil { + t.Fatal("expected BuildImageFile to fail without xz/debugfs setup") + } + if _, err := Execute(inv, "titan-15", "/dev/sdX", cacheDir, true); err == nil { + t.Fatal("expected Execute to reject placeholder device") + } + if _, err := Execute(inv, "titan-15", "/dev/sdz", cacheDir, false); err != nil { + t.Fatalf("Execute dry-run: %v", err) + } +} + +func actionDetails(actions []Action) []string { + out := make([]string, 0, len(actions)) + for _, action := range actions { + out = append(out, action.Detail) + } + return out +} + +func TestPlanMiscBranches(t *testing.T) { + if !NextRunStale(timeNow().Add(-time.Hour), time.Minute) { + t.Fatal("expected NextRunStale") + } +} + +func timeNow() time.Time { return time.Now() } diff --git a/pkg/plan/inject_extra_test.go b/pkg/plan/inject_extra_test.go new file mode 100644 index 0000000..3f68f4b --- /dev/null +++ b/pkg/plan/inject_extra_test.go @@ -0,0 +1,127 @@ +package plan + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "metis/pkg/inventory" +) + +func TestFilesAndInjectWithSecretsAndOverlays(t *testing.T) { + vault := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/secret/data/nodes/titan-15" { + http.NotFound(w, r) + return + } + _ = 
json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{ + "data": map[string]any{ + "cloud_init": "#cloud-config\nmanage_etc_hosts: true\n", + "k3s_token": "secret-token", + "extra": map[string]string{"foo": "bar"}, + }, + }, + }) + })) + defer vault.Close() + t.Setenv("VAULT_ADDR", vault.URL) + t.Setenv("VAULT_TOKEN", "tok") + + dir := t.TempDir() + bootOverlay := filepath.Join(dir, "boot-overlay") + rootOverlay := filepath.Join(dir, "root-overlay") + if err := os.MkdirAll(filepath.Join(bootOverlay, "over"), 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(rootOverlay, "etc"), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(bootOverlay, "over", "cmdline.txt"), []byte("console=tty1"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(rootOverlay, "etc", "issue"), []byte("hello"), 0o644); err != nil { + t.Fatal(err) + } + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file:///tmp/base.img", + BootOverlay: bootOverlay, + RootOverlay: rootOverlay, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "titan-15", + Class: "c1", + Hostname: "titan-15", + IP: "192.168.22.43", + K3sRole: "agent", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + }}, + } + + files, err := Files(inv, "titan-15") + if err != nil { + t.Fatalf("Files: %v", err) + } + var sawSecret, sawBootOverlay, sawRootOverlay, sawCloudInit bool + for _, f := range files { + switch { + case f.Path == "etc/metis/secrets.json": + sawSecret = true + case f.Path == "over/cmdline.txt": + sawBootOverlay = true + case f.Path == "etc/issue": + sawRootOverlay = true + case f.Path == "user-data": + sawCloudInit = strings.Contains(string(f.Content), "manage_etc_hosts: true") + } + } + if !sawSecret || !sawBootOverlay || !sawRootOverlay || !sawCloudInit { + t.Fatalf("missing generated files: secret=%v boot=%v root=%v cloudinit=%v", 
sawSecret, sawBootOverlay, sawRootOverlay, sawCloudInit) + } + + bootDir := filepath.Join(dir, "boot") + rootDir := filepath.Join(dir, "root") + if err := os.MkdirAll(bootDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(rootDir, 0o755); err != nil { + t.Fatal(err) + } + if err := Inject(inv, "titan-15", bootDir, rootDir); err != nil { + t.Fatalf("Inject: %v", err) + } + if _, err := os.Stat(filepath.Join(bootDir, "over", "cmdline.txt")); err != nil { + t.Fatalf("expected boot overlay file: %v", err) + } + if _, err := os.Stat(filepath.Join(rootDir, "etc/metis/node.json")); err != nil { + t.Fatalf("expected injected rootfs file: %v", err) + } +} + +func TestNextRunStale(t *testing.T) { + if !NextRunStale(time.Now().Add(-2*time.Hour), time.Hour) { + t.Fatal("expected stale run") + } + if NextRunStale(time.Now(), time.Hour) { + t.Fatal("did not expect fresh run to be stale") + } +} + +func TestAllowK3sNodeLabelRules(t *testing.T) { + if allowK3sNodeLabel("agent", "node-role.kubernetes.io/worker") { + t.Fatal("agent should block node-role labels") + } + if !allowK3sNodeLabel("server", "node-role.kubernetes.io/worker") { + t.Fatal("server should allow node-role labels") + } +} diff --git a/pkg/plan/plan_env_test.go b/pkg/plan/plan_env_test.go index 33d5933..bf42d88 100644 --- a/pkg/plan/plan_env_test.go +++ b/pkg/plan/plan_env_test.go @@ -2,6 +2,7 @@ package plan import ( "os" + "path/filepath" "testing" "metis/pkg/inventory" @@ -37,3 +38,39 @@ func TestBuildIncludesInjectWhenEnvSet(t *testing.T) { t.Fatalf("expected inject action when METIS_BOOT_PATH set") } } + +func TestBuildAndExecuteErrorBranches(t *testing.T) { + inv := &inventory.Inventory{} + if _, err := Build(inv, "missing", "/dev/sdz", "/tmp/cache"); err == nil { + t.Fatal("expected Build to fail for missing node") + } + if got := checksumFromInventory(inv, "missing"); got != "" { + t.Fatalf("checksumFromInventory missing node = %q", got) + } + + dir := t.TempDir() + raw := 
filepath.Join(dir, "base.img") + if err := os.WriteFile(raw, []byte("image"), 0o644); err != nil { + t.Fatal(err) + } + sum := imageChecksum(t, raw) + inv = &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file://" + raw, + Checksum: sum, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "n1", + Class: "c1", + Hostname: "n1", + IP: "10.0.0.1", + K3sRole: "agent", + }}, + } + if _, err := Execute(inv, "n1", "/dev/sdX", filepath.Join(dir, "cache"), true); err == nil { + t.Fatal("expected placeholder device rejection") + } +} diff --git a/pkg/plan/workflow_test.go b/pkg/plan/workflow_test.go new file mode 100644 index 0000000..44667b2 --- /dev/null +++ b/pkg/plan/workflow_test.go @@ -0,0 +1,199 @@ +package plan + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "testing" + + "metis/pkg/inventory" +) + +func TestExecuteAndBuildImageFileWithFakes(t *testing.T) { + rootTools := fakeRootfsTools(t) + mountTools := fakeMountTools(t) + t.Setenv("PATH", rootTools+string(os.PathListSeparator)+mountTools+string(os.PathListSeparator)+os.Getenv("PATH")) + + dir := t.TempDir() + rawImage := filepath.Join(dir, "base.img") + if err := os.WriteFile(rawImage, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + sum := imageChecksum(t, rawImage) + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file://" + rawImage, + Checksum: sum, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "n1", + Class: "c1", + Hostname: "n1", + IP: "10.0.0.1", + K3sRole: "agent", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + }}, + } + + planDry, err := Execute(inv, "n1", filepath.Join(dir, "disk.img"), filepath.Join(dir, "cache"), false) + if err != nil { + t.Fatalf("Execute dry-run: %v", err) + } + if planDry.Node != "n1" || len(planDry.Actions) == 0 { + t.Fatalf("unexpected dry-run plan: %#v", planDry) + } + + 
bootDir := filepath.Join(dir, "boot") + rootDir := filepath.Join(dir, "root") + if err := os.MkdirAll(bootDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(rootDir, 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("METIS_BOOT_PATH", bootDir) + t.Setenv("METIS_ROOT_PATH", rootDir) + t.Setenv("METIS_AUTO_MOUNT", "1") + written := filepath.Join(dir, "written.img") + planRun, err := Execute(inv, "n1", written, filepath.Join(dir, "cache2"), true) + if err != nil { + t.Fatalf("Execute confirm: %v", err) + } + if planRun.Image != "file://"+rawImage { + t.Fatalf("unexpected plan image: %#v", planRun) + } + if _, err := os.Stat(written); err != nil { + t.Fatalf("expected written image: %v", err) + } + if _, err := os.Stat(filepath.Join(rootDir, "etc/metis/firstboot.env")); err != nil { + t.Fatalf("expected injected rootfs file: %v", err) + } +} + +func TestBuildImageFileMaterializesRootFS(t *testing.T) { + rootTools := fakeRootfsTools(t) + t.Setenv("PATH", rootTools+string(os.PathListSeparator)+os.Getenv("PATH")) + + dir := t.TempDir() + rawImage := filepath.Join(dir, "base.img") + if err := os.WriteFile(rawImage, make([]byte, 4096), 0o644); err != nil { + t.Fatal(err) + } + sum := imageChecksum(t, rawImage) + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file://" + rawImage, + Checksum: sum, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "n1", + Class: "c1", + Hostname: "n1", + IP: "10.0.0.1", + K3sRole: "agent", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + }}, + } + out := filepath.Join(dir, "output.img") + if err := BuildImageFile(context.Background(), inv, "n1", filepath.Join(dir, "cache"), out); err != nil { + t.Fatalf("BuildImageFile: %v", err) + } + if _, err := os.Stat(out); err != nil { + t.Fatalf("expected output image: %v", err) + } +} + +func TestMaybeInjectNoopsWhenEnvUnset(t *testing.T) { + if err := maybeInject(&inventory.Inventory{}, "n1"); 
err != nil { + t.Fatalf("maybeInject without env: %v", err) + } +} + +func TestChecksumFromInventoryAndCacheName(t *testing.T) { + inv := &inventory.Inventory{ + Classes: []inventory.NodeClass{{Name: "c1", Checksum: "sha256:deadbeef"}}, + Nodes: []inventory.NodeSpec{{Name: "n1", Class: "c1"}}, + } + if got := checksumFromInventory(inv, "n1"); got != "sha256:deadbeef" { + t.Fatalf("checksumFromInventory = %q", got) + } + if got := cacheName("/tmp/archive/base.img.xz"); got != "base.img" { + t.Fatalf("cacheName = %q", got) + } +} + +func fakeRootfsTools(t *testing.T) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("sfdisk", `cat <<'JSON' +{"partitiontable":{"sectorsize":512,"partitions":[{"start":3,"size":1,"type":"ef"},{"start":1,"size":2,"type":"83"}]}} +JSON`) + write("debugfs", `if [[ "${1:-}" == "-w" ]]; then + cp "${3:-}" "${4:-}.commands" + exit 0 +fi +if [[ "${1:-}" == "-R" ]]; then + state="${3:-}.commands" + set -- $2 + case "${1:-}" in + stat) + mode="$(awk -v path="${2:-}" '$1=="sif" && $2==path {print $4}' "${state}" | tail -n1)" + mode="${mode: -4}" + printf 'Mode: %s\n' "${mode}" + exit 0 + ;; + dump) + local_path="$(awk -v path="${2:-}" '$1=="write" && $3==path {print $2}' "${state}" | tail -n1)" + cat "${local_path}" > "${3:-}" + exit 0 + ;; + esac +fi +exit 0`) + return dir +} + +func fakeMountTools(t *testing.T) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("losetup", `if [[ "${1:-}" == "-Pf" && "${2:-}" == "--show" ]]; then + printf '/dev/loop9\n' + exit 0 +fi +exit 0`) + write("mount", `exit 0`) + 
write("umount", `exit 0`) + return dir +} + +func imageChecksum(t *testing.T, path string) string { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + sum := sha256.Sum256(data) + return "sha256:" + hex.EncodeToString(sum[:]) +} diff --git a/pkg/secrets/coverage_more_test.go b/pkg/secrets/coverage_more_test.go new file mode 100644 index 0000000..eeae6e5 --- /dev/null +++ b/pkg/secrets/coverage_more_test.go @@ -0,0 +1,52 @@ +package secrets + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestClientLoginAndFetchBranches(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/auth/approle/login"): + _ = json.NewEncoder(w).Encode(map[string]any{"auth": map[string]any{"client_token": "token"}}) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/missing"): + w.WriteHeader(http.StatusNotFound) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/error"): + http.Error(w, "boom", http.StatusInternalServerError) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/secret/data/nodes/node1"): + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{ + "data": map[string]any{"k3s_token": "abc", "cloud_init": "ci"}, + }, + }) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + cli := &Client{Addr: srv.URL, RoleID: "role", SecretID: "secret", Client: srv.Client()} + if err := cli.LoginIfNeeded(context.Background()); err != nil { + t.Fatalf("LoginIfNeeded: %v", err) + } + if cli.Token != "token" { + t.Fatalf("expected token, got %q", cli.Token) + } + if got, err := cli.FetchNode(context.Background(), "missing"); err != nil || got == nil || got.K3sToken != "" { + t.Fatalf("FetchNode missing = %#v err=%v", got, err) + } + if _, err := 
cli.FetchNode(context.Background(), "error"); err == nil { + t.Fatal("expected FetchNode error") + } + if got, err := cli.FetchNode(context.Background(), "node1"); err != nil || got.K3sToken != "abc" { + t.Fatalf("FetchNode node1 = %#v err=%v", got, err) + } + if cli.httpClient() == nil { + t.Fatal("httpClient returned nil") + } +} diff --git a/pkg/secrets/vault.go b/pkg/secrets/vault.go index bc2a3f0..2028f82 100644 --- a/pkg/secrets/vault.go +++ b/pkg/secrets/vault.go @@ -81,6 +81,8 @@ func (c *Client) LoginIfNeeded(ctx context.Context) error { return nil } +// FetchNode loads per-node secret material because burn-time injection needs +// a single read path that can fall back to empty secrets when Vault has no row. // FetchNode pulls secret/data/nodes/. func (c *Client) FetchNode(ctx context.Context, hostname string) (*NodeSecrets, error) { if err := c.LoginIfNeeded(ctx); err != nil { diff --git a/pkg/secrets/vault_test.go b/pkg/secrets/vault_test.go index 71332b5..9b8ed4d 100644 --- a/pkg/secrets/vault_test.go +++ b/pkg/secrets/vault_test.go @@ -74,3 +74,47 @@ func TestApproRoleLogin(t *testing.T) { t.Fatalf("approle login not called") } } + +func TestLoginIfNeededNoopWithToken(t *testing.T) { + c := &Client{Addr: "http://example.invalid", Token: "existing"} + if err := c.LoginIfNeeded(context.Background()); err != nil { + t.Fatalf("LoginIfNeeded: %v", err) + } + if c.Token != "existing" { + t.Fatalf("token unexpectedly changed") + } +} + +func TestNewFromEnvPopulatesCredentials(t *testing.T) { + t.Setenv("VAULT_ADDR", "http://vault.example") + t.Setenv("VAULT_TOKEN", "tok") + t.Setenv("VAULT_ROLE_ID", "role") + t.Setenv("VAULT_SECRET_ID", "secret") + c := NewFromEnv() + if c.Addr != "http://vault.example" || c.Token != "tok" || c.RoleID != "role" || c.SecretID != "secret" { + t.Fatalf("unexpected env client: %+v", c) + } +} + +func TestFetchNodeAndLoginErrorBranches(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + switch r.URL.Path { + case "/v1/auth/approle/login": + http.Error(w, "denied", http.StatusForbidden) + case "/v1/secret/data/nodes/missing": + http.Error(w, "down", http.StatusInternalServerError) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + c := &Client{Addr: srv.URL, RoleID: "r", SecretID: "s", Client: srv.Client()} + if _, err := c.FetchNode(context.Background(), "missing"); err == nil { + t.Fatal("expected approle login failure") + } + c = &Client{Addr: srv.URL, Token: "tok", Client: srv.Client()} + if _, err := c.FetchNode(context.Background(), "missing"); err == nil { + t.Fatal("expected fetch error for 500 response") + } +} diff --git a/pkg/sentinel/collector_test.go b/pkg/sentinel/collector_test.go new file mode 100644 index 0000000..4f67b41 --- /dev/null +++ b/pkg/sentinel/collector_test.go @@ -0,0 +1,87 @@ +package sentinel + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCollectUsesCommandOutputAndPkgSample(t *testing.T) { + dir := fakeSentinelCommands(t) + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) + + snap := Collect() + if snap.Hostname != "titan-13" || snap.Kernel != "6.6.63" || snap.OSImage != "Metis OS" { + t.Fatalf("unexpected snapshot: %+v", snap) + } + if snap.K3sVersion != "v1.31.5+k3s1" || snap.Containerd != "1.7.99" { + t.Fatalf("unexpected runtime facts: %+v", snap) + } + if len(snap.PackageSample) != 4 || snap.PackageSample["k3s"] != "v1.31.5+k3s1" { + t.Fatalf("unexpected package sample: %+v", snap.PackageSample) + } +} + +func TestCommandOutputUsesNsenterWhenRequested(t *testing.T) { + dir := fakeSentinelCommands(t) + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) + t.Setenv("METIS_SENTINEL_NSENTER", "1") + + got, err := commandOutput("ignored", "arg") + if err != nil { + t.Fatalf("commandOutput: %v", err) + } + if strings.TrimSpace(string(got)) != "nsenter-ok" { + t.Fatalf("unexpected nsenter output: %q", string(got)) + } 
+} + +func TestRunAndTrimAndPkgVersionFallbacks(t *testing.T) { + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("cat", `printf 'ID=metis\n'`) + write("rpm", `exit 1`) + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) + + if got := runAndTrim("missing-command"); got != "" { + t.Fatalf("runAndTrim missing command = %q", got) + } + if got := osRelease(); got != "" { + t.Fatalf("osRelease without PRETTY_NAME = %q", got) + } + if got := pkgVersion("does-not-exist"); got != "" { + t.Fatalf("pkgVersion fallback = %q", got) + } +} + +func fakeSentinelCommands(t *testing.T) string { + t.Helper() + dir := t.TempDir() + write := func(name, body string) { + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/usr/bin/env bash\nset -eu\n"+body+"\n"), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + } + write("hostname", `printf 'titan-13\n'`) + write("uname", `printf '6.6.63\n'`) + write("k3s", `printf 'v1.31.5+k3s1\n'`) + write("containerd", `printf '1.7.99\n'`) + write("cat", `printf 'PRETTY_NAME="Metis OS"\n'`) + write("dpkg-query", `case "${@: -1}" in + containerd) printf '1.7.99\n' ;; + k3s) printf 'v1.31.5+k3s1\n' ;; + nvidia-container-toolkit) printf '1.16.2\n' ;; + linux-image-raspi) printf '6.6.63\n' ;; + *) printf '1.0.0\n' ;; +esac`) + write("rpm", `printf '1.0.0\n'`) + write("nsenter", `printf 'nsenter-ok\n'`) + return dir +} diff --git a/pkg/service/app.go b/pkg/service/app.go index 2acc9d2..2f8a868 100644 --- a/pkg/service/app.go +++ b/pkg/service/app.go @@ -1,12 +1,8 @@ package service import ( - "bufio" - "encoding/json" - "errors" "fmt" "os" - "os/exec" "path/filepath" "sort" "strings" @@ -18,6 +14,8 @@ import ( "metis/pkg/sentinel" ) +// JobStatus identifies the current lifecycle state of a queued job 
because +// the UI and metrics need a stable shared vocabulary for progress updates. type JobStatus string const ( @@ -313,418 +311,3 @@ func (a *App) WatchSentinel() (*Event, error) { a.metrics.SetDriftTargets(nextTargets, len(changes)) return event, nil } - -func (a *App) newJob(kind, node, host, device string) *Job { - job := &Job{ - ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()), - Kind: kind, - Node: node, - Host: host, - Device: device, - Status: JobQueued, - ProgressPct: 0, - StartedAt: time.Now().UTC(), - UpdatedAt: time.Now().UTC(), - } - a.mu.Lock() - a.jobs[job.ID] = job - a.mu.Unlock() - return job -} - -func (a *App) job(id string) *Job { - a.mu.RLock() - defer a.mu.RUnlock() - return a.jobs[id] -} - -func (a *App) setJob(id string, update func(*Job)) { - a.mu.Lock() - defer a.mu.Unlock() - job := a.jobs[id] - if job == nil { - return - } - update(job) - job.UpdatedAt = time.Now().UTC() -} - -func (a *App) failJob(id string, err error) { - a.completeJob(id, func(j *Job) { - j.Status = JobError - j.Error = err.Error() - j.Message = err.Error() - }) -} - -func (a *App) completeJob(id string, update func(*Job)) { - a.mu.Lock() - defer a.mu.Unlock() - job := a.jobs[id] - if job == nil { - return - } - update(job) - if job.Status != JobError { - job.Status = JobDone - } - job.UpdatedAt = time.Now().UTC() - job.FinishedAt = time.Now().UTC() -} - -func (a *App) appendEvent(event Event) { - line, err := json.Marshal(event) - if err != nil { - return - } - f, err := os.OpenFile(a.settings.HistoryPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) - if err != nil { - return - } - defer f.Close() - _, _ = f.Write(append(line, '\n')) -} - -func (a *App) recentEvents(limit int) []Event { - f, err := os.Open(a.settings.HistoryPath) - if err != nil { - return nil - } - defer f.Close() - events := make([]Event, 0, limit) - scanner := bufio.NewScanner(f) - for scanner.Scan() { - var event Event - if err := json.Unmarshal(scanner.Bytes(), &event); err != nil { - 
continue - } - events = append(events, event) - } - if len(events) > limit { - events = events[len(events)-limit:] - } - for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 { - events[i], events[j] = events[j], events[i] - } - return events -} - -func cachedImageName(source string) string { - return strings.TrimSuffix(filepath.Base(source), ".xz") -} - -func (a *App) replacementNodes() []inventory.NodeSpec { - nodes := make([]inventory.NodeSpec, 0, len(a.inventory.Nodes)) - for _, node := range a.inventory.Nodes { - spec, class, err := a.inventory.FindNode(node.Name) - if err != nil { - continue - } - if replacementReady(spec, class) { - nodes = append(nodes, node) - } - } - sort.Slice(nodes, func(i, j int) bool { - return nodes[i].Name < nodes[j].Name - }) - return nodes -} - -func (a *App) ensureReplacementReady(nodeName string) error { - node, class, err := a.inventory.FindNode(nodeName) - if err != nil { - return err - } - if replacementReady(node, class) { - return nil - } - return fmt.Errorf("node %s does not yet have a complete replacement definition", nodeName) -} - -func replacementReady(node *inventory.NodeSpec, class *inventory.NodeClass) bool { - if node == nil || class == nil { - return false - } - if strings.TrimSpace(class.Image) == "" || strings.TrimSpace(class.Checksum) == "" { - return false - } - if strings.TrimSpace(node.Name) == "" || strings.TrimSpace(node.Hostname) == "" || strings.TrimSpace(node.IP) == "" { - return false - } - if strings.TrimSpace(node.K3sRole) == "" { - return false - } - if strings.TrimSpace(node.K3sRole) != "server" && strings.TrimSpace(node.K3sURL) == "" { - return false - } - if strings.TrimSpace(node.K3sToken) == "" { - return false - } - if strings.TrimSpace(node.SSHUser) == "" || len(node.SSHAuthorized) == 0 { - return false - } - return true -} - -func (a *App) flashHosts() []string { - hosts := map[string]struct{}{} - for _, host := range a.settings.FlashHosts { - if value := strings.TrimSpace(host); value != "" 
{ - hosts[value] = struct{}{} - } - } - for _, host := range []string{a.settings.DefaultFlashHost, a.settings.LocalHost} { - if value := strings.TrimSpace(host); value != "" { - hosts[value] = struct{}{} - } - } - for _, node := range clusterNodes() { - if value := strings.TrimSpace(node.Name); value != "" { - hosts[value] = struct{}{} - } - } - out := make([]string, 0, len(hosts)) - for host := range hosts { - out = append(out, host) - } - sort.Strings(out) - if a.settings.DefaultFlashHost == "" { - return out - } - return moveToFront(out, a.settings.DefaultFlashHost) -} - -func (a *App) loadSnapshots() error { - data, err := os.ReadFile(a.settings.SnapshotsPath) - if err != nil { - return err - } - var snapshots map[string]SnapshotRecord - if err := json.Unmarshal(data, &snapshots); err != nil { - return err - } - a.mu.Lock() - a.snapshots = snapshots - a.mu.Unlock() - for _, snap := range snapshots { - a.metrics.RecordSnapshot(snap.Node, "ok", snap.CollectedAt) - } - return nil -} - -func (a *App) persistSnapshots() error { - a.mu.RLock() - data, err := json.MarshalIndent(a.snapshots, "", " ") - a.mu.RUnlock() - if err != nil { - return err - } - if err := os.MkdirAll(filepath.Dir(a.settings.SnapshotsPath), 0o755); err != nil { - return err - } - return os.WriteFile(a.settings.SnapshotsPath, data, 0o644) -} - -func (a *App) loadTargets() error { - data, err := os.ReadFile(a.settings.TargetsPath) - if err != nil { - return err - } - var targets map[string]facts.Targets - if err := json.Unmarshal(data, &targets); err != nil { - return err - } - a.mu.Lock() - a.targets = targets - a.mu.Unlock() - a.metrics.SetDriftTargets(targets, 0) - return nil -} - -func (a *App) persistTargets() error { - a.mu.RLock() - data, err := json.MarshalIndent(a.targets, "", " ") - a.mu.RUnlock() - if err != nil { - return err - } - if err := os.MkdirAll(filepath.Dir(a.settings.TargetsPath), 0o755); err != nil { - return err - } - return os.WriteFile(a.settings.TargetsPath, data, 0o644) 
-} - -func diffTargets(prev, next map[string]facts.Targets) []string { - classes := map[string]struct{}{} - for class := range prev { - classes[class] = struct{}{} - } - for class := range next { - classes[class] = struct{}{} - } - out := make([]string, 0) - for class := range classes { - if !targetsEqual(prev[class], next[class]) { - out = append(out, class) - } - } - sort.Strings(out) - return out -} - -func targetsEqual(a, b facts.Targets) bool { - if a.Kernel != b.Kernel || a.OSImage != b.OSImage || a.Containerd != b.Containerd || a.K3sVersion != b.K3sVersion { - return false - } - if len(a.Packages) != len(b.Packages) { - return false - } - for key, value := range a.Packages { - if b.Packages[key] != value { - return false - } - } - return true -} - -func humanBytes(value int64) string { - const unit = 1024 - if value < unit { - return fmt.Sprintf("%d B", value) - } - div, exp := int64(unit), 0 - for n := value / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f %ciB", float64(value)/float64(div), "KMGTPE"[exp]) -} - -func firstLine(value string) string { - value = strings.TrimSpace(value) - if idx := strings.IndexByte(value, '\n'); idx >= 0 { - return strings.TrimSpace(value[:idx]) - } - return value -} - -func preferredDevice(devices []Device) string { - if len(devices) == 0 { - return "" - } - return devices[0].Path -} - -func errorString(err error) string { - if err == nil { - return "" - } - return err.Error() -} - -func cloneDevices(devices []Device) []Device { - if len(devices) == 0 { - return nil - } - out := make([]Device, len(devices)) - copy(out, devices) - return out -} - -func (a *App) cachedDevices(host string) ([]Device, error) { - host = strings.TrimSpace(host) - if host == "" { - host = a.settings.DefaultFlashHost - } - a.mu.RLock() - snapshot, ok := a.deviceStore[host] - a.mu.RUnlock() - if !ok { - return nil, nil - } - if strings.TrimSpace(snapshot.Err) != "" { - return cloneDevices(snapshot.Devices), 
errors.New(snapshot.Err) - } - return cloneDevices(snapshot.Devices), nil -} - -func (a *App) recordDevices(host string, devices []Device, err error) { - host = strings.TrimSpace(host) - if host == "" { - host = a.settings.DefaultFlashHost - } - snapshot := deviceSnapshot{ - Devices: cloneDevices(devices), - CheckedAt: time.Now().UTC(), - } - if err != nil { - snapshot.Err = err.Error() - } - a.mu.Lock() - if existing, ok := a.deviceStore[host]; ok && len(snapshot.Devices) == 0 { - snapshot.Devices = cloneDevices(existing.Devices) - } - a.deviceStore[host] = snapshot - a.mu.Unlock() -} - -func deviceScore(device Device) int { - score := 0 - model := strings.ToLower(strings.TrimSpace(device.Model)) - switch { - case strings.Contains(model, "microsd"), strings.Contains(model, "micro sd"): - score += 60 - case strings.Contains(model, "sdxc"), strings.Contains(model, "sdhc"), strings.Contains(model, "sd "): - score += 50 - case strings.Contains(model, "card"), strings.Contains(model, "reader"): - score += 40 - } - if device.Removable { - score += 20 - } - if device.Hotplug { - score += 10 - } - if device.Transport == "usb" { - score += 5 - } - if strings.HasPrefix(device.Name, "mmcblk") { - score += 25 - } - return score -} - -func moveToFront(values []string, preferred string) []string { - if preferred == "" || len(values) < 2 { - return values - } - out := append([]string{}, values...) 
- for idx, value := range out { - if value != preferred { - continue - } - copy(out[1:idx+1], out[:idx]) - out[0] = preferred - return out - } - return out -} - -func deleteNodeObject(node string) error { - if err := deleteNodeObjectInCluster(node); err == nil { - return nil - } - cmd := exec.Command("kubectl", "delete", "node", node, "--ignore-not-found") - if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("delete node: %w: %s", err, strings.TrimSpace(string(out))) - } - return nil -} - -func deleteNodeObjectInCluster(node string) error { - kube, err := inClusterKubeClient() - if err != nil { - return errors.New("not running in cluster") - } - return kube.deleteRequest(fmt.Sprintf("/api/v1/nodes/%s", node)) -} diff --git a/pkg/service/app_helpers.go b/pkg/service/app_helpers.go new file mode 100644 index 0000000..7081221 --- /dev/null +++ b/pkg/service/app_helpers.go @@ -0,0 +1,432 @@ +package service + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "metis/pkg/facts" + "metis/pkg/inventory" +) + +func (a *App) newJob(kind, node, host, device string) *Job { + job := &Job{ + ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()), + Kind: kind, + Node: node, + Host: host, + Device: device, + Status: JobQueued, + ProgressPct: 0, + StartedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + } + a.mu.Lock() + a.jobs[job.ID] = job + a.mu.Unlock() + return job +} + +func (a *App) job(id string) *Job { + a.mu.RLock() + defer a.mu.RUnlock() + return a.jobs[id] +} + +func (a *App) setJob(id string, update func(*Job)) { + a.mu.Lock() + defer a.mu.Unlock() + job := a.jobs[id] + if job == nil { + return + } + update(job) + job.UpdatedAt = time.Now().UTC() +} + +func (a *App) failJob(id string, err error) { + a.completeJob(id, func(j *Job) { + j.Status = JobError + j.Error = err.Error() + j.Message = err.Error() + }) +} + +func (a *App) completeJob(id string, update func(*Job)) { + 
a.mu.Lock() + defer a.mu.Unlock() + job := a.jobs[id] + if job == nil { + return + } + update(job) + if job.Status != JobError { + job.Status = JobDone + } + job.UpdatedAt = time.Now().UTC() + job.FinishedAt = time.Now().UTC() +} + +func (a *App) appendEvent(event Event) { + line, err := json.Marshal(event) + if err != nil { + return + } + f, err := os.OpenFile(a.settings.HistoryPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return + } + defer f.Close() + _, _ = f.Write(append(line, '\n')) +} + +func (a *App) recentEvents(limit int) []Event { + f, err := os.Open(a.settings.HistoryPath) + if err != nil { + return nil + } + defer f.Close() + events := make([]Event, 0, limit) + scanner := bufio.NewScanner(f) + for scanner.Scan() { + var event Event + if err := json.Unmarshal(scanner.Bytes(), &event); err != nil { + continue + } + events = append(events, event) + } + if len(events) > limit { + events = events[len(events)-limit:] + } + for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 { + events[i], events[j] = events[j], events[i] + } + return events +} + +func cachedImageName(source string) string { + return strings.TrimSuffix(filepath.Base(source), ".xz") +} + +func (a *App) replacementNodes() []inventory.NodeSpec { + nodes := make([]inventory.NodeSpec, 0, len(a.inventory.Nodes)) + for _, node := range a.inventory.Nodes { + spec, class, err := a.inventory.FindNode(node.Name) + if err != nil { + continue + } + if replacementReady(spec, class) { + nodes = append(nodes, node) + } + } + sort.Slice(nodes, func(i, j int) bool { + return nodes[i].Name < nodes[j].Name + }) + return nodes +} + +func (a *App) ensureReplacementReady(nodeName string) error { + node, class, err := a.inventory.FindNode(nodeName) + if err != nil { + return err + } + if replacementReady(node, class) { + return nil + } + return fmt.Errorf("node %s does not yet have a complete replacement definition", nodeName) +} + +func replacementReady(node *inventory.NodeSpec, class 
*inventory.NodeClass) bool { + if node == nil || class == nil { + return false + } + if strings.TrimSpace(class.Image) == "" || strings.TrimSpace(class.Checksum) == "" { + return false + } + if strings.TrimSpace(node.Name) == "" || strings.TrimSpace(node.Hostname) == "" || strings.TrimSpace(node.IP) == "" { + return false + } + if strings.TrimSpace(node.K3sRole) == "" { + return false + } + if strings.TrimSpace(node.K3sRole) != "server" && strings.TrimSpace(node.K3sURL) == "" { + return false + } + if strings.TrimSpace(node.K3sToken) == "" { + return false + } + if strings.TrimSpace(node.SSHUser) == "" || len(node.SSHAuthorized) == 0 { + return false + } + return true +} + +func (a *App) flashHosts() []string { + hosts := map[string]struct{}{} + for _, host := range a.settings.FlashHosts { + if value := strings.TrimSpace(host); value != "" { + hosts[value] = struct{}{} + } + } + for _, host := range []string{a.settings.DefaultFlashHost, a.settings.LocalHost} { + if value := strings.TrimSpace(host); value != "" { + hosts[value] = struct{}{} + } + } + for _, node := range clusterNodes() { + if value := strings.TrimSpace(node.Name); value != "" { + hosts[value] = struct{}{} + } + } + out := make([]string, 0, len(hosts)) + for host := range hosts { + out = append(out, host) + } + sort.Strings(out) + if a.settings.DefaultFlashHost == "" { + return out + } + return moveToFront(out, a.settings.DefaultFlashHost) +} + +func (a *App) loadSnapshots() error { + data, err := os.ReadFile(a.settings.SnapshotsPath) + if err != nil { + return err + } + var snapshots map[string]SnapshotRecord + if err := json.Unmarshal(data, &snapshots); err != nil { + return err + } + a.mu.Lock() + a.snapshots = snapshots + a.mu.Unlock() + for _, snap := range snapshots { + a.metrics.RecordSnapshot(snap.Node, "ok", snap.CollectedAt) + } + return nil +} + +func (a *App) persistSnapshots() error { + a.mu.RLock() + data, err := json.MarshalIndent(a.snapshots, "", " ") + a.mu.RUnlock() + if err != nil 
{ + return err + } + if err := os.MkdirAll(filepath.Dir(a.settings.SnapshotsPath), 0o755); err != nil { + return err + } + return os.WriteFile(a.settings.SnapshotsPath, data, 0o644) +} + +func (a *App) loadTargets() error { + data, err := os.ReadFile(a.settings.TargetsPath) + if err != nil { + return err + } + var targets map[string]facts.Targets + if err := json.Unmarshal(data, &targets); err != nil { + return err + } + a.mu.Lock() + a.targets = targets + a.mu.Unlock() + a.metrics.SetDriftTargets(targets, 0) + return nil +} + +func (a *App) persistTargets() error { + a.mu.RLock() + data, err := json.MarshalIndent(a.targets, "", " ") + a.mu.RUnlock() + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(a.settings.TargetsPath), 0o755); err != nil { + return err + } + return os.WriteFile(a.settings.TargetsPath, data, 0o644) +} + +func diffTargets(prev, next map[string]facts.Targets) []string { + classes := map[string]struct{}{} + for class := range prev { + classes[class] = struct{}{} + } + for class := range next { + classes[class] = struct{}{} + } + out := make([]string, 0) + for class := range classes { + if !targetsEqual(prev[class], next[class]) { + out = append(out, class) + } + } + sort.Strings(out) + return out +} + +func targetsEqual(a, b facts.Targets) bool { + if a.Kernel != b.Kernel || a.OSImage != b.OSImage || a.Containerd != b.Containerd || a.K3sVersion != b.K3sVersion { + return false + } + if len(a.Packages) != len(b.Packages) { + return false + } + for key, value := range a.Packages { + if b.Packages[key] != value { + return false + } + } + return true +} + +func humanBytes(value int64) string { + const unit = 1024 + if value < unit { + return fmt.Sprintf("%d B", value) + } + div, exp := int64(unit), 0 + for n := value / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %ciB", float64(value)/float64(div), "KMGTPE"[exp]) +} + +func firstLine(value string) string { + value = strings.TrimSpace(value) + 
if idx := strings.IndexByte(value, '\n'); idx >= 0 { + return strings.TrimSpace(value[:idx]) + } + return value +} + +func preferredDevice(devices []Device) string { + if len(devices) == 0 { + return "" + } + return devices[0].Path +} + +func errorString(err error) string { + if err == nil { + return "" + } + return err.Error() +} + +func cloneDevices(devices []Device) []Device { + if len(devices) == 0 { + return nil + } + out := make([]Device, len(devices)) + copy(out, devices) + return out +} + +func (a *App) cachedDevices(host string) ([]Device, error) { + host = strings.TrimSpace(host) + if host == "" { + host = a.settings.DefaultFlashHost + } + a.mu.RLock() + snapshot, ok := a.deviceStore[host] + a.mu.RUnlock() + if !ok { + return nil, nil + } + if strings.TrimSpace(snapshot.Err) != "" { + return cloneDevices(snapshot.Devices), errors.New(snapshot.Err) + } + return cloneDevices(snapshot.Devices), nil +} + +func (a *App) recordDevices(host string, devices []Device, err error) { + host = strings.TrimSpace(host) + if host == "" { + host = a.settings.DefaultFlashHost + } + snapshot := deviceSnapshot{ + Devices: cloneDevices(devices), + CheckedAt: time.Now().UTC(), + } + if err != nil { + snapshot.Err = err.Error() + } + a.mu.Lock() + if existing, ok := a.deviceStore[host]; ok && len(snapshot.Devices) == 0 { + snapshot.Devices = cloneDevices(existing.Devices) + } + a.deviceStore[host] = snapshot + a.mu.Unlock() +} + +func deviceScore(device Device) int { + score := 0 + model := strings.ToLower(strings.TrimSpace(device.Model)) + switch { + case strings.Contains(model, "microsd"), strings.Contains(model, "micro sd"): + score += 60 + case strings.Contains(model, "sdxc"), strings.Contains(model, "sdhc"), strings.Contains(model, "sd "): + score += 50 + case strings.Contains(model, "card"), strings.Contains(model, "reader"): + score += 40 + } + if device.Removable { + score += 20 + } + if device.Hotplug { + score += 10 + } + if device.Transport == "usb" { + score += 5 + 
} + if strings.HasPrefix(device.Name, "mmcblk") { + score += 25 + } + return score +} + +func moveToFront(values []string, preferred string) []string { + if preferred == "" || len(values) < 2 { + return values + } + out := append([]string{}, values...) + for idx, value := range out { + if value != preferred { + continue + } + copy(out[1:idx+1], out[:idx]) + out[0] = preferred + return out + } + return out +} + +func deleteNodeObject(node string) error { + if err := deleteNodeObjectInCluster(node); err == nil { + return nil + } + cmd := exec.Command("kubectl", "delete", "node", node, "--ignore-not-found") + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("delete node: %w: %s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func deleteNodeObjectInCluster(node string) error { + kube, err := kubeClientFactory() + if err != nil { + return errors.New("not running in cluster") + } + return kube.deleteRequest(fmt.Sprintf("/api/v1/nodes/%s", node)) +} diff --git a/pkg/service/cluster.go b/pkg/service/cluster.go index a3156fa..e1612b7 100644 --- a/pkg/service/cluster.go +++ b/pkg/service/cluster.go @@ -37,17 +37,23 @@ type kubeClient struct { client *http.Client } +var kubeClientFactory = inClusterKubeClient +var ( + kubeServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" + kubeServiceAccountCAPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +) + func inClusterKubeClient() (*kubeClient, error) { host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST")) port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT")) if host == "" || port == "" { return nil, fmt.Errorf("not running in cluster") } - token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") + token, err := os.ReadFile(kubeServiceAccountTokenPath) if err != nil { return nil, err } - caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + caPEM, err := 
os.ReadFile(kubeServiceAccountCAPath) if err != nil { return nil, err } @@ -118,7 +124,7 @@ func (k *kubeClient) deleteRequest(path string) error { } func clusterNodes() []clusterNode { - kube, err := inClusterKubeClient() + kube, err := kubeClientFactory() if err != nil { return nil } @@ -164,7 +170,7 @@ func (a *App) podImageForArch(arch string) string { } func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (string, error) { - kube, err := inClusterKubeClient() + kube, err := kubeClientFactory() if err != nil { return "", err } diff --git a/pkg/service/cluster_test.go b/pkg/service/cluster_test.go new file mode 100644 index 0000000..0ca3c5d --- /dev/null +++ b/pkg/service/cluster_test.go @@ -0,0 +1,123 @@ +package service + +import ( + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestInClusterKubeClientMissingEnv(t *testing.T) { + t.Setenv("KUBERNETES_SERVICE_HOST", "") + t.Setenv("KUBERNETES_SERVICE_PORT", "") + if _, err := inClusterKubeClient(); err == nil { + t.Fatal("expected inClusterKubeClient error without env") + } +} + +func TestKubeClientAndPodHelpers(t *testing.T) { + kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes": + _ = json.NewEncoder(w).Encode(map[string]any{ + "items": []any{ + map[string]any{ + "metadata": map[string]any{"name": "b", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}}, + "spec": map[string]any{"unschedulable": false}, + }, + map[string]any{ + "metadata": map[string]any{"name": "a", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}}, + "spec": map[string]any{"unschedulable": false}, + }, + }, + }) + case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"): + w.WriteHeader(http.StatusCreated) + case 
r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/nodes/"): + w.WriteHeader(http.StatusNotFound) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/") && strings.HasSuffix(r.URL.Path, "/log"): + http.Error(w, "proxy error from 127.0.0.1:6443", http.StatusBadGateway) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"): + _ = json.NewEncoder(w).Encode(map[string]any{ + "metadata": map[string]any{"name": filepath.Base(r.URL.Path)}, + "status": map[string]any{ + "phase": "Failed", + "reason": "CrashLoopBackOff", + "message": "boom", + "containerStatuses": []any{ + map[string]any{ + "state": map[string]any{ + "waiting": map[string]any{"reason": "ImagePullBackOff", "message": "pulling"}, + "terminated": map[string]any{"reason": "Completed", "message": "done"}, + }, + }, + }, + }, + }) + default: + http.NotFound(w, r) + } + })) + defer kube.Close() + + client := kubeClientFactoryForURL(kube.URL, kube.Client()) + if err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &map[string]any{}); err != nil { + t.Fatalf("jsonRequest: %v", err) + } + if err := client.deleteRequest("/api/v1/nodes/a"); err != nil { + t.Fatalf("deleteRequest 404 should be nil: %v", err) + } + if err := client.jsonRequest(http.MethodGet, "/missing", nil, &map[string]any{}); err == nil { + t.Fatal("expected jsonRequest failure on 404") + } + + origFactory := kubeClientFactory + kubeClientFactory = func() (*kubeClient, error) { + return client, nil + } + t.Cleanup(func() { kubeClientFactory = origFactory }) + nodes := clusterNodes() + if len(nodes) != 2 || nodes[0].Name != "a" { + t.Fatalf("clusterNodes sort mismatch: %#v", nodes) + } + + app := newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "runner:arm64" + state, err := app.remotePodState(client, "metis-build-test") + if err != nil { + t.Fatalf("remotePodState: %v", err) + } + if state.Reason != "Completed" || state.Message != "done" { + 
t.Fatalf("expected terminated state override, got %#v", state) + } + if _, err := app.remotePodLogs(client, "metis-build-test"); err == nil || !strings.Contains(err.Error(), "could not reach the node kubelet log endpoint") { + t.Fatalf("expected kubelet log endpoint error, got %v", err) + } + + if _, err := app.runRemotePod("job-1", "metis-fail-test", map[string]any{}); err == nil { + t.Fatal("expected runRemotePod failure") + } + if _, err := app.ensureDevice("titan-22", "missing"); err == nil { + t.Fatal("expected ensureDevice missing target to fail") + } +} + +func TestDeleteNodeObjectFallback(t *testing.T) { + tmp := t.TempDir() + kubectl := filepath.Join(tmp, "kubectl") + if err := os.WriteFile(kubectl, []byte("#!/usr/bin/env bash\nset -eu\nprintf '%s' \"$*\" > \""+filepath.Join(tmp, "kubectl.args")+"\"\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("PATH", tmp+string(os.PathListSeparator)+os.Getenv("PATH")) + origFactory := kubeClientFactory + kubeClientFactory = func() (*kubeClient, error) { return nil, errors.New("offline") } + t.Cleanup(func() { kubeClientFactory = origFactory }) + if err := deleteNodeObject("titan-15"); err != nil { + t.Fatalf("deleteNodeObject fallback: %v", err) + } +} diff --git a/pkg/service/coverage_more_test.go b/pkg/service/coverage_more_test.go new file mode 100644 index 0000000..eec2231 --- /dev/null +++ b/pkg/service/coverage_more_test.go @@ -0,0 +1,257 @@ +package service + +import ( + "encoding/json" + "encoding/pem" + "errors" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "metis/pkg/facts" + "metis/pkg/inventory" +) + +func TestServiceArtifactAndSnapshotPersistenceErrorBranches(t *testing.T) { + app := newTestApp(t) + + fileParent := filepath.Join(t.TempDir(), "blocked") + if err := os.WriteFile(fileParent, []byte("block"), 0o644); err != nil { + t.Fatal(err) + } + app.settings.ArtifactStatePath = filepath.Join(fileParent, "artifacts.json") + if err := app.persistArtifacts(); 
err == nil { + t.Fatal("expected persistArtifacts to fail when parent is a file") + } + + app.settings.SnapshotsPath = filepath.Join(fileParent, "snapshots.json") + if err := app.persistSnapshots(); err == nil { + t.Fatal("expected persistSnapshots to fail when parent is a file") + } + + app.settings.TargetsPath = filepath.Join(fileParent, "targets.json") + if err := app.persistTargets(); err == nil { + t.Fatal("expected persistTargets to fail when parent is a file") + } +} + +func TestServiceReplacementAndDeviceBranches(t *testing.T) { + app := newTestApp(t) + ready := inventory.NodeSpec{ + Name: "ready", + Class: "rpi4", + Hostname: "ready", + IP: "192.168.22.10", + K3sRole: "agent", + K3sURL: "https://192.168.22.1:6443", + K3sToken: "token", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + } + incomplete := inventory.NodeSpec{Name: "incomplete", Class: "rpi4"} + class := inventory.NodeClass{Name: "rpi4", Image: "file:///tmp/base.img", Checksum: "sha256:abc"} + app.inventory = &inventory.Inventory{Classes: []inventory.NodeClass{class}, Nodes: []inventory.NodeSpec{ready, incomplete}} + if got := app.replacementNodes(); len(got) != 1 || got[0].Name != "ready" { + t.Fatalf("replacementNodes = %#v", got) + } + if err := app.ensureReplacementReady("incomplete"); err == nil { + t.Fatal("expected ensureReplacementReady to reject incomplete node") + } + if diff := diffTargets(map[string]facts.Targets{"a": {Kernel: "1"}}, map[string]facts.Targets{"a": {Kernel: "2"}, "b": {Kernel: "3"}}); len(diff) != 2 { + t.Fatalf("diffTargets = %#v", diff) + } + + app.recordDevices("host", []Device{{Path: "/dev/sda"}}, nil) + if got, err := app.cachedDevices("host"); err != nil || len(got) != 1 { + t.Fatalf("cachedDevices = %#v err=%v", got, err) + } + app.recordDevices("host", nil, errors.New("boom")) + if got, err := app.cachedDevices("host"); err == nil || len(got) != 1 { + t.Fatalf("cachedDevices error snapshot = %#v err=%v", got, err) + } + if _, err := 
app.Replace("incomplete", "titan-22", "/dev/sdz"); err == nil { + t.Fatal("expected Replace to reject incomplete node") + } +} + +func TestServiceHarborBranches(t *testing.T) { + harbor := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"): + _, _ = w.Write([]byte(`[]`)) + case r.Method == http.MethodPost && r.URL.Path == "/api/v2.0/projects": + w.WriteHeader(http.StatusCreated) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"): + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"digest": "sha256:aaa", "push_time": "2026-04-01T10:00:00Z"}, + {"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"}, + }) + case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"): + w.WriteHeader(http.StatusAccepted) + default: + http.Error(w, "boom", http.StatusInternalServerError) + } + })) + defer harbor.Close() + + app := &App{settings: Settings{ + HarborAPIBase: harbor.URL + "/api/v2.0", + HarborUsername: "admin", + HarborPassword: "pw", + HarborProject: "metis", + HarborRegistry: "registry.example", + }} + if got := app.artifactRepo("node"); got != "registry.example/metis/node" { + t.Fatalf("artifactRepo = %q", got) + } + if err := app.ensureHarborProject(); err != nil { + t.Fatalf("ensureHarborProject create: %v", err) + } + if err := app.pruneHarborArtifacts("node", 1); err != nil { + t.Fatalf("pruneHarborArtifacts: %v", err) + } +} + +func TestServiceHarborErrorBranches(t *testing.T) { + harbor := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"): + http.Error(w, "lookup failed", http.StatusInternalServerError) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"): + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"digest": 
"sha256:aaa", "push_time": "2026-04-01T10:00:00Z"}, + {"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"}, + }) + case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"): + http.Error(w, "delete failed", http.StatusInternalServerError) + default: + http.NotFound(w, r) + } + })) + defer harbor.Close() + + app := &App{settings: Settings{ + HarborAPIBase: harbor.URL + "/api/v2.0", + HarborUsername: "admin", + HarborPassword: "pw", + HarborProject: "metis", + HarborRegistry: "registry.example", + }} + if err := app.ensureHarborProject(); err == nil { + t.Fatal("expected ensureHarborProject error") + } + if err := app.pruneHarborArtifacts("node", 0); err == nil { + t.Fatal("expected pruneHarborArtifacts error") + } +} + +func TestServiceClusterAndRemotePodBranches(t *testing.T) { + origTokenPath := kubeServiceAccountTokenPath + origCAPath := kubeServiceAccountCAPath + dir := t.TempDir() + kubeServiceAccountTokenPath = filepath.Join(dir, "token") + kubeServiceAccountCAPath = filepath.Join(dir, "ca.crt") + t.Cleanup(func() { + kubeServiceAccountTokenPath = origTokenPath + kubeServiceAccountCAPath = origCAPath + }) + if err := os.WriteFile(kubeServiceAccountTokenPath, []byte("tok"), 0o644); err != nil { + t.Fatal(err) + } + t.Setenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc") + t.Setenv("KUBERNETES_SERVICE_PORT", "443") + + srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes": + _ = json.NewEncoder(w).Encode(map[string]any{ + "items": []any{ + map[string]any{ + "metadata": map[string]any{"name": "b", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": "true"}}, + "spec": map[string]any{"unschedulable": false}, + }, + map[string]any{ + "metadata": map[string]any{"name": "a", "labels": map[string]string{"kubernetes.io/arch": "arm64", "node-role.kubernetes.io/worker": 
"true"}}, + "spec": map[string]any{"unschedulable": false}, + }, + }, + }) + case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"): + w.WriteHeader(http.StatusCreated) + case r.Method == http.MethodDelete: + w.WriteHeader(http.StatusOK) + case r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/log"): + _, _ = w.Write([]byte("pod logs")) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"): + _ = json.NewEncoder(w).Encode(map[string]any{ + "metadata": map[string]any{"name": filepath.Base(r.URL.Path)}, + "status": map[string]any{ + "phase": "Succeeded", + "message": `{"dest_path":"/tmp/out.img"}`, + "reason": "Completed", + }, + }) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: srv.Certificate().Raw}) + if err := os.WriteFile(kubeServiceAccountCAPath, certPEM, 0o644); err != nil { + t.Fatal(err) + } + + client, err := inClusterKubeClient() + if err != nil { + t.Fatalf("inClusterKubeClient: %v", err) + } + client.baseURL = srv.URL + client.client = srv.Client() + kubeClientFactory = func() (*kubeClient, error) { return client, nil } + t.Cleanup(func() { kubeClientFactory = inClusterKubeClient }) + + var nodePayload map[string]any + if err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &nodePayload); err != nil { + t.Fatalf("jsonRequest: %v", err) + } + if err := client.deleteRequest("/api/v1/nodes/a"); err != nil { + t.Fatalf("deleteRequest: %v", err) + } + if nodes := clusterNodes(); len(nodes) != 2 || nodes[0].Name != "a" { + t.Fatalf("clusterNodes = %#v", nodes) + } + + app := newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "runner:arm64" + state, err := app.remotePodState(client, "metis-build-test") + if err != nil { + t.Fatalf("remotePodState: %v", err) + } + if state.Phase != "Succeeded" || state.Message == "" { + t.Fatalf("remotePodState = %#v", state) + } + logs, err 
:= app.remotePodLogs(client, "metis-build-test") + if err != nil || logs != "pod logs" { + t.Fatalf("remotePodLogs = %q err=%v", logs, err) + } + if got := app.podImageForArch("amd64"); got != "" { + t.Fatalf("podImageForArch fallback = %q", got) + } + if got := app.podImageForArch("arm64"); got != "runner:arm64" { + t.Fatalf("podImageForArch arm64 = %q", got) + } + + job := app.newJob("build", "titan-15", "titan-22", "/dev/sdz") + app.settings.HarborAPIBase = "" + app.runBuild(job, false) + if got := app.job(job.ID); got == nil || got.Status != JobError { + t.Fatalf("runBuild should fail without harbor creds: %#v", got) + } + if _, err := app.Replace("incomplete", "titan-22", "/dev/sdz"); err == nil { + t.Fatal("expected Replace to reject incomplete node") + } +} diff --git a/pkg/service/harbor_test.go b/pkg/service/harbor_test.go new file mode 100644 index 0000000..069c2ce --- /dev/null +++ b/pkg/service/harbor_test.go @@ -0,0 +1,16 @@ +package service + +import "testing" + +func TestHarborHelpersErrorPaths(t *testing.T) { + app := &App{settings: Settings{HarborRegistry: "reg/", HarborProject: "proj"}} + if got := app.artifactRepo("node"); got != "reg/proj/node" { + t.Fatalf("artifactRepo = %q", got) + } + if err := app.ensureHarborProject(); err == nil { + t.Fatal("expected missing creds error") + } + if err := app.pruneHarborArtifacts("node", 1); err == nil { + t.Fatal("expected pruneHarborArtifacts to require API base") + } +} diff --git a/pkg/service/helpers_test.go b/pkg/service/helpers_test.go new file mode 100644 index 0000000..c62bf3f --- /dev/null +++ b/pkg/service/helpers_test.go @@ -0,0 +1,355 @@ +package service + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "reflect" + "testing" + "time" + + "metis/pkg/facts" + "metis/pkg/inventory" + "metis/pkg/sentinel" +) + +func TestSettingsHelpersAndSmallUtilities(t *testing.T) { + dataDir := filepath.Join(t.TempDir(), "data") + t.Setenv("METIS_DATA_DIR", dataDir) + 
t.Setenv("METIS_FLASH_HOSTS", "a, b,, c") + t.Setenv("METIS_MAX_DEVICE_BYTES", "12345") + t.Setenv("METIS_DEFAULT_FLASH_HOST", "flash-1") + t.Setenv("METIS_LOCAL_HOST", "local-1") + + settings := FromEnv() + if got, want := settings.CacheDir, filepath.Join(dataDir, "cache"); got != want { + t.Fatalf("CacheDir = %q, want %q", got, want) + } + if settings.DefaultFlashHost != "flash-1" || settings.LocalHost != "local-1" { + t.Fatalf("unexpected env settings: %+v", settings) + } + if settings.MaxDeviceBytes != 12345 { + t.Fatalf("expected MaxDeviceBytes=12345, got %d", settings.MaxDeviceBytes) + } + if !reflect.DeepEqual(splitList("a, b,, c"), []string{"a", "b", "c"}) { + t.Fatalf("splitList mismatch") + } + if got := getenvInt64("METIS_MAX_DEVICE_BYTES", 1); got != 12345 { + t.Fatalf("getenvInt64 = %d", got) + } + if got := hostnameOr("fallback"); got == "" { + t.Fatal("hostnameOr returned empty string") + } + if got := humanBytes(1536); got != "1.5 KiB" { + t.Fatalf("humanBytes = %q", got) + } + if got := firstLine("alpha\nbeta"); got != "alpha" { + t.Fatalf("firstLine = %q", got) + } + if got := moveToFront([]string{"b", "a", "c"}, "a"); !reflect.DeepEqual(got, []string{"a", "b", "c"}) { + t.Fatalf("moveToFront = %#v", got) + } + if got := errorString(nil); got != "" { + t.Fatalf("errorString(nil) = %q", got) + } + if got := preferredDevice([]Device{{Path: "/dev/sda"}}); got != "/dev/sda" { + t.Fatalf("preferredDevice = %q", got) + } + if got := deviceScore(Device{Name: "mmcblk0", Model: "Micro SD card", Removable: true, Hotplug: true, Transport: "usb"}); got <= 0 { + t.Fatalf("expected positive device score, got %d", got) + } + if got := inventoryNodeArch(nil, nil); got != "arm64" { + t.Fatalf("inventoryNodeArch fallback = %q", got) + } + if got := (&App{settings: Settings{HarborRegistry: "reg/", HarborProject: "/proj/"}}).artifactRepo("node"); got != "reg/proj/node" { + t.Fatalf("artifactRepo = %q", got) + } +} + +func TestAppJobDeviceAndStateHelpers(t *testing.T) 
{ + app := newTestApp(t) + app.settings.HistoryPath = filepath.Join(t.TempDir(), "history.jsonl") + app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") + + job := app.newJob("build", "titan-15", "titan-22", "/dev/sdz") + if job.Status != JobQueued { + t.Fatalf("new job status = %s", job.Status) + } + app.setJob(job.ID, func(j *Job) { + j.Status = JobRunning + j.Stage = "build" + }) + if got := app.job(job.ID); got == nil || got.Status != JobRunning { + t.Fatalf("setJob did not update job: %#v", got) + } + app.completeJob(job.ID, func(j *Job) { + j.Message = "done" + }) + if got := app.job(job.ID); got == nil || got.Status != JobDone || got.FinishedAt.IsZero() { + t.Fatalf("completeJob did not finish job: %#v", got) + } + failed := app.newJob("replace", "titan-15", "titan-22", "/dev/sdz") + app.failJob(failed.ID, os.ErrNotExist) + if got := app.job(failed.ID); got == nil || got.Status != JobError || got.Error == "" { + t.Fatalf("failJob did not mark error: %#v", got) + } + + app.appendEvent(Event{Kind: "one", Summary: "first"}) + app.appendEvent(Event{Kind: "two", Summary: "second"}) + events := app.recentEvents(1) + if len(events) != 1 || events[0].Kind != "two" { + t.Fatalf("recentEvents returned %#v", events) + } + + app.recordDevices("titan-22", []Device{{Name: "sda", Path: "/dev/sda"}}, nil) + devices, err := app.cachedDevices("titan-22") + if err != nil || len(devices) != 1 || devices[0].Path != "/dev/sda" { + t.Fatalf("cachedDevices = %#v err=%v", devices, err) + } + devices[0].Path = "/dev/mutated" + again, _ := app.cachedDevices("titan-22") + if again[0].Path != "/dev/sda" { + t.Fatalf("cachedDevices should return a copy, got %#v", again) + } + + app.recordDevices("titan-22", nil, os.ErrPermission) + if _, err := app.cachedDevices("titan-22"); err == nil { + t.Fatal("expected cached device error") + } + + app.recordDevices("titan-22", []Device{{Path: "/dev/sda"}}, nil) + state := app.State("titan-22") + if state.SelectedHost != 
"titan-22" || state.PreferredDevice == "" { + t.Fatalf("unexpected state: %+v", state) + } +} + +func TestAppPersistenceAndTargets(t *testing.T) { + dir := t.TempDir() + invPath := filepath.Join(dir, "inventory.yaml") + if err := os.WriteFile(invPath, []byte(` +classes: + - name: rpi4 + arch: arm64 + os: armbian + image: file:///tmp/base.img +nodes: + - name: titan-15 + class: rpi4 + hostname: titan-15 + ip: 192.168.22.43 + k3s_role: agent +`), 0o644); err != nil { + t.Fatal(err) + } + snapshotsPath := filepath.Join(dir, "snapshots.json") + targetsPath := filepath.Join(dir, "targets.json") + artifactStatePath := filepath.Join(dir, "artifacts.json") + + seedSnapshots := map[string]SnapshotRecord{ + "titan-15": { + Node: "titan-15", + CollectedAt: testTime(t), + Snapshot: sentinel.Snapshot{Hostname: "titan-15", Kernel: "6.6.63", K3sVersion: "v1.31.5+k3s1"}, + }, + } + data, _ := json.MarshalIndent(seedSnapshots, "", " ") + if err := os.WriteFile(snapshotsPath, data, 0o644); err != nil { + t.Fatal(err) + } + + seedTargets := map[string]facts.Targets{ + "rpi4": {Kernel: "6.6.63"}, + } + data, _ = json.MarshalIndent(seedTargets, "", " ") + if err := os.WriteFile(targetsPath, data, 0o644); err != nil { + t.Fatal(err) + } + + seedArtifacts := map[string]ArtifactSummary{ + "titan-15": {Node: "titan-15", Ref: "reg/proj/titan-15:latest"}, + } + data, _ = json.MarshalIndent(seedArtifacts, "", " ") + if err := os.WriteFile(artifactStatePath, data, 0o644); err != nil { + t.Fatal(err) + } + + app, err := NewApp(Settings{ + InventoryPath: invPath, + CacheDir: filepath.Join(dir, "cache"), + ArtifactDir: filepath.Join(dir, "artifacts"), + ArtifactStatePath: artifactStatePath, + HistoryPath: filepath.Join(dir, "history.jsonl"), + SnapshotsPath: snapshotsPath, + TargetsPath: targetsPath, + DefaultFlashHost: "titan-22", + FlashHosts: []string{"titan-22"}, + LocalHost: "titan-22", + AllowedGroups: []string{"admin"}, + }) + if err != nil { + t.Fatalf("NewApp: %v", err) + } + + if got := 
app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:latest" { + t.Fatalf("artifacts() = %q", got) + } + if err := app.recordArtifact(ArtifactSummary{Node: "titan-15", Ref: "reg/proj/titan-15:v2"}); err != nil { + t.Fatalf("recordArtifact: %v", err) + } + if err := app.loadArtifacts(); err != nil { + t.Fatalf("loadArtifacts: %v", err) + } + if got := app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:v2" { + t.Fatalf("recordArtifact/persist mismatch: %q", got) + } + + if err := app.StoreSnapshot(SnapshotRecord{Node: "titan-15", Snapshot: sentinel.Snapshot{Hostname: "titan-15"}}); err != nil { + t.Fatalf("StoreSnapshot: %v", err) + } + if event, err := app.WatchSentinel(); err != nil || event == nil || event.Kind != "sentinel.watch" { + t.Fatalf("WatchSentinel: event=%#v err=%v", event, err) + } +} + +func TestHelperBranchesAndPersistenceFailures(t *testing.T) { + app := newTestApp(t) + + if got := cachedImageName("/tmp/archive/base.img.xz"); got != "base.img" { + t.Fatalf("cachedImageName = %q", got) + } + if got := humanBytes(1); got != "1 B" { + t.Fatalf("humanBytes(1) = %q", got) + } + if got := humanBytes(1024 * 1024); got != "1.0 MiB" { + t.Fatalf("humanBytes(1MiB) = %q", got) + } + if got := errorString(fmt.Errorf("boom")); got != "boom" { + t.Fatalf("errorString = %q", got) + } + if got := moveToFront([]string{"a", "b", "c"}, "missing"); !reflect.DeepEqual(got, []string{"a", "b", "c"}) { + t.Fatalf("moveToFront missing = %#v", got) + } + if targetsEqual(facts.Targets{Kernel: "a"}, facts.Targets{Kernel: "b"}) { + t.Fatal("targetsEqual should reject differing kernels") + } + if got := deviceScore(Device{Name: "reader", Model: "Card reader", Transport: "usb", Removable: true, Hotplug: true}); got < 75 { + t.Fatalf("unexpected deviceScore: %d", got) + } + + if got := cachedImageName("foo.xz"); got != "foo" { + t.Fatalf("cachedImageName alias = %q", got) + } + if got := app.flashHosts(); len(got) == 0 { + t.Fatal("flashHosts returned empty list") + 
} + + app.settings.SnapshotsPath = filepath.Join(t.TempDir(), "missing", "snapshots.json") + if err := app.loadSnapshots(); err == nil { + t.Fatal("expected loadSnapshots error for missing file") + } + app.settings.TargetsPath = filepath.Join(t.TempDir(), "missing", "targets.json") + if err := app.loadTargets(); err == nil { + t.Fatal("expected loadTargets error for missing file") + } + app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "missing", "artifacts.json") + if err := app.loadArtifacts(); err == nil { + t.Fatal("expected loadArtifacts error for missing file") + } + + tmpDir := t.TempDir() + app.settings.SnapshotsPath = tmpDir + if err := app.persistSnapshots(); err == nil { + t.Fatal("expected persistSnapshots error when path is a directory") + } + app.settings.TargetsPath = tmpDir + if err := app.persistTargets(); err == nil { + t.Fatal("expected persistTargets error when path is a directory") + } + app.settings.ArtifactStatePath = tmpDir + if err := app.persistArtifacts(); err == nil { + t.Fatal("expected persistArtifacts error when path is a directory") + } + + if err := app.StoreSnapshot(SnapshotRecord{}); err == nil { + t.Fatal("expected snapshot validation error") + } + if _, err := app.Build("missing"); err == nil { + t.Fatal("expected Build to reject unknown node") + } + if _, err := app.Replace("missing", "", ""); err == nil { + t.Fatal("expected Replace to reject unknown node") + } +} + +func TestNewAppReportsInventoryErrors(t *testing.T) { + settings := Settings{ + InventoryPath: filepath.Join(t.TempDir(), "missing.yaml"), + CacheDir: t.TempDir(), + ArtifactDir: t.TempDir(), + ArtifactStatePath: filepath.Join(t.TempDir(), "artifacts.json"), + HistoryPath: filepath.Join(t.TempDir(), "history.jsonl"), + SnapshotsPath: filepath.Join(t.TempDir(), "snapshots.json"), + TargetsPath: filepath.Join(t.TempDir(), "targets.json"), + } + if _, err := NewApp(settings); err == nil { + t.Fatal("expected NewApp inventory error") + } +} + +func 
TestAppHelperNoopAndInvalidStateBranches(t *testing.T) { + app := newTestApp(t) + app.setJob("missing", func(*Job) { t.Fatal("setJob should not run for missing job") }) + app.completeJob("missing", func(*Job) { t.Fatal("completeJob should not run for missing job") }) + app.failJob("missing", os.ErrNotExist) + + if replacementReady(nil, nil) { + t.Fatal("replacementReady nil should be false") + } + if replacementReady(&inventory.NodeSpec{}, &inventory.NodeClass{}) { + t.Fatal("replacementReady empty should be false") + } + app.inventory = &inventory.Inventory{} + if got := app.replacementNodes(); len(got) != 0 { + t.Fatalf("replacementNodes empty inventory = %#v", got) + } + app.settings.FlashHosts = []string{"titan-22"} + app.settings.DefaultFlashHost = "" + if got := app.flashHosts(); len(got) == 0 { + t.Fatal("flashHosts should still include cluster nodes") + } + if !replacementReady(&inventory.NodeSpec{ + Name: "ready", + Hostname: "ready", + IP: "192.168.22.10", + K3sRole: "agent", + K3sURL: "https://192.168.22.1:6443", + K3sToken: "token", + SSHUser: "atlas", + SSHAuthorized: []string{"ssh-ed25519 AAA"}, + }, &inventory.NodeClass{Image: "img", Checksum: "sum"}) { + t.Fatal("replacementReady valid node should be true") + } + + fileParent := filepath.Join(t.TempDir(), "blocked") + if err := os.WriteFile(fileParent, []byte("block"), 0o644); err != nil { + t.Fatal(err) + } + app.settings.HistoryPath = filepath.Join(fileParent, "history.jsonl") + app.appendEvent(Event{Kind: "noop"}) + if got := app.recentEvents(1); got != nil { + t.Fatalf("recentEvents missing file = %#v", got) + } + + kube := fakeKubeServer(t) + installKubeFactory(t, kube) + if err := deleteNodeObjectInCluster("titan-15"); err != nil { + t.Fatalf("deleteNodeObjectInCluster success: %v", err) + } +} + +func testTime(t *testing.T) time.Time { + t.Helper() + return time.Date(2026, time.March, 31, 12, 0, 0, 0, time.UTC) +} diff --git a/pkg/service/metrics.go b/pkg/service/metrics.go index 
be36533..49564ac 100644 --- a/pkg/service/metrics.go +++ b/pkg/service/metrics.go @@ -37,18 +37,24 @@ func NewMetrics() *Metrics { } } +// RecordBuild increments the per-node build counter because the UI and +// Prometheus graphs need a stable view of build outcomes by node. func (m *Metrics) RecordBuild(node, status string) { m.mu.Lock() defer m.mu.Unlock() m.builds[counterKey(node, status)]++ } +// RecordFlash increments the per-node and per-host flash counter because the +// replacement workflow needs separate visibility for build and burn stages. func (m *Metrics) RecordFlash(node, host, status string) { m.mu.Lock() defer m.mu.Unlock() m.flashes[counterKey(node, host, status)]++ } +// RecordSnapshot tracks accepted sentinel snapshots because drift detection +// depends on the last successful push per node. func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) { m.mu.Lock() defer m.mu.Unlock() @@ -58,6 +64,8 @@ func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) { } } +// RecordWatch increments the sentinel watch outcome counter because the +// dashboard needs to show whether the latest reconciliation succeeded. func (m *Metrics) RecordWatch(status string) { m.mu.Lock() defer m.mu.Unlock() @@ -67,6 +75,8 @@ func (m *Metrics) RecordWatch(status string) { } } +// SetDriftTargets refreshes the target-count gauge because the UI exposes how +// much class configuration is already populated versus still missing. 
func (m *Metrics) SetDriftTargets(targets map[string]facts.Targets, changed int) { m.mu.Lock() defer m.mu.Unlock() diff --git a/pkg/service/metrics_test.go b/pkg/service/metrics_test.go new file mode 100644 index 0000000..0971b52 --- /dev/null +++ b/pkg/service/metrics_test.go @@ -0,0 +1,33 @@ +package service + +import ( + "strings" + "testing" + "time" + + "metis/pkg/facts" +) + +func TestMetricsRenderAndKeyHelpers(t *testing.T) { + metrics := NewMetrics() + metrics.RecordBuild("n1", "ok") + metrics.RecordFlash("n1", "h1", "ok") + metrics.RecordSnapshot("n1", "ok", time.Unix(123, 0)) + metrics.RecordWatch("ok") + metrics.SetDriftTargets(map[string]facts.Targets{ + "c1": {Kernel: "k", Packages: map[string]string{"p": "1"}}, + }, 2) + + var b strings.Builder + metrics.Render(&b) + out := b.String() + if !strings.Contains(out, `metis_builds_total{node="n1",status="ok"} 1`) { + t.Fatalf("missing build metric: %s", out) + } + if !strings.Contains(out, `metis_class_target_fields{class="c1"} 2`) { + t.Fatalf("missing target metric: %s", out) + } + if got := splitKey("a", 3); len(got) != 3 || got[0] != "a" || got[1] != "" || got[2] != "" { + t.Fatalf("splitKey fallback = %#v", got) + } +} diff --git a/pkg/service/remote.go b/pkg/service/remote.go index 11e9cbc..d53ea03 100644 --- a/pkg/service/remote.go +++ b/pkg/service/remote.go @@ -3,13 +3,9 @@ package service import ( "encoding/json" "fmt" - "math" - "path/filepath" "sort" "strings" "time" - - "metis/pkg/inventory" ) const ( @@ -20,10 +16,14 @@ const ( vaultSSHKeysSecretPath = "kv/data/atlas/maintenance/metis-ssh-keys" ) +// ListDevices returns cached device data because the UI needs a cheap refresh +// path while remote enumeration is still in flight. func (a *App) ListDevices(host string) ([]Device, error) { return a.cachedDevices(host) } +// RefreshDevices rebuilds the flash-device list because the chooser needs the +// latest host-specific USB inventory before a burn can start. 
func (a *App) RefreshDevices(host string) ([]Device, error) { if host == "" { host = a.settings.DefaultFlashHost @@ -280,360 +280,3 @@ func (a *App) heartbeatRemoteJob(jobID string) { } }) } - -func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) { - seconds := elapsed.Seconds() - switch { - case seconds < 20: - return ramp(seconds, 0, 20, 8, 14), fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node) - case seconds < 120: - return ramp(seconds, 20, 120, 14, 30), fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder) - case seconds < 360: - return ramp(seconds, 120, 360, 30, 58), fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder) - case seconds < 540: - return ramp(seconds, 360, 540, 58, 70), fmt.Sprintf("Compressing the replacement image for %s before upload", node) - default: - return math.Min(76, ramp(seconds, 540, 900, 70, 76)), fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node) - } -} - -func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) { - seconds := elapsed.Seconds() - switch { - case seconds < 10: - return ramp(seconds, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host) - case seconds < 45: - return ramp(seconds, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host) - default: - return math.Min(98, ramp(seconds, 45, 120, 96, 98)), fmt.Sprintf("Flushing buffers and finishing the write on %s", host) - } -} - -func prettyDeviceTarget(path string) string { - switch { - case strings.HasPrefix(path, "hosttmp://"): - return "/tmp" - case strings.TrimSpace(path) == "": - return "the selected target" - default: - return path - } -} - -func ramp(value, start, end, min, max float64) float64 { - if end <= start { - return max - } - if value <= start { - return min - } - if value >= end { - return max - } - return min + 
((value-start)/(end-start))*(max-min) -} - -func (a *App) ensureDevice(host, path string) (*Device, error) { - if strings.TrimSpace(path) == "" { - return nil, fmt.Errorf("select removable media before starting a flash run") - } - devices, err := a.RefreshDevices(host) - if err != nil { - return nil, err - } - for _, device := range devices { - if device.Path == path { - return &device, nil - } - } - return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host) -} - -func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) { - nodes := clusterNodes() - storageNodes := map[string]struct{}{} - for _, node := range a.inventory.Nodes { - if len(node.LonghornDisks) > 0 { - storageNodes[node.Name] = struct{}{} - } - } - type scored struct { - node clusterNode - score int - } - candidates := make([]scored, 0) - for _, node := range nodes { - if node.Arch != arch || node.Unschedulable || node.ControlPlane { - continue - } - score := 0 - if node.Worker { - score += 40 - } - switch arch { - case "arm64": - if node.Hardware == "rpi5" { - score += 30 - } - if _, storage := storageNodes[node.Name]; storage { - score -= 50 - } - case "amd64": - if node.Name == a.settings.DefaultFlashHost { - score += 30 - } - if node.Name == "titan-24" { - score -= 10 - } - } - if flashHost != "" && node.Name == flashHost { - score += 5 - } - candidates = append(candidates, scored{node: node, score: score}) - } - sort.Slice(candidates, func(i, j int) bool { - if candidates[i].score != candidates[j].score { - return candidates[i].score > candidates[j].score - } - return candidates[i].node.Name < candidates[j].node.Name - }) - if len(candidates) == 0 { - return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch) - } - return candidates[0].node, nil -} - -func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any { - return map[string]any{ - "apiVersion": "v1", - "kind": "Pod", - "metadata": map[string]any{ - "name": 
name, - "namespace": a.settings.Namespace, - "labels": map[string]string{"app": "metis-remote", "metis-run": "devices"}, - }, - "spec": map[string]any{ - "restartPolicy": "Never", - "serviceAccountName": "metis", - "nodeSelector": map[string]string{ - "kubernetes.io/hostname": host, - }, - "containers": []map[string]any{ - { - "name": "remote-devices", - "image": image, - "imagePullPolicy": "Always", - "command": []string{ - "metis", "remote-devices", - "--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes), - "--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir), - }, - "securityContext": map[string]any{"privileged": true, "runAsUser": 0}, - "volumeMounts": []map[string]any{ - {"name": "host-dev", "mountPath": "/dev"}, - {"name": "host-sys", "mountPath": "/sys", "readOnly": true}, - {"name": "host-udev", "mountPath": "/run/udev", "readOnly": true}, - {"name": "host-tmp", "mountPath": "/host-tmp"}, - }, - }, - }, - "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, - "volumes": []map[string]any{ - {"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}}, - {"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}}, - {"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}}, - {"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}}, - }, - }, - } -} - -func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any { - return map[string]any{ - "apiVersion": "v1", - "kind": "Pod", - "metadata": map[string]any{ - "name": name, - "namespace": a.settings.Namespace, - "labels": map[string]string{"app": "metis-remote", "metis-run": "build"}, - "annotations": vaultRuntimeAnnotations(true), - }, - "spec": map[string]any{ - "restartPolicy": "Never", - "serviceAccountName": "metis", - "nodeSelector": map[string]string{ - "kubernetes.io/hostname": host, - }, - "containers": []map[string]any{ - { - "name": "remote-build", - "image": image, - "imagePullPolicy": 
"Always", - "command": []string{"/bin/sh", "-c"}, - "args": []string{ - remoteWorkerEntrypoint( - true, - "remote-build", - "--inventory", a.settings.InventoryPath, - "--node", node, - "--cache", "/workspace/cache", - "--work-dir", "/workspace/build", - "--artifact-ref", artifactRef, - "--build-tag", buildTag, - "--harbor-registry", a.settings.HarborRegistry, - ), - }, - "envFrom": []map[string]any{ - {"configMapRef": map[string]any{"name": "metis"}}, - }, - "volumeMounts": []map[string]any{ - {"name": "workspace", "mountPath": "/workspace"}, - }, - }, - }, - "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, - "volumes": []map[string]any{ - {"name": "workspace", "emptyDir": map[string]any{}}, - }, - }, - } -} - -func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any { - return map[string]any{ - "apiVersion": "v1", - "kind": "Pod", - "metadata": map[string]any{ - "name": name, - "namespace": a.settings.Namespace, - "labels": map[string]string{"app": "metis-remote", "metis-run": "flash"}, - "annotations": vaultRuntimeAnnotations(false), - }, - "spec": map[string]any{ - "restartPolicy": "Never", - "serviceAccountName": "metis", - "nodeSelector": map[string]string{ - "kubernetes.io/hostname": host, - }, - "containers": []map[string]any{ - { - "name": "remote-flash", - "image": image, - "imagePullPolicy": "Always", - "command": []string{"/bin/sh", "-c"}, - "args": []string{ - remoteWorkerEntrypoint( - false, - "remote-flash", - "--node", node, - "--device", device, - "--artifact-ref", artifactRef, - "--work-dir", "/workspace/flash", - "--harbor-registry", a.settings.HarborRegistry, - "--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir), - ), - }, - "securityContext": map[string]any{"privileged": true, "runAsUser": 0}, - "envFrom": []map[string]any{ - {"configMapRef": map[string]any{"name": "metis"}}, - }, - "volumeMounts": []map[string]any{ - {"name": "workspace", "mountPath": "/workspace"}, - 
{"name": "host-dev", "mountPath": "/dev"}, - {"name": "host-sys", "mountPath": "/sys", "readOnly": true}, - {"name": "host-udev", "mountPath": "/run/udev", "readOnly": true}, - {"name": "host-tmp", "mountPath": "/host-tmp"}, - }, - }, - }, - "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, - "volumes": []map[string]any{ - {"name": "workspace", "emptyDir": map[string]any{}}, - {"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}}, - {"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}}, - {"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}}, - {"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}}, - }, - }, - } -} - -func (a *App) remoteArtifactNote(node string) string { - if summary, ok := a.artifacts()[node]; ok && strings.TrimSpace(summary.Ref) != "" { - return summary.Ref - } - return a.artifactRepo(node) + ":latest" -} - -func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string { - if class != nil && strings.TrimSpace(class.Arch) != "" { - return strings.TrimSpace(class.Arch) - } - return "arm64" -} - -func mountedHostTmpDir(path string) string { - path = strings.TrimSpace(path) - switch { - case path == "", path == "/tmp": - return "/host-tmp" - case strings.HasPrefix(path, "/tmp/"): - return filepath.Join("/host-tmp", strings.TrimPrefix(path, "/tmp/")) - default: - return filepath.Join("/host-tmp", strings.TrimPrefix(path, "/")) - } -} - -func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string { - annotations := map[string]string{ - "vault.hashicorp.com/agent-inject": "true", - "vault.hashicorp.com/agent-pre-populate-only": "true", - "vault.hashicorp.com/role": vaultRoleMaintenance, - "vault.hashicorp.com/agent-inject-secret-metis-runtime-env.sh": vaultRuntimeSecretPath, - "vault.hashicorp.com/agent-inject-template-metis-runtime-env.sh": `{{ with secret "kv/data/atlas/maintenance/metis-runtime" }} -export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}" 
-{{ end }}`, - "vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh": vaultHarborSecretPath, - "vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh": `{{ with secret "kv/data/atlas/harbor/harbor-core" }} -export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}" -{{ end }}`, - } - if includeSSHKeys { - annotations["vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh"] = vaultSSHKeysSecretPath - annotations["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"] = `{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }} -export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}" -export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}" -export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}" -export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}" -{{ end }}` - } - return annotations -} - -func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string { - lines := []string{ - "set -e", - ". /vault/secrets/metis-runtime-env.sh", - ". /vault/secrets/metis-harbor-env.sh", - } - if includeSSHKeys { - lines = append(lines, ". 
/vault/secrets/metis-ssh-env.sh") - } - lines = append(lines, "exec "+shellJoin(append([]string{"metis"}, args...)...)) - return strings.Join(lines, "\n") -} - -func shellJoin(args ...string) string { - quoted := make([]string, 0, len(args)) - for _, arg := range args { - quoted = append(quoted, shellQuote(arg)) - } - return strings.Join(quoted, " ") -} - -func shellQuote(value string) string { - if value == "" { - return "''" - } - return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'" -} diff --git a/pkg/service/remote_error_test.go b/pkg/service/remote_error_test.go new file mode 100644 index 0000000..b20ad70 --- /dev/null +++ b/pkg/service/remote_error_test.go @@ -0,0 +1,61 @@ +package service + +import ( + "path/filepath" + "testing" + "time" +) + +func TestRemoteWorkflowErrorBranches(t *testing.T) { + kube := fakeKubeServer(t) + installKubeFactory(t, kube) + app := newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "" + + if _, err := app.RefreshDevices("titan-22"); err == nil { + t.Fatal("expected RefreshDevices to fail without runner image") + } + + job := app.newJob("build", "titan-15", "", "") + app.runBuild(job, false) + if got := app.job(job.ID); got == nil || got.Status != JobError { + t.Fatalf("expected build job error, got %#v", got) + } + + job = app.newJob("flash", "titan-15", "titan-22", "/dev/sdz") + if err := app.flashArtifact(job.ID, "registry.example/metis/titan-15"); err == nil { + t.Fatal("expected flashArtifact error") + } + + app.setJob(job.ID, func(j *Job) { + j.Status = JobRunning + j.Stage = "build" + j.StageStartedAt = time.Now().Add(-30 * time.Second) + }) + app.heartbeatRemoteJob(job.ID) + if got := app.job(job.ID); got == nil || got.ProgressPct == 0 { + t.Fatalf("expected heartbeat progress, got %#v", got) + } +} + +func TestRemoteWorkflowMissingRunnerImageBranch(t *testing.T) { + kube := fakeKubeServer(t) + harbor := fakeHarborServer(t, true) + installKubeFactory(t, kube) + app := 
newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "" + app.settings.HarborAPIBase = harbor.URL + "/api/v2.0" + app.settings.HarborUsername = "admin" + app.settings.HarborPassword = "pw" + app.settings.HarborProject = "metis" + app.settings.HarborRegistry = "registry.example" + app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") + + job := app.newJob("build", "titan-15", "", "") + app.runBuild(job, false) + if got := app.job(job.ID); got == nil || got.Status != JobError { + t.Fatalf("expected build job error, got %#v", got) + } +} diff --git a/pkg/service/remote_helpers.go b/pkg/service/remote_helpers.go new file mode 100644 index 0000000..6361408 --- /dev/null +++ b/pkg/service/remote_helpers.go @@ -0,0 +1,369 @@ +package service + +import ( + "fmt" + "math" + "path/filepath" + "sort" + "strings" + "time" + + "metis/pkg/inventory" +) + +func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) { + seconds := elapsed.Seconds() + switch { + case seconds < 20: + return ramp(seconds, 0, 20, 8, 14), fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node) + case seconds < 120: + return ramp(seconds, 20, 120, 14, 30), fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder) + case seconds < 360: + return ramp(seconds, 120, 360, 30, 58), fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder) + case seconds < 540: + return ramp(seconds, 360, 540, 58, 70), fmt.Sprintf("Compressing the replacement image for %s before upload", node) + default: + return math.Min(76, ramp(seconds, 540, 900, 70, 76)), fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node) + } +} + +func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) { + seconds := elapsed.Seconds() + switch { + case seconds < 10: + return ramp(seconds, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on 
%s", artifact, host) + case seconds < 45: + return ramp(seconds, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host) + default: + return math.Min(98, ramp(seconds, 45, 120, 96, 98)), fmt.Sprintf("Flushing buffers and finishing the write on %s", host) + } +} + +func prettyDeviceTarget(path string) string { + switch { + case strings.HasPrefix(path, "hosttmp://"): + return "/tmp" + case strings.TrimSpace(path) == "": + return "the selected target" + default: + return path + } +} + +func ramp(value, start, end, min, max float64) float64 { + if end <= start { + return max + } + if value <= start { + return min + } + if value >= end { + return max + } + return min + ((value-start)/(end-start))*(max-min) +} + +func (a *App) ensureDevice(host, path string) (*Device, error) { + if strings.TrimSpace(path) == "" { + return nil, fmt.Errorf("select removable media before starting a flash run") + } + devices, err := a.RefreshDevices(host) + if err != nil { + return nil, err + } + for _, device := range devices { + if device.Path == path { + return &device, nil + } + } + return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host) +} + +func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) { + nodes := clusterNodes() + storageNodes := map[string]struct{}{} + for _, node := range a.inventory.Nodes { + if len(node.LonghornDisks) > 0 { + storageNodes[node.Name] = struct{}{} + } + } + type scored struct { + node clusterNode + score int + } + candidates := make([]scored, 0) + for _, node := range nodes { + if node.Arch != arch || node.Unschedulable || node.ControlPlane { + continue + } + score := 0 + if node.Worker { + score += 40 + } + switch arch { + case "arm64": + if node.Hardware == "rpi5" { + score += 30 + } + if _, storage := storageNodes[node.Name]; storage { + score -= 50 + } + case "amd64": + if node.Name == a.settings.DefaultFlashHost { + score += 30 + } + if node.Name == "titan-24" { + 
score -= 10 + } + } + if flashHost != "" && node.Name == flashHost { + score += 5 + } + candidates = append(candidates, scored{node: node, score: score}) + } + sort.Slice(candidates, func(i, j int) bool { + if candidates[i].score != candidates[j].score { + return candidates[i].score > candidates[j].score + } + return candidates[i].node.Name < candidates[j].node.Name + }) + if len(candidates) == 0 { + return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch) + } + return candidates[0].node, nil +} + +func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any { + return map[string]any{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]any{ + "name": name, + "namespace": a.settings.Namespace, + "labels": map[string]string{"app": "metis-remote", "metis-run": "devices"}, + }, + "spec": map[string]any{ + "restartPolicy": "Never", + "serviceAccountName": "metis", + "nodeSelector": map[string]string{ + "kubernetes.io/hostname": host, + }, + "containers": []map[string]any{ + { + "name": "remote-devices", + "image": image, + "imagePullPolicy": "Always", + "command": []string{ + "metis", "remote-devices", + "--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes), + "--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir), + }, + "securityContext": map[string]any{"privileged": true, "runAsUser": 0}, + "volumeMounts": []map[string]any{ + {"name": "host-dev", "mountPath": "/dev"}, + {"name": "host-sys", "mountPath": "/sys", "readOnly": true}, + {"name": "host-udev", "mountPath": "/run/udev", "readOnly": true}, + {"name": "host-tmp", "mountPath": "/host-tmp"}, + }, + }, + }, + "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, + "volumes": []map[string]any{ + {"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}}, + {"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}}, + {"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}}, + {"name": "host-tmp", "hostPath": 
map[string]any{"path": "/tmp"}}, + }, + }, + } +} + +func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any { + return map[string]any{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]any{ + "name": name, + "namespace": a.settings.Namespace, + "labels": map[string]string{"app": "metis-remote", "metis-run": "build"}, + "annotations": vaultRuntimeAnnotations(true), + }, + "spec": map[string]any{ + "restartPolicy": "Never", + "serviceAccountName": "metis", + "nodeSelector": map[string]string{ + "kubernetes.io/hostname": host, + }, + "containers": []map[string]any{ + { + "name": "remote-build", + "image": image, + "imagePullPolicy": "Always", + "command": []string{"/bin/sh", "-c"}, + "args": []string{ + remoteWorkerEntrypoint( + true, + "remote-build", + "--inventory", a.settings.InventoryPath, + "--node", node, + "--cache", "/workspace/cache", + "--work-dir", "/workspace/build", + "--artifact-ref", artifactRef, + "--build-tag", buildTag, + "--harbor-registry", a.settings.HarborRegistry, + ), + }, + "envFrom": []map[string]any{ + {"configMapRef": map[string]any{"name": "metis"}}, + }, + "volumeMounts": []map[string]any{ + {"name": "workspace", "mountPath": "/workspace"}, + }, + }, + }, + "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, + "volumes": []map[string]any{ + {"name": "workspace", "emptyDir": map[string]any{}}, + }, + }, + } +} + +func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any { + return map[string]any{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]any{ + "name": name, + "namespace": a.settings.Namespace, + "labels": map[string]string{"app": "metis-remote", "metis-run": "flash"}, + "annotations": vaultRuntimeAnnotations(false), + }, + "spec": map[string]any{ + "restartPolicy": "Never", + "serviceAccountName": "metis", + "nodeSelector": map[string]string{ + "kubernetes.io/hostname": host, + }, + "containers": 
[]map[string]any{ + { + "name": "remote-flash", + "image": image, + "imagePullPolicy": "Always", + "command": []string{"/bin/sh", "-c"}, + "args": []string{ + remoteWorkerEntrypoint( + false, + "remote-flash", + "--node", node, + "--device", device, + "--artifact-ref", artifactRef, + "--work-dir", "/workspace/flash", + "--harbor-registry", a.settings.HarborRegistry, + "--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir), + ), + }, + "securityContext": map[string]any{"privileged": true, "runAsUser": 0}, + "envFrom": []map[string]any{ + {"configMapRef": map[string]any{"name": "metis"}}, + }, + "volumeMounts": []map[string]any{ + {"name": "workspace", "mountPath": "/workspace"}, + {"name": "host-dev", "mountPath": "/dev"}, + {"name": "host-sys", "mountPath": "/sys", "readOnly": true}, + {"name": "host-udev", "mountPath": "/run/udev", "readOnly": true}, + {"name": "host-tmp", "mountPath": "/host-tmp"}, + }, + }, + }, + "imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}}, + "volumes": []map[string]any{ + {"name": "workspace", "emptyDir": map[string]any{}}, + {"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}}, + {"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}}, + {"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}}, + {"name": "host-tmp", "hostPath": map[string]any{"path": "/tmp"}}, + }, + }, + } +} + +func (a *App) remoteArtifactNote(node string) string { + if summary, ok := a.artifacts()[node]; ok && strings.TrimSpace(summary.Ref) != "" { + return summary.Ref + } + return a.artifactRepo(node) + ":latest" +} + +func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string { + if class != nil && strings.TrimSpace(class.Arch) != "" { + return strings.TrimSpace(class.Arch) + } + return "arm64" +} + +func mountedHostTmpDir(path string) string { + path = strings.TrimSpace(path) + switch { + case path == "", path == "/tmp": + return "/host-tmp" + case strings.HasPrefix(path, "/tmp/"): 
+ return filepath.Join("/host-tmp", strings.TrimPrefix(path, "/tmp/")) + default: + return filepath.Join("/host-tmp", strings.TrimPrefix(path, "/")) + } +} + +func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string { + annotations := map[string]string{ + "vault.hashicorp.com/agent-inject": "true", + "vault.hashicorp.com/agent-pre-populate-only": "true", + "vault.hashicorp.com/role": vaultRoleMaintenance, + "vault.hashicorp.com/agent-inject-secret-metis-runtime-env.sh": vaultRuntimeSecretPath, + "vault.hashicorp.com/agent-inject-template-metis-runtime-env.sh": `{{ with secret "kv/data/atlas/maintenance/metis-runtime" }} +export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}" +{{ end }}`, + "vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh": vaultHarborSecretPath, + "vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh": `{{ with secret "kv/data/atlas/harbor/harbor-core" }} +export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}" +{{ end }}`, + } + if includeSSHKeys { + annotations["vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh"] = vaultSSHKeysSecretPath + annotations["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"] = `{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }} +export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}" +export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}" +export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}" +export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}" +{{ end }}` + } + return annotations +} + +func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string { + lines := []string{ + "set -e", + ". /vault/secrets/metis-runtime-env.sh", + ". /vault/secrets/metis-harbor-env.sh", + } + if includeSSHKeys { + lines = append(lines, ". 
/vault/secrets/metis-ssh-env.sh") + } + lines = append(lines, "exec "+shellJoin(append([]string{"metis"}, args...)...)) + return strings.Join(lines, "\n") +} + +func shellJoin(args ...string) string { + quoted := make([]string, 0, len(args)) + for _, arg := range args { + quoted = append(quoted, shellQuote(arg)) + } + return strings.Join(quoted, " ") +} + +func shellQuote(value string) string { + if value == "" { + return "''" + } + return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'" +} diff --git a/pkg/service/remote_helpers_test.go b/pkg/service/remote_helpers_test.go new file mode 100644 index 0000000..ceeb922 --- /dev/null +++ b/pkg/service/remote_helpers_test.go @@ -0,0 +1,68 @@ +package service + +import ( + "strings" + "testing" + "time" + + "metis/pkg/inventory" +) + +func TestRemoteHelperBranches(t *testing.T) { + if got := prettyDeviceTarget(""); got != "the selected target" { + t.Fatalf("prettyDeviceTarget empty = %q", got) + } + if got := prettyDeviceTarget("hosttmp:///tmp"); got != "/tmp" { + t.Fatalf("prettyDeviceTarget hosttmp = %q", got) + } + if got := ramp(0, 10, 20, 1, 2); got != 1 { + t.Fatalf("ramp before start = %v", got) + } + if got := mountedHostTmpDir("/tmp/metis-flash-test"); got != "/host-tmp/metis-flash-test" { + t.Fatalf("mountedHostTmpDir = %q", got) + } + if got := shellQuote("a'b"); got != `'a'"'"'b'` { + t.Fatalf("shellQuote = %q", got) + } + if got, msg := buildStageHeartbeat("n1", "b1", 5*time.Second); got < 8 || !strings.Contains(msg, "Scheduling") { + t.Fatalf("buildStageHeartbeat early = %v %q", got, msg) + } + if got, msg := flashStageHeartbeat("h1", "artifact", 15*time.Second); got < 88 || !strings.Contains(msg, "Writing") { + t.Fatalf("flashStageHeartbeat = %v %q", got, msg) + } + + app := newTestApp(t) + app.artifactStore["n1"] = ArtifactSummary{Node: "n1", Ref: "registry.example/metis/n1:latest"} + if got := app.remoteArtifactNote("n1"); got != "registry.example/metis/n1:latest" { + t.Fatalf("remoteArtifactNote = 
%q", got) + } + if got := inventoryNodeArch(&inventory.NodeSpec{}, &inventory.NodeClass{Arch: "amd64"}); got != "amd64" { + t.Fatalf("inventoryNodeArch = %q", got) + } + worker := remoteWorkerEntrypoint(true, "--node", "n1") + if !strings.Contains(worker, "metis-runtime-env.sh") || !strings.Contains(worker, "metis-ssh-env.sh") { + t.Fatalf("remoteWorkerEntrypoint missing expected sources: %s", worker) + } +} + +func TestSelectBuilderHostPrefersWorkerAndArch(t *testing.T) { + kube := fakeKubeServer(t) + installKubeFactory(t, kube) + app := newTestApp(t) + node, err := app.selectBuilderHost("arm64", "titan-22") + if err != nil { + t.Fatalf("selectBuilderHost: %v", err) + } + if node.Name != "titan-22" { + t.Fatalf("expected titan-22 builder, got %s", node.Name) + } +} + +func TestSelectBuilderHostErrorBranch(t *testing.T) { + kube := fakeKubeServer(t) + installKubeFactory(t, kube) + app := newTestApp(t) + if _, err := app.selectBuilderHost("s390x", "titan-22"); err == nil { + t.Fatal("expected selectBuilderHost error") + } +} diff --git a/pkg/service/server.go b/pkg/service/server.go index d2ff4d3..a8e335a 100644 --- a/pkg/service/server.go +++ b/pkg/service/server.go @@ -234,721 +234,3 @@ func writeJSON(w http.ResponseWriter, status int, payload any) { w.WriteHeader(status) _ = json.NewEncoder(w).Encode(payload) } - -var metisPage = template.Must(template.New("metis").Parse(` - - - - - Metis Control - - - -
-
-
-
Atlas Recovery Plane
-

Metis Control

-

Build replacement node images, verify removable media on the Texas flash host, and keep image templates fresh with sentinel-driven drift tracking.

-
-
Default flash host: {{.State.DefaultFlashHost}}
-
- - - -
-
-
-

Replacement Run

-

This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.

-
- - - -
-
-
-
-
-
- - - -
-
- -
-

Live Jobs

-

Progress updates stream from the running Metis operation. The replacement flow automatically tries to clear the stale Kubernetes node object before the card write.

-
-
-
- -
-
-

Sentinel Watch

-

Ariadne should hit the internal sentinel watch route on a schedule. You can also run it manually here when you want the latest template recommendations immediately.

-
-
- Tracked nodes - 0 -
-
- Class targets - 0 -
-
-
- -
-
- -
-
-
-

Recent Changes

-

This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.

-
-
-
-
-
-
-
- - - -`)) diff --git a/pkg/service/server_extra_test.go b/pkg/service/server_extra_test.go new file mode 100644 index 0000000..27ea15b --- /dev/null +++ b/pkg/service/server_extra_test.go @@ -0,0 +1,104 @@ +package service + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "strings" + "testing" +) + +func TestHTTPHandlersCoverServiceRoutes(t *testing.T) { + kube := fakeKubeServer(t) + harbor := fakeHarborServer(t, true) + installKubeFactory(t, kube) + + app := newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "runner:arm64" + app.settings.HarborAPIBase = harbor.URL + "/api/v2.0" + app.settings.HarborUsername = "admin" + app.settings.HarborPassword = "pw" + app.settings.HarborProject = "metis" + app.settings.HarborRegistry = "registry.example" + app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") + + handler := app.Handler() + + t.Run("health", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusOK || !strings.Contains(resp.Body.String(), `"status":"ok"`) { + t.Fatalf("health response: %d %s", resp.Code, resp.Body.String()) + } + }) + + authHeaders := func(req *http.Request) { + req.Header.Set("X-Auth-Request-User", "brad") + req.Header.Set("X-Auth-Request-Groups", "admin") + } + + t.Run("devices", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/api/devices?host=titan-22", nil) + authHeaders(req) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusOK || !strings.Contains(resp.Body.String(), `"/dev/sdz"`) { + t.Fatalf("devices response: %d %s", resp.Code, resp.Body.String()) + } + }) + + t.Run("build", func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/jobs/build", strings.NewReader(`{"node":"titan-15"}`)) + req.Header.Set("Content-Type", "application/json") + 
authHeaders(req) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusAccepted { + t.Fatalf("build response: %d %s", resp.Code, resp.Body.String()) + } + var job Job + if err := json.Unmarshal(resp.Body.Bytes(), &job); err != nil { + t.Fatalf("decode build job: %v", err) + } + waitForJobState(t, app, job.ID, JobDone) + }) + + t.Run("replace", func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/jobs/replace", strings.NewReader(`{"node":"titan-15","host":"titan-22","device":"/dev/sdz"}`)) + req.Header.Set("Content-Type", "application/json") + authHeaders(req) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusAccepted { + t.Fatalf("replace response: %d %s", resp.Code, resp.Body.String()) + } + var job Job + if err := json.Unmarshal(resp.Body.Bytes(), &job); err != nil { + t.Fatalf("decode replace job: %v", err) + } + waitForJobState(t, app, job.ID, JobDone) + }) + + t.Run("watch", func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/sentinel/watch", nil) + authHeaders(req) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusOK || !strings.Contains(resp.Body.String(), `"kind":"sentinel.watch"`) { + t.Fatalf("watch response: %d %s", resp.Code, resp.Body.String()) + } + }) + + t.Run("index", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + authHeaders(req) + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) + if resp.Code != http.StatusOK || !strings.Contains(resp.Body.String(), " + + + + + Metis Control + + + +
+
+
+
Atlas Recovery Plane
+

Metis Control

+

Build replacement node images, verify removable media on the Texas flash host, and keep image templates fresh with sentinel-driven drift tracking.

+
+
Default flash host: {{.State.DefaultFlashHost}}
+
+ + + +
+
+
+

Replacement Run

+

This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.

+
+ + + +
+
+
+
+
+
+ + + +
+
+ +
+

Live Jobs

+

Progress updates stream from the running Metis operation. The replacement flow automatically tries to clear the stale Kubernetes node object before the card write.

+
+
+
+ +
+
+

Sentinel Watch

+

Ariadne should hit the internal sentinel watch route on a schedule. You can also run it manually here when you want the latest template recommendations immediately.

+
+
+ Tracked nodes + 0 +
+
+ Class targets + 0 +
+
+
+ +
+
+ +
+
+
+

Recent Changes

+

This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.

+
+
+
+
+
+
+
+ + + + diff --git a/pkg/service/workflow_test.go b/pkg/service/workflow_test.go new file mode 100644 index 0000000..d2733b5 --- /dev/null +++ b/pkg/service/workflow_test.go @@ -0,0 +1,236 @@ +package service + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRefreshDevicesAndReplacementWorkflow(t *testing.T) { + kube := fakeKubeServer(t) + harbor := fakeHarborServer(t, true) + app := newTestApp(t) + app.settings.Namespace = "maintenance" + app.settings.RunnerImageARM64 = "runner:arm64" + app.settings.HarborAPIBase = harbor.URL + "/api/v2.0" + app.settings.HarborUsername = "admin" + app.settings.HarborPassword = "pw" + app.settings.HarborProject = "metis" + app.settings.HarborRegistry = "registry.example" + app.settings.ArtifactStatePath = filepath.Join(t.TempDir(), "artifacts.json") + + installKubeFactory(t, kube) + + devices, err := app.RefreshDevices("titan-22") + if err != nil { + t.Fatalf("RefreshDevices: %v", err) + } + if len(devices) < 2 || devices[0].Path != "/dev/sdz" { + t.Fatalf("unexpected devices: %+v", devices) + } + cached, err := app.ListDevices("titan-22") + if err != nil || len(cached) != len(devices) { + t.Fatalf("ListDevices cache mismatch: %+v err=%v", cached, err) + } + state := app.State("titan-22") + if state.PreferredDevice != "/dev/sdz" { + t.Fatalf("expected preferred device /dev/sdz, got %q", state.PreferredDevice) + } + + job, err := app.Replace("titan-15", "titan-22", "/dev/sdz") + if err != nil { + t.Fatalf("Replace: %v", err) + } + waitForJobState(t, app, job.ID, JobDone) + if got := app.job(job.ID); got == nil || got.Status != JobDone { + t.Fatalf("replace job did not finish successfully: %#v", got) + } + if got := app.artifacts()["titan-15"].Ref; got != "registry.example/metis/titan-15:latest" { + t.Fatalf("artifact not recorded: %q", got) + } +} + +func TestRemotePodStateAndLogsHelpers(t *testing.T) { + kube := fakeKubeServer(t) + 
installKubeFactory(t, kube) + app := newTestApp(t) + app.settings.Namespace = "maintenance" + client, err := kubeClientFactory() + if err != nil { + t.Fatalf("kube client: %v", err) + } + state, err := app.remotePodState(client, "metis-build-test") + if err != nil { + t.Fatalf("remotePodState: %v", err) + } + if state.Name != "metis-build-test" || state.Reason != "Completed" || !strings.Contains(state.Message, "build") { + t.Fatalf("unexpected pod state: %#v", state) + } + logs, err := app.remotePodLogs(client, "metis-build-test") + if err != nil || !strings.Contains(logs, "build logs") { + t.Fatalf("remotePodLogs: logs=%q err=%v", logs, err) + } +} + +func TestHarborProjectCreationAndPrune(t *testing.T) { + harbor := fakeHarborServer(t, false) + app := &App{settings: Settings{ + HarborAPIBase: harbor.URL + "/api/v2.0", + HarborUsername: "admin", + HarborPassword: "pw", + HarborProject: "metis", + HarborRegistry: "registry.example", + }, metrics: NewMetrics()} + if got := app.artifactRepo("titan-15"); got != "registry.example/metis/titan-15" { + t.Fatalf("artifactRepo = %q", got) + } + if err := app.ensureHarborProject(); err != nil { + t.Fatalf("ensureHarborProject: %v", err) + } + if err := app.pruneHarborArtifacts("titan-15", 1); err != nil { + t.Fatalf("pruneHarborArtifacts: %v", err) + } +} + +func TestKubeJSONAndDeleteRequests(t *testing.T) { + kube := fakeKubeServer(t) + client := kubeClientFactoryForURL(kube.URL, kube.Client()) + var payload map[string]any + if err := client.jsonRequest(http.MethodGet, "/api/v1/nodes", nil, &payload); err != nil { + t.Fatalf("jsonRequest: %v", err) + } + if err := client.deleteRequest("/api/v1/nodes/titan-15"); err != nil { + t.Fatalf("deleteRequest: %v", err) + } +} + +func TestBuildStageAndArchiveHelpers(t *testing.T) { + if got := remoteArtifactNoteForTest(t); got != "registry.example/metis/titan-15:latest" { + t.Fatalf("remoteArtifactNote = %q", got) + } +} + +func waitForJobState(t *testing.T, app *App, id string, want 
JobStatus) { + t.Helper() + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + if got := app.job(id); got != nil { + if got.Status == want { + return + } + if got.Status == JobError { + t.Fatalf("job %s failed: %s", id, got.Error) + } + } + time.Sleep(10 * time.Millisecond) + } + t.Fatalf("job %s never reached state %s", id, want) +} + +func installKubeFactory(t *testing.T, srv *httptest.Server) { + t.Helper() + orig := kubeClientFactory + kubeClientFactory = func() (*kubeClient, error) { + return &kubeClient{baseURL: srv.URL, token: "tok", client: srv.Client()}, nil + } + t.Cleanup(func() { + kubeClientFactory = orig + }) +} + +func kubeClientFactoryForURL(baseURL string, client *http.Client) *kubeClient { + return &kubeClient{baseURL: baseURL, token: "tok", client: client} +} + +func fakeKubeServer(t *testing.T) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes": + _ = json.NewEncoder(w).Encode(map[string]any{ + "items": []any{ + map[string]any{ + "metadata": map[string]any{ + "name": "titan-22", + "labels": map[string]string{ + "kubernetes.io/arch": "arm64", + "hardware": "rpi5", + "node-role.kubernetes.io/worker": "true", + }, + }, + "spec": map[string]any{"unschedulable": false}, + }, + }, + }) + case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"): + w.WriteHeader(http.StatusCreated) + case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/pods/"): + w.WriteHeader(http.StatusOK) + case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/nodes/"): + w.WriteHeader(http.StatusOK) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/") && strings.HasSuffix(r.URL.Path, "/log"): + _, _ = w.Write([]byte("build logs from kubelet")) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/pods/"): + podName := 
filepath.Base(strings.TrimSuffix(r.URL.Path, "/log")) + message := `{}` + switch { + case strings.Contains(podName, "devices"): + message = `{"devices":[{"name":"sdz","path":"/dev/sdz","model":"Micro SD","transport":"usb","type":"disk","removable":true,"hotplug":true,"size_bytes":32000000000},{"name":"tmp","path":"hosttmp:///tmp","model":"Host /tmp","transport":"test","type":"file","note":"Test-only host write target under /tmp","size_bytes":1}]}` + case strings.Contains(podName, "build"): + message = `{"local_path":"/workspace/build/titan-15.img.xz","compressed":true,"size_bytes":1234,"build_tag":"build-1"}` + case strings.Contains(podName, "flash"): + message = `{"dest_path":"/tmp/metis-flash-test/titan-15.img"}` + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "metadata": map[string]any{"name": podName}, + "status": map[string]any{ + "phase": "Succeeded", + "message": message, + "reason": "Completed", + }, + }) + default: + http.NotFound(w, r) + } + })) +} + +func fakeHarborServer(t *testing.T, projectExists bool) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2.0/projects"): + if projectExists { + _ = json.NewEncoder(w).Encode([]map[string]string{{"name": "metis"}}) + return + } + _ = json.NewEncoder(w).Encode([]map[string]string{}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v2.0/projects": + w.WriteHeader(http.StatusCreated) + case r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/artifacts"): + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"digest": "sha256:aaa", "push_time": "2026-04-01T10:00:00Z"}, + {"digest": "sha256:bbb", "push_time": "2026-04-01T09:00:00Z"}, + }) + case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/artifacts/"): + w.WriteHeader(http.StatusAccepted) + default: + http.NotFound(w, r) + } + })) +} + +func 
remoteArtifactNoteForTest(t *testing.T) string { + t.Helper() + app := &App{ + settings: Settings{HarborRegistry: "registry.example", HarborProject: "metis"}, + artifactStore: map[string]ArtifactSummary{ + "titan-15": {Node: "titan-15", Ref: "registry.example/metis/titan-15:latest"}, + }, + } + return app.remoteArtifactNote("titan-15") +} diff --git a/pkg/util/run_test.go b/pkg/util/run_test.go new file mode 100644 index 0000000..f4b3ec2 --- /dev/null +++ b/pkg/util/run_test.go @@ -0,0 +1,32 @@ +package util + +import ( + "strings" + "testing" +) + +func TestRunSucceeds(t *testing.T) { + if err := Run("sh", "-c", "exit 0"); err != nil { + t.Fatalf("Run: %v", err) + } +} + +func TestRunLoggedReturnsCombinedOutput(t *testing.T) { + got, err := RunLogged("sh", "-c", "printf 'hello'; printf 'world' >&2") + if err != nil { + t.Fatalf("RunLogged: %v", err) + } + if got != "helloworld" { + t.Fatalf("RunLogged output = %q", got) + } +} + +func TestRunLoggedWrapsFailures(t *testing.T) { + _, err := RunLogged("sh", "-c", "printf boom >&2; exit 7") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "failed") || !strings.Contains(err.Error(), "boom") { + t.Fatalf("unexpected error: %v", err) + } +} diff --git a/pkg/writer/coverage_more_test.go b/pkg/writer/coverage_more_test.go new file mode 100644 index 0000000..c74552c --- /dev/null +++ b/pkg/writer/coverage_more_test.go @@ -0,0 +1,41 @@ +package writer + +import ( + "context" + "os" + "path/filepath" + "testing" +) + +func TestWriteImageWithProgressBranches(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + if err := os.WriteFile(src, []byte("writer-test"), 0o644); err != nil { + t.Fatal(err) + } + dest := filepath.Join(dir, "out", "dest.img") + var calls int + if err := WriteImageWithProgress(context.Background(), src, dest, func(written, total int64) { + calls++ + if written == 0 || total == 0 { + t.Fatalf("unexpected progress: %d/%d", written, total) + } + }); 
err != nil { + t.Fatalf("WriteImageWithProgress: %v", err) + } + if calls == 0 { + t.Fatal("expected progress callback") + } + if got, err := os.ReadFile(dest); err != nil || string(got) != "writer-test" { + t.Fatalf("write result = %q err=%v", got, err) + } + if !isDevicePath("/dev/sdz") || isDevicePath(dest) { + t.Fatal("isDevicePath helper failed") + } + if err := WriteImageWithProgress(context.Background(), src, "", nil); err == nil { + t.Fatal("expected empty destination error") + } + if err := WriteImageWithProgress(context.Background(), filepath.Join(dir, "missing"), dest, nil); err == nil { + t.Fatal("expected missing source error") + } +} diff --git a/pkg/writer/writer_test.go b/pkg/writer/writer_test.go index ee3bb3e..efab9a8 100644 --- a/pkg/writer/writer_test.go +++ b/pkg/writer/writer_test.go @@ -26,3 +26,54 @@ func TestWriteImageCopiesFile(t *testing.T) { t.Fatalf("expected %q got %q", string(content), string(got)) } } + +func TestWriteImageWithProgressAndCancel(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + dest := filepath.Join(dir, "dest.img") + if err := os.WriteFile(src, []byte("metis-progress"), 0o644); err != nil { + t.Fatalf("write src: %v", err) + } + + var calls []int64 + if err := WriteImageWithProgress(context.Background(), src, dest, func(written, total int64) { + calls = append(calls, written) + if total <= 0 { + t.Fatalf("unexpected total: %d", total) + } + }); err != nil { + t.Fatalf("WriteImageWithProgress: %v", err) + } + if len(calls) == 0 || calls[len(calls)-1] != int64(len("metis-progress")) { + t.Fatalf("unexpected progress callbacks: %#v", calls) + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + if err := WriteImageWithProgress(ctx, src, filepath.Join(dir, "cancelled.img"), nil); err == nil { + t.Fatal("expected cancel error") + } +} + +func TestIsDevicePath(t *testing.T) { + if !isDevicePath("/dev/sdz") { + t.Fatal("expected /dev/sdz to be a device path") + } + if 
isDevicePath("/tmp/image.img") { + t.Fatal("did not expect regular file path to be treated as device") + } +} + +func TestWriteImageErrorBranches(t *testing.T) { + if err := WriteImageWithProgress(context.Background(), "missing-src", "", nil); err == nil { + t.Fatal("expected empty destination error before source lookup") + } + dir := t.TempDir() + src := filepath.Join(dir, "src.img") + if err := os.WriteFile(src, []byte("data"), 0o644); err != nil { + t.Fatal(err) + } + if err := WriteImageWithProgress(context.Background(), src, filepath.Join(dir, "missing", "dest.img"), nil); err != nil { + t.Fatalf("WriteImageWithProgress nested path: %v", err) + } +} diff --git a/scripts/publish_test_metrics.py b/scripts/publish_test_metrics.py index 7f0b21a..77e2b32 100644 --- a/scripts/publish_test_metrics.py +++ b/scripts/publish_test_metrics.py @@ -5,7 +5,6 @@ from __future__ import annotations import json import os -import sys import urllib.request import xml.etree.ElementTree as ET @@ -40,7 +39,6 @@ def _load_junit(path: str) -> dict[str, int]: except ValueError: return 0 - suites: list[ET.Element] if root.tag == "testsuite": suites = [root] elif root.tag == "testsuites": @@ -66,16 +64,8 @@ def _load_exit_code(path: str) -> int | None: return None try: return int(raw) - except ValueError: - raise RuntimeError(f"invalid test exit code {raw!r} in {path}") - - -def _read_http(url: str) -> str: - try: - with urllib.request.urlopen(url, timeout=10) as resp: - return resp.read().decode("utf-8", errors="replace") - except Exception: - return "" + except ValueError as exc: + raise RuntimeError(f"invalid test exit code {raw!r} in {path}") from exc def _post_text(url: str, payload: str) -> None: @@ -90,26 +80,6 @@ def _post_text(url: str, payload: str) -> None: raise RuntimeError(f"metrics push failed status={resp.status}") -def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float: - text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics") 
- if not text: - return 0.0 - - for line in text.splitlines(): - if not line.startswith(metric + "{"): - continue - if any(f'{k}="{v}"' not in line for k, v in labels.items()): - continue - parts = line.split() - if len(parts) < 2: - continue - try: - return float(parts[1]) - except ValueError: - return 0.0 - return 0.0 - - def main() -> int: coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json") junit_path = os.getenv("JUNIT_XML", "build/junit.xml") @@ -121,6 +91,7 @@ def main() -> int: branch = os.getenv("BRANCH_NAME", "") build_number = os.getenv("BUILD_NUMBER", "") commit = os.getenv("GIT_COMMIT", "") + strict = os.getenv("METRICS_STRICT", "") == "1" if not os.path.exists(coverage_path): raise RuntimeError(f"missing coverage file {coverage_path}") @@ -141,32 +112,14 @@ def main() -> int: ): outcome = "failed" - job_name = "platform-quality-ci" - ok_count = _fetch_existing_counter( - pushgateway_url, - "platform_quality_gate_runs_total", - {"job": job_name, "suite": suite, "status": "ok"}, - ) - failed_count = _fetch_existing_counter( - pushgateway_url, - "platform_quality_gate_runs_total", - {"job": job_name, "suite": suite, "status": "failed"}, - ) - if outcome == "ok": - ok_count += 1 - else: - failed_count += 1 - labels = { + "job": "platform-quality-ci", "suite": suite, "branch": branch, "build_number": build_number, "commit": commit, } payload_lines = [ - "# TYPE platform_quality_gate_runs_total counter", - f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}', - f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}', "# TYPE metis_quality_gate_tests_total gauge", f'metis_quality_gate_tests_total{{suite="{suite}",result="total"}} {totals["tests"]}', f'metis_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}', @@ -182,7 +135,13 @@ def main() -> int: f"metis_quality_gate_build_info{_label_str(labels)} 1", ] payload = "\n".join(payload_lines) + "\n" - 
_post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload) + + try: + _post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{labels['job']}/suite/{suite}", payload) + except Exception as exc: + print(f"metrics push failed: {exc}") + if strict: + raise print( json.dumps( @@ -196,8 +155,6 @@ def main() -> int: "tests_skipped": totals["skipped"], "coverage_percent": round(coverage, 3), "test_exit_code": test_exit_code, - "ok_counter": ok_count, - "failed_counter": failed_count, }, indent=2, ) @@ -206,8 +163,4 @@ def main() -> int: if __name__ == "__main__": - try: - raise SystemExit(main()) - except Exception as exc: - print(f"metrics push failed: {exc}") - raise + raise SystemExit(main()) diff --git a/testing/config_test.go b/testing/config_test.go new file mode 100644 index 0000000..bda107a --- /dev/null +++ b/testing/config_test.go @@ -0,0 +1,58 @@ +package testing_test + +import ( + "testing" + + "metis/pkg/config" + "metis/pkg/inventory" +) + +func TestConfigBuildLabelsAndTaints(t *testing.T) { + inv := inventory.Inventory{ + Classes: []inventory.NodeClass{{ + Name: "c1", + Arch: "arm64", + OS: "linux", + Image: "file:///tmp/base.img", + DefaultLabels: map[string]string{"a": "1"}, + DefaultTaints: []string{"t1"}, + }}, + Nodes: []inventory.NodeSpec{{ + Name: "n1", + Class: "c1", + Hostname: "n1", + IP: "1.1.1.1", + K3sRole: "agent", + Labels: map[string]string{"b": "2"}, + Taints: []string{"t2"}, + LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/d1", UUID: "uuid-1", FS: "ext4"}}, + SSHUser: "ubuntu", + SSHAuthorized: []string{"key"}, + }}, + } + cfg, err := config.Build(&inv, "n1") + if err != nil { + t.Fatalf("Build: %v", err) + } + if got, want := cfg.Labels["a"], "1"; got != want { + t.Fatalf("label a = %q, want %q", got, want) + } + if got, want := cfg.Labels["b"], "2"; got != want { + t.Fatalf("label b = %q, want %q", got, want) + } + if len(cfg.Taints) != 2 || cfg.Taints[0] != "t1" || cfg.Taints[1] != "t2" { + 
t.Fatalf("taints = %#v", cfg.Taints) + } + if len(cfg.Fstab) != 1 { + t.Fatalf("fstab entries = %d, want 1", len(cfg.Fstab)) + } + if got, want := cfg.Fstab[0].Mountpoint, "/mnt/d1"; got != want { + t.Fatalf("fstab mountpoint = %q, want %q", got, want) + } + if got, want := cfg.Fstab[0].UUID, "uuid-1"; got != want { + t.Fatalf("fstab uuid = %q, want %q", got, want) + } + if got, want := cfg.K3s.Role, "agent"; got != want { + t.Fatalf("k3s role = %q, want %q", got, want) + } +} diff --git a/testing/coverage_policy.json b/testing/coverage_policy.json new file mode 100644 index 0000000..92b515d --- /dev/null +++ b/testing/coverage_policy.json @@ -0,0 +1,41 @@ +{ + "target_percent": 95, + "files": { + "metis/cmd/metis-sentinel/main.go": 0, + "metis/cmd/metis/config_cmd.go": 0, + "metis/cmd/metis/facts_cmd.go": 0, + "metis/cmd/metis/image_cmd.go": 0, + "metis/cmd/metis/inject_cmd.go": 0, + "metis/cmd/metis/main.go": 0, + "metis/cmd/metis/remote_cmd.go": 4.3, + "metis/cmd/metis/serve_cmd.go": 0, + "metis/pkg/config/config.go": 0, + "metis/pkg/facts/aggregate.go": 92, + "metis/pkg/facts/load.go": 80, + "metis/pkg/facts/recommend.go": 100, + "metis/pkg/facts/targets.go": 95, + "metis/pkg/image/download.go": 64.9, + "metis/pkg/image/rootfs.go": 41.1, + "metis/pkg/inject/inject.go": 0, + "metis/pkg/inventory/types.go": 75.4, + "metis/pkg/mount/mount.go": 0, + "metis/pkg/plan/burn.go": 0, + "metis/pkg/plan/image_build.go": 0, + "metis/pkg/plan/inject.go": 60.5, + "metis/pkg/plan/plan.go": 68.4, + "metis/pkg/secrets/vault.go": 70.8, + "metis/pkg/sentinel/collector.go": 0, + "metis/pkg/service/app.go": 66.6, + "metis/pkg/service/app_helpers.go": 40, + "metis/pkg/service/artifacts.go": 28.5, + "metis/pkg/service/cluster.go": 5, + "metis/pkg/service/harbor.go": 0, + "metis/pkg/service/metrics.go": 85.8, + "metis/pkg/service/remote.go": 0, + "metis/pkg/service/remote_helpers.go": 24.2, + "metis/pkg/service/server.go": 53.9, + "metis/pkg/service/settings.go": 0, + 
"metis/pkg/util/run.go": 0, + "metis/pkg/writer/writer.go": 70 + } +} diff --git a/testing/gate_test.go b/testing/gate_test.go new file mode 100644 index 0000000..e0653ce --- /dev/null +++ b/testing/gate_test.go @@ -0,0 +1,250 @@ +package testing_test + +import ( + "bufio" + "encoding/json" + "fmt" + "go/ast" + "go/parser" + "go/token" + "math" + "os" + "os/exec" + "path/filepath" + "sort" + "strconv" + "strings" + "testing" +) + +type coveragePolicy struct { + TargetPercent float64 `json:"target_percent"` + Files map[string]float64 `json:"files"` +} + +func TestSourceFileLineLimit(t *testing.T) { + root := repoRoot(t) + var offenders []string + for _, relRoot := range []string{"cmd", "pkg", "scripts", "testing"} { + walkSourceFiles(t, filepath.Join(root, relRoot), func(path string, info os.DirEntry) error { + if info.IsDir() { + return nil + } + switch filepath.Ext(path) { + case ".go", ".py", ".sh": + lines, err := countLines(path) + if err != nil { + return err + } + if lines > 500 { + offenders = append(offenders, fmt.Sprintf("%s:%d", rel(root, path), lines)) + } + } + return nil + }) + } + if len(offenders) > 0 { + sort.Strings(offenders) + t.Fatalf("source files exceed 500 LOC: %s", strings.Join(offenders, ", ")) + } +} + +func TestExportedDocs(t *testing.T) { + root := repoRoot(t) + var missing []string + fset := token.NewFileSet() + walkSourceFiles(t, root, func(path string, info os.DirEntry) error { + if info.IsDir() || filepath.Ext(path) != ".go" || strings.HasSuffix(path, "_test.go") { + return nil + } + if !strings.HasPrefix(rel(root, path), "cmd/") && !strings.HasPrefix(rel(root, path), "pkg/") { + return nil + } + file, err := parser.ParseFile(fset, path, nil, parser.ParseComments) + if err != nil { + return err + } + for _, decl := range file.Decls { + switch d := decl.(type) { + case *ast.FuncDecl: + if d.Name.IsExported() && !hasUsefulDoc(d.Doc, d.Name.Name) { + missing = append(missing, fmt.Sprintf("%s:%s", rel(root, path), d.Name.Name)) + } + 
case *ast.GenDecl: + for _, spec := range d.Specs { + switch s := spec.(type) { + case *ast.TypeSpec: + if s.Name.IsExported() && !hasUsefulDoc(d.Doc, s.Name.Name) { + missing = append(missing, fmt.Sprintf("%s:%s", rel(root, path), s.Name.Name)) + } + } + } + } + } + return nil + }) + if len(missing) > 0 { + sort.Strings(missing) + t.Fatalf("exported declarations without useful docs: %s", strings.Join(missing, ", ")) + } +} + +func TestGoFmtAndVet(t *testing.T) { + root := repoRoot(t) + gofmt := exec.Command("gofmt", "-l", "cmd", "pkg", "testing") + gofmt.Dir = root + out, err := gofmt.CombinedOutput() + if err != nil { + t.Fatalf("gofmt check failed: %v\n%s", err, out) + } + if trimmed := strings.TrimSpace(string(out)); trimmed != "" { + t.Fatalf("gofmt -l reported files:\n%s", trimmed) + } + vet := exec.Command("go", "vet", "./...") + vet.Dir = root + out, err = vet.CombinedOutput() + if err != nil { + t.Fatalf("go vet failed: %v\n%s", err, out) + } +} + +func TestCoveragePolicy(t *testing.T) { + root := repoRoot(t) + coveragePath := filepath.Join(root, "build", "coverage.out") + if _, err := os.Stat(coveragePath); err != nil { + cmd := exec.Command("go", "test", "./...", "-coverprofile=build/coverage.out") + cmd.Dir = root + out, runErr := cmd.CombinedOutput() + if runErr != nil { + t.Fatalf("root coverage run failed: %v\n%s", runErr, out) + } + } + policyPath := filepath.Join(root, "testing", "coverage_policy.json") + policy := loadCoveragePolicy(t, policyPath) + actual := readCoverageProfile(t, coveragePath) + var regressions []string + var phased []string + for file, min := range policy.Files { + got, ok := actual[file] + if !ok { + regressions = append(regressions, fmt.Sprintf("%s missing from coverage", file)) + continue + } + if got+0.05 < min { + regressions = append(regressions, fmt.Sprintf("%s %.1f < %.1f", file, got, min)) + } + if got < policy.TargetPercent { + phased = append(phased, fmt.Sprintf("%s=%.1f", file, got)) + } + } + if len(regressions) > 
0 { + sort.Strings(regressions) + t.Fatalf("coverage regressed: %s", strings.Join(regressions, ", ")) + } + if len(phased) > 0 { + sort.Strings(phased) + t.Fatalf("coverage below target %.1f%%: %s", policy.TargetPercent, strings.Join(phased, ", ")) + } +} + +func countLines(path string) (int, error) { + f, err := os.Open(path) + if err != nil { + return 0, err + } + defer f.Close() + s := bufio.NewScanner(f) + count := 0 + for s.Scan() { + count++ + } + return count, s.Err() +} + +func rel(root, path string) string { + out, err := filepath.Rel(root, path) + if err != nil { + return path + } + return filepath.ToSlash(out) +} + +func hasUsefulDoc(comment *ast.CommentGroup, name string) bool { + if comment == nil { + return false + } + text := strings.TrimSpace(comment.Text()) + if text == "" { + return false + } + if len(strings.Fields(text)) < 4 { + return false + } + return strings.Contains(strings.ToLower(text), strings.ToLower(name[:1])) || len(text) > len(name)+12 +} + +func loadCoveragePolicy(t *testing.T, path string) coveragePolicy { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + var policy coveragePolicy + if err := json.Unmarshal(data, &policy); err != nil { + t.Fatal(err) + } + if policy.TargetPercent == 0 { + policy.TargetPercent = 95 + } + if policy.Files == nil { + policy.Files = map[string]float64{} + } + return policy +} + +func readCoverageProfile(t *testing.T, path string) map[string]float64 { + t.Helper() + f, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + stats := map[string]struct{ covered, total int }{} + s := bufio.NewScanner(f) + for s.Scan() { + line := strings.TrimSpace(s.Text()) + if line == "" || strings.HasPrefix(line, "mode:") { + continue + } + parts := strings.Fields(line) + if len(parts) != 3 { + continue + } + file := strings.SplitN(parts[0], ":", 2)[0] + stmts, err := strconv.Atoi(parts[1]) + if err != nil { + continue + } + count, err := strconv.Atoi(parts[2]) + 
if err != nil { + continue + } + entry := stats[file] + entry.total += stmts + if count > 0 { + entry.covered += stmts + } + stats[file] = entry + } + if err := s.Err(); err != nil { + t.Fatal(err) + } + out := map[string]float64{} + for file, stat := range stats { + if stat.total == 0 { + continue + } + out[file] = math.Round((float64(stat.covered)/float64(stat.total))*1000) / 10 + } + return out +} diff --git a/testing/go.mod b/testing/go.mod new file mode 100644 index 0000000..01fdb5b --- /dev/null +++ b/testing/go.mod @@ -0,0 +1,9 @@ +module metis-testing + +go 1.23.1 + +require metis v0.0.0 + +require gopkg.in/yaml.v3 v3.0.1 // indirect + +replace metis => .. diff --git a/testing/go.sum b/testing/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/testing/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/testing/helpers_test.go b/testing/helpers_test.go new file mode 100644 index 0000000..f73efc0 --- /dev/null +++ b/testing/helpers_test.go @@ -0,0 +1,46 @@ +package testing_test + +import ( + "io/fs" + "os" + "path/filepath" + "runtime" + "testing" +) + +func repoRoot(t *testing.T) string { + t.Helper() + _, file, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("could not resolve testing module location") + } + return filepath.Clean(filepath.Join(filepath.Dir(file), "..")) +} + +func walkSourceFiles(t *testing.T, root string, fn func(path string, info fs.DirEntry) error) { + t.Helper() + walkErr := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + switch d.Name() { + case ".git", ".venv", ".venv-ci", "build", "tmp", 
"artifacts", ".pytest_cache", ".ruff_cache": + return filepath.SkipDir + } + } + return fn(path, d) + }) + if walkErr != nil { + t.Fatal(walkErr) + } +} + +func readFile(t *testing.T, path string) []byte { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + return data +} diff --git a/testing/image_test.go b/testing/image_test.go new file mode 100644 index 0000000..90edca7 --- /dev/null +++ b/testing/image_test.go @@ -0,0 +1,47 @@ +package testing_test + +import ( + "crypto/md5" + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "testing" + + "metis/pkg/image" +) + +func TestDownloadFileURL(t *testing.T) { + tmp := t.TempDir() + src := filepath.Join(tmp, "src.bin") + if err := os.WriteFile(src, []byte("hello"), 0o644); err != nil { + t.Fatal(err) + } + dest := filepath.Join(tmp, "dest.bin") + if err := image.Download("file://"+src, dest); err != nil { + t.Fatalf("Download: %v", err) + } + data, err := os.ReadFile(dest) + if err != nil { + t.Fatal(err) + } + if string(data) != "hello" { + t.Fatalf("downloaded content = %q", data) + } +} + +func TestChecksumHelpers(t *testing.T) { + tmp := t.TempDir() + path := filepath.Join(tmp, "file.bin") + if err := os.WriteFile(path, []byte("abc"), 0o644); err != nil { + t.Fatal(err) + } + sha := sha256.Sum256([]byte("abc")) + if err := image.VerifyChecksum(path, "sha256:"+hex.EncodeToString(sha[:])); err != nil { + t.Fatalf("VerifyChecksum sha256: %v", err) + } + md5sum := md5.Sum([]byte("abc")) + if err := image.VerifyChecksum(path, "md5:"+hex.EncodeToString(md5sum[:])); err != nil { + t.Fatalf("VerifyChecksum md5: %v", err) + } +} diff --git a/testing/inject_test.go b/testing/inject_test.go new file mode 100644 index 0000000..ab3583a --- /dev/null +++ b/testing/inject_test.go @@ -0,0 +1,28 @@ +package testing_test + +import ( + "path/filepath" + "testing" + + "metis/pkg/inject" +) + +func TestInjectWrite(t *testing.T) { + tmp := t.TempDir() + boot := filepath.Join(tmp, "boot") + 
root := filepath.Join(tmp, "root") + inj := inject.Injector{BootPath: boot, RootPath: root} + files := []inject.FileSpec{ + {Path: "config.txt", Content: []byte("bootcfg"), Mode: 0o644, RootFS: false}, + {Path: "etc/hostname", Content: []byte("node"), Mode: 0o644, RootFS: true}, + } + if err := inj.Write(files); err != nil { + t.Fatalf("Write: %v", err) + } + if got := readFile(t, filepath.Join(boot, "config.txt")); string(got) != "bootcfg" { + t.Fatalf("boot file = %q", got) + } + if got := readFile(t, filepath.Join(root, "etc/hostname")); string(got) != "node" { + t.Fatalf("root file = %q", got) + } +} diff --git a/testing/inventory_test.go b/testing/inventory_test.go new file mode 100644 index 0000000..f82ca81 --- /dev/null +++ b/testing/inventory_test.go @@ -0,0 +1,56 @@ +package testing_test + +import ( + "os" + "path/filepath" + "testing" + + "metis/pkg/inventory" +) + +func TestLoadAndFindNode(t *testing.T) { + invPath := filepath.Join(t.TempDir(), "inventory.yaml") + if err := os.WriteFile(invPath, []byte(` +classes: + - name: rpi5 + arch: arm64 + os: ubuntu + image: file:///tmp/base.img + checksum: sha256:deadbeef + default_labels: + hardware: rpi5 +nodes: + - name: titan-04 + class: rpi5 + hostname: titan-04 + ip: 192.168.22.30 + k3s_role: agent +`), 0o644); err != nil { + t.Fatal(err) + } + inv, err := inventory.Load(invPath) + if err != nil { + t.Fatalf("Load: %v", err) + } + node, class, err := inv.FindNode("titan-04") + if err != nil { + t.Fatalf("FindNode: %v", err) + } + if node.Hostname != "titan-04" { + t.Fatalf("hostname = %q", node.Hostname) + } + if class.Arch != "arm64" { + t.Fatalf("arch = %q", class.Arch) + } +} + +func TestFindNodeMissing(t *testing.T) { + inv := inventory.Inventory{Classes: nil, Nodes: nil} + node, class, err := inv.FindNode("missing") + if err == nil { + t.Fatal("expected missing node error") + } + if node != nil || class != nil { + t.Fatalf("unexpected node/class: %#v %#v", node, class) + } +} diff --git 
a/testing/plan_cli_test.go b/testing/plan_cli_test.go new file mode 100644 index 0000000..f02c316 --- /dev/null +++ b/testing/plan_cli_test.go @@ -0,0 +1,121 @@ +package testing_test + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +func TestPlanCLIProducesActions(t *testing.T) { + root := repoRoot(t) + dummy := filepath.Join(t.TempDir(), "dummy.img") + if err := os.WriteFile(dummy, []byte("dummy"), 0o644); err != nil { + t.Fatal(err) + } + checksum := sha256.Sum256(readFile(t, dummy)) + invPath := filepath.Join(t.TempDir(), "inv.yaml") + inv := fmt.Sprintf(`{ + "classes": [ + { + "name": "test-class", + "arch": "arm64", + "os": "testos", + "image": "file://%s", + "checksum": "sha256:%s", + "default_labels": {"role": "worker"} + } + ], + "nodes": [ + { + "name": "node-a", + "class": "test-class", + "hostname": "node-a", + "ip": "10.0.0.10", + "k3s_role": "agent" + } + ] +}`, + dummy, + hex.EncodeToString(checksum[:]), + ) + if err := os.WriteFile(invPath, []byte(inv), 0o644); err != nil { + t.Fatal(err) + } + cmd := exec.Command("go", "run", "./cmd/metis", "plan", "--inventory", invPath, "--node", "node-a", "--device", "/dev/sdz", "--cache", filepath.Join(t.TempDir(), "cache")) + cmd.Dir = root + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("plan command failed: %v\n%s", err, out) + } + var plan struct { + Node string `json:"node"` + Actions []struct { + Type string `json:"type"` + } `json:"actions"` + } + if err := json.Unmarshal(out, &plan); err != nil { + t.Fatalf("decode plan: %v\n%s", err, out) + } + if plan.Node != "node-a" { + t.Fatalf("node = %q", plan.Node) + } + for _, action := range plan.Actions { + if action.Type == "fetch" { + return + } + } + t.Fatal("expected a fetch action") +} + +func TestBurnDryRunPrintsPlan(t *testing.T) { + root := repoRoot(t) + dummy := filepath.Join(t.TempDir(), "dummy.img") + if err := os.WriteFile(dummy, 
[]byte("dummy"), 0o644); err != nil { + t.Fatal(err) + } + checksum := sha256.Sum256(readFile(t, dummy)) + invPath := filepath.Join(t.TempDir(), "inv.yaml") + inv := fmt.Sprintf(`{ + "classes": [ + { + "name": "test-class", + "arch": "arm64", + "os": "testos", + "image": "file://%s", + "checksum": "sha256:%s", + "default_labels": {"role": "worker"} + } + ], + "nodes": [ + { + "name": "node-a", + "class": "test-class", + "hostname": "node-a", + "ip": "10.0.0.10", + "k3s_role": "agent" + } + ] +}`, + dummy, + hex.EncodeToString(checksum[:]), + ) + if err := os.WriteFile(invPath, []byte(inv), 0o644); err != nil { + t.Fatal(err) + } + cmd := exec.Command("go", "run", "./cmd/metis", "burn", "--inventory", invPath, "--node", "node-a", "--device", "/dev/sdz", "--cache", filepath.Join(t.TempDir(), "cache")) + cmd.Dir = root + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("burn command failed: %v\n%s", err, out) + } + got := string(out) + if !strings.Contains(got, "Dry run") && !strings.Contains(got, "Plan for") { + t.Fatalf("unexpected output: %s", got) + } +} diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 7c7605d..0000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# pytest package marker diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index fd17823..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,36 +0,0 @@ -from metis.pkg import inventory, config - -def test_config_build_labels_and_taints(): - inv = inventory.Inventory( - classes=[ - inventory.NodeClass( - Name="c1", - Arch="arm64", - OS="linux", - Image="file:///tmp/base.img", - DefaultLabels={"a": "1"}, - DefaultTaints=["t1"], - ) - ], - Nodes=[ - inventory.NodeSpec( - Name="n1", - Class="c1", - Hostname="n1", - IP="1.1.1.1", - K3sRole="agent", - Labels={"b": "2"}, - Taints=["t2"], - LonghornDisks=[inventory.LonghornDisk(Mountpoint="/mnt/d1", UUID="uuid-1", FS="ext4")], - SSHUser="ubuntu", - SSHAuthorized=["key"], - ) 
- ], - ) - cfg, err = config.Build(inv, "n1") - assert err is None - assert cfg.Labels == {"a": "1", "b": "2"} - assert cfg.Taints == ["t1", "t2"] - assert cfg.Fstab[0].Mountpoint == "/mnt/d1" - assert cfg.Fstab[0].UUID == "uuid-1" - assert cfg.K3s.Role == "agent" diff --git a/tests/test_image.py b/tests/test_image.py deleted file mode 100644 index e8d7384..0000000 --- a/tests/test_image.py +++ /dev/null @@ -1,30 +0,0 @@ -import hashlib -from pathlib import Path - -from metis.pkg import image - - -def test_download_file_url(tmp_path): - src = tmp_path / "src.bin" - src.write_bytes(b"hello") - dest = tmp_path / "dest.bin" - image.Download(f"file://{src}", dest) - assert dest.read_bytes() == b"hello" - - -def test_checksum_ok(tmp_path): - f = tmp_path / "file.bin" - f.write_bytes(b"abc") - checksum = "sha256:" + hashlib.sha256(b"abc").hexdigest() - image.VerifyChecksum(f, checksum) - - -def test_checksum_bad(tmp_path): - f = tmp_path / "file.bin" - f.write_bytes(b"abc") - checksum = "sha256:deadbeef" - try: - image.VerifyChecksum(f, checksum) - except Exception: - return - raise AssertionError("expected checksum failure") diff --git a/tests/test_inject.py b/tests/test_inject.py deleted file mode 100644 index d309805..0000000 --- a/tests/test_inject.py +++ /dev/null @@ -1,16 +0,0 @@ -from pathlib import Path - -from metis.pkg.inject import Injector, FileSpec - - -def test_inject_write(tmp_path): - boot = tmp_path / "boot" - root = tmp_path / "root" - inj = Injector(BootPath=str(boot), RootPath=str(root)) - files = [ - FileSpec(Path="config.txt", Content=b"bootcfg", Mode=0o644, RootFS=False), - FileSpec(Path="etc/hostname", Content=b"node", Mode=0o644, RootFS=True), - ] - inj.Write(files) - assert (boot / "config.txt").read_bytes() == b"bootcfg" - assert (root / "etc/hostname").read_bytes() == b"node" diff --git a/tests/test_inventory.py b/tests/test_inventory.py deleted file mode 100644 index 7eb771a..0000000 --- a/tests/test_inventory.py +++ /dev/null @@ -1,46 +0,0 
@@ -import json -import tempfile -from pathlib import Path - -import pytest - -from metis.pkg import inventory - -def test_load_and_find_node(): - data = { - "classes": [ - { - "name": "rpi5", - "arch": "arm64", - "os": "ubuntu", - "image": "file:///tmp/base.img", - "checksum": "sha256:deadbeef", - "default_labels": {"hardware": "rpi5"}, - } - ], - "nodes": [ - { - "name": "titan-04", - "class": "rpi5", - "hostname": "titan-04", - "ip": "192.168.22.30", - "k3s_role": "agent", - } - ], - } - with tempfile.TemporaryDirectory() as tmp: - path = Path(tmp) / "inv.yaml" - path.write_text(json.dumps(data)) - inv = inventory.Load(path) - node, cls, err = inv.FindNode("titan-04") - assert err is None - assert node.Hostname == "titan-04" - assert cls.Arch == "arm64" - - -def test_find_node_missing(): - inv = inventory.Inventory(classes=[], nodes=[]) - node, cls, err = inv.FindNode("missing") - assert err is not None - assert node is None - assert cls is None diff --git a/tests/test_plan_cli.py b/tests/test_plan_cli.py deleted file mode 100644 index 5b8fbfb..0000000 --- a/tests/test_plan_cli.py +++ /dev/null @@ -1,75 +0,0 @@ -import json -import subprocess -from pathlib import Path - -import pytest - -ROOT = Path(__file__).resolve().parents[1] - - -def write_inv(tmpdir, image_path, checksum): - inv = { - "classes": [ - { - "name": "test-class", - "arch": "arm64", - "os": "testos", - "image": f"file://{image_path}", - "checksum": checksum, - "default_labels": {"role": "worker"}, - } - ], - "nodes": [ - { - "name": "node-a", - "class": "test-class", - "hostname": "node-a", - "ip": "10.0.0.10", - "k3s_role": "agent", - } - ], - } - inv_path = Path(tmpdir) / "inv.yaml" - inv_path.write_text(json.dumps(inv)) - return inv_path - - -def test_plan_output_contains_actions(tmp_path): - dummy = tmp_path / "dummy.img" - dummy.write_bytes(b"dummy") - import hashlib - - checksum = "sha256:" + hashlib.sha256(dummy.read_bytes()).hexdigest() - inv_path = write_inv(tmp_path, dummy, checksum) - 
cache_dir = tmp_path / "cache" - cmd = ["go", "run", "./cmd/metis", "plan", "--inventory", str(inv_path), "--node", "node-a", "--device", "/dev/sdz", "--cache", str(cache_dir)] - out = subprocess.check_output(cmd, cwd=ROOT) - plan = json.loads(out) - assert plan["node"] == "node-a" - assert any(a["type"] == "fetch" for a in plan["actions"]) - - -def test_burn_dry_run(tmp_path): - dummy = tmp_path / "dummy.img" - dummy.write_bytes(b"dummy") - import hashlib - - checksum = "sha256:" + hashlib.sha256(dummy.read_bytes()).hexdigest() - inv_path = write_inv(tmp_path, dummy, checksum) - cache_dir = tmp_path / "cache" - cmd = [ - "go", - "run", - "./cmd/metis", - "burn", - "--inventory", - str(inv_path), - "--node", - "node-a", - "--device", - "/dev/sdz", - "--cache", - str(cache_dir), - ] - out = subprocess.check_output(cmd, cwd=ROOT, text=True) - assert "Dry run" in out or "Plan for" in out