diff --git a/cmd/metis/main.go b/cmd/metis/main.go
new file mode 100644
index 0000000..9144ba4
--- /dev/null
+++ b/cmd/metis/main.go
@@ -0,0 +1,104 @@
+// Command metis builds burn plans for the nodes described in an inventory file;
+// executing a plan against a device is not implemented yet.
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+
+	"metis/pkg/inventory"
+	"metis/pkg/plan"
+)
+
+// main dispatches to the subcommand named by the first argument. A missing or
+// unknown subcommand prints the usage line and exits with status 1.
+func main() {
+	if len(os.Args) < 2 {
+		usage()
+		os.Exit(1)
+	}
+	switch os.Args[1] {
+	case "plan":
+		planCmd(os.Args[2:])
+	case "burn":
+		burnCmd(os.Args[2:])
+	default:
+		usage()
+		os.Exit(1)
+	}
+}
+
+// usage prints the one-line synopsis to stderr. It names the subcommands because
+// main rejects any invocation that does not start with one of them.
+func usage() {
+	fmt.Fprintf(os.Stderr, "Usage: metis (plan|burn) [options]\n")
+}
+
+// loadInventory loads the inventory at path and terminates the process via
+// log.Fatalf if inventory.Load reports an error.
+func loadInventory(path string) *inventory.Inventory {
+	inv, err := inventory.Load(path)
+	if err != nil {
+		log.Fatalf("load inventory: %v", err)
+	}
+	return inv
+}
+
+// planCmd implements "metis plan": it builds the plan for --node and writes it
+// to stdout as indented JSON. --device defaults to the /dev/sdX placeholder.
+func planCmd(args []string) {
+	fs := flag.NewFlagSet("plan", flag.ExitOnError)
+	invPath := fs.String("inventory", "inventory.yaml", "inventory file")
+	node := fs.String("node", "", "target node")
+	device := fs.String("device", "/dev/sdX", "target block device")
+	cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
+	fs.Parse(args) // flag.ExitOnError: Parse exits on bad flags instead of returning an error
+	if *node == "" {
+		log.Fatalf("--node is required")
+	}
+	inv := loadInventory(*invPath)
+	p, err := plan.Build(inv, *node, *device, *cache)
+	if err != nil {
+		log.Fatalf("build plan: %v", err)
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", " ")
+	// A failed write to stdout (e.g. a closed pipe) must not look like success.
+	if err := enc.Encode(p); err != nil {
+		log.Fatalf("encode plan: %v", err)
+	}
+}
+
+// burnCmd implements "metis burn": it prints the plan for --node and --device.
+// Without --yes it stops after printing (dry run); with --yes it exits with an
+// error because execution is not implemented yet.
+func burnCmd(args []string) {
+	fs := flag.NewFlagSet("burn", flag.ExitOnError)
+	invPath := fs.String("inventory", "inventory.yaml", "inventory file")
+	node := fs.String("node", "", "target node")
+	device := fs.String("device", "", "target block device (e.g. /dev/sdX)")
+	cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
+	confirm := fs.Bool("yes", false, "actually write to device")
+	fs.Parse(args) // flag.ExitOnError: Parse exits on bad flags instead of returning an error
+	if *node == "" || *device == "" {
+		log.Fatalf("--node and --device are required")
+	}
+	inv := loadInventory(*invPath)
+	p, err := plan.Build(inv, *node, *device, *cache)
+	if err != nil {
+		log.Fatalf("build plan: %v", err)
+	}
+	fmt.Printf("Plan for %s to %s:\n", p.Node, p.Device)
+	for _, a := range p.Actions {
+		fmt.Printf("- [%s] %s\n", a.Type, a.Detail)
+	}
+	if !*confirm {
+		fmt.Printf("\nDry run. Re-run with --yes to execute (not yet implemented).\n")
+		return
+	}
+	log.Fatalf("burn execution not yet implemented; follow plan commands manually")
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..12a75ea
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module metis
+
+go 1.22.0
+
+require gopkg.in/yaml.v3 v3.0.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..a62c313
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/inventory.example.yaml b/inventory.example.yaml
new file mode 100644
index 0000000..15c26bf
--- /dev/null
+++ b/inventory.example.yaml
@@ -0,0 +1,56 @@
+# Example inventory for Metis
+classes:
+  - name: rpi5-ubuntu-worker
+    arch: arm64
+    os: ubuntu-24.04
+    image: https://harbor.bstein.dev/library/rpi5-ubuntu-worker.img
+    checksum: sha256:REPLACE_ME
+    default_labels:
+      hardware: rpi5
+      node-role.kubernetes.io/worker: "true"
+    default_taints: []
+  - name: rpi4-armbian-longhorn
+    arch: arm64
+    os: armbian-6.6
+    image: https://harbor.bstein.dev/library/rpi4-armbian-longhorn.img
+    checksum: sha256:REPLACE_ME
+    default_labels:
+      hardware: rpi4
+      longhorn: "true"
+      node-role.kubernetes.io/worker: "true"
+    default_taints: []
+  - name: control-plane
+    arch: arm64
+    os: ubuntu-24.04
+    image: https://harbor.bstein.dev/library/rpi5-ubuntu-control.img
+    checksum: sha256:REPLACE_ME
+    default_labels:
+      node-role.kubernetes.io/control-plane: "true"
+    default_taints:
+      - node-role.kubernetes.io/control-plane:NoSchedule
+
+nodes:
+  - name: titan-04
+    class: rpi5-ubuntu-worker
+    hostname: titan-04
+    ip: 192.168.22.30
+    k3s_role: agent
+    labels:
+      hardware: rpi5
+    ssh_user: ubuntu
+  - name: titan-13
+    class: rpi4-armbian-longhorn
+    hostname: titan-13
+    ip: 192.168.22.41
+    k3s_role: agent
+    labels:
+      hardware: rpi4
+      longhorn: "true"
+    longhorn_disks:
+      - mountpoint: /mnt/astreae
+        uuid: 6031fa8b-f28c-45c3-b7bc-6133300e07c6
+        fs: ext4
+      - mountpoint: /mnt/asteria
+        uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
+        fs: ext4
+    ssh_user: root
diff --git a/pkg/inventory/types.go b/pkg/inventory/types.go
new file mode 100644
index 0000000..a005d64
--- /dev/null
+++ b/pkg/inventory/types.go
@@ -0,0 +1,86 @@
+// Package inventory defines the YAML inventory of node classes and nodes and loads it from disk.
+package inventory
+
+import (
+	"fmt"
+	"os"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Inventory is the root document defining node classes and per-node specs.
+type Inventory struct {
+	Classes []NodeClass `yaml:"classes"`
+	Nodes   []NodeSpec  `yaml:"nodes"`
+}
+
+// NodeClass defines a reusable image/config for a group of nodes.
+type NodeClass struct {
+	Name           string            `yaml:"name"`
+	Arch           string            `yaml:"arch"`
+	OS             string            `yaml:"os"`
+	Image          string            `yaml:"image"`
+	Checksum       string            `yaml:"checksum,omitempty"`
+	BootloaderNote string            `yaml:"bootloader_note,omitempty"`
+	DefaultLabels  map[string]string `yaml:"default_labels,omitempty"`
+	DefaultTaints  []string          `yaml:"default_taints,omitempty"`
+	CloudInit      string            `yaml:"cloud_init,omitempty"`
+}
+
+// NodeSpec captures per-node overrides and identity.
+type NodeSpec struct {
+	Name          string            `yaml:"name"`
+	Class         string            `yaml:"class"` // matched against NodeClass.Name by FindNode
+	Hostname      string            `yaml:"hostname"`
+	IP            string            `yaml:"ip"`
+	MAC           string            `yaml:"mac,omitempty"`
+	K3sRole       string            `yaml:"k3s_role"`
+	K3sToken      string            `yaml:"k3s_token,omitempty"`
+	K3sURL        string            `yaml:"k3s_url,omitempty"`
+	Labels        map[string]string `yaml:"labels,omitempty"`
+	Taints        []string          `yaml:"taints,omitempty"`
+	LonghornDisks []LonghornDisk    `yaml:"longhorn_disks,omitempty"`
+	SSHUser       string            `yaml:"ssh_user,omitempty"`
+	SSHAuthorized []string          `yaml:"ssh_authorized_keys,omitempty"`
+	Notes         string            `yaml:"notes,omitempty"`
+}
+
+// LonghornDisk is one entry of a node's longhorn_disks list: a disk UUID, its mountpoint and, optionally, its filesystem.
+type LonghornDisk struct {
+	Mountpoint string `yaml:"mountpoint"`
+	UUID       string `yaml:"uuid"`
+	FS         string `yaml:"fs,omitempty"`
+}
+
+// Load reads the file at path and decodes its YAML content into an Inventory; read and parse failures are wrapped.
+func Load(path string) (*Inventory, error) {
+	raw, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return nil, fmt.Errorf("read inventory: %w", readErr)
+	}
+	inv := new(Inventory)
+	if parseErr := yaml.Unmarshal(raw, inv); parseErr != nil {
+		return nil, fmt.Errorf("parse inventory: %w", parseErr)
+	}
+	return inv, nil
+}
+
+// FindNode returns the spec named name and its class; if only the class lookup fails, the spec is still returned with the error.
+func (i *Inventory) FindNode(name string) (*NodeSpec, *NodeClass, error) {
+	var spec *NodeSpec
+	for n := range i.Nodes {
+		if candidate := &i.Nodes[n]; candidate.Name == name {
+			spec = candidate
+			break
+		}
+	}
+	if spec == nil {
+		return nil, nil, fmt.Errorf("node %s not found", name)
+	}
+	for c := range i.Classes {
+		if class := &i.Classes[c]; class.Name == spec.Class {
+			return spec, class, nil
+		}
+	}
+	return spec, nil, fmt.Errorf("class %s not found for node %s", spec.Class, spec.Name)
+}
diff --git a/pkg/plan/plan.go b/pkg/plan/plan.go
new file mode 100644
index 0000000..f0af547
--- /dev/null
+++ b/pkg/plan/plan.go
@@ -0,0 +1,60 @@
+// Package plan builds the ordered list of steps needed to burn a node's image onto a device.
+package plan
+
+import (
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"metis/pkg/inventory"
+)
+
+// Action describes a step in the burn process.
+type Action struct {
+	Type    string `json:"type"`              // step kind; Build emits fetch, verify, write, inject, finalize
+	Detail  string `json:"detail"`            // human-readable description of the step
+	Command string `json:"command,omitempty"` // shell command for the step; Build sets it only for write
+}
+
+// Plan is the JSON-serializable result of Build: the node, device, class and image plus the ordered actions.
+type Plan struct {
+	Node    string   `json:"node"`
+	Device  string   `json:"device"`
+	Image   string   `json:"image"`
+	Class   string   `json:"class"`
+	Actions []Action `json:"actions"`
+}
+
+// Build assembles the ordered burn plan for nodeName without executing anything.
+// An empty device is replaced by the "/dev/sdX" placeholder. It returns an error
+// when the node or its class cannot be found, or when the class has no image.
+func Build(inv *inventory.Inventory, nodeName, device, cacheDir string) (*Plan, error) {
+	node, class, err := inv.FindNode(nodeName)
+	if err != nil {
+		return nil, err
+	}
+	// filepath.Base("") is ".", so an empty image would make the cache path the
+	// cache directory itself and the dd command would read a directory.
+	if class.Image == "" {
+		return nil, fmt.Errorf("class %s has no image", class.Name)
+	}
+	if device == "" {
+		device = "/dev/sdX" // placeholder
+	}
+	cacheImage := filepath.Join(cacheDir, filepath.Base(class.Image))
+	actions := []Action{{Type: "fetch", Detail: fmt.Sprintf("Download %s to %s", class.Image, cacheImage)}}
+	if class.Checksum != "" {
+		actions = append(actions, Action{Type: "verify", Detail: fmt.Sprintf("Verify checksum %s", class.Checksum)})
+	}
+	actions = append(actions,
+		Action{Type: "write", Detail: fmt.Sprintf("Write image to %s", device), Command: fmt.Sprintf("dd if=%s of=%s bs=4M status=progress conv=fsync", cacheImage, device)},
+		Action{Type: "inject", Detail: "Inject hostname/network/k3s config into boot or rootfs"},
+		Action{Type: "finalize", Detail: fmt.Sprintf("Ready to insert SD for %s", node.Hostname)},
+	)
+	return &Plan{Node: nodeName, Device: device, Image: class.Image, Class: class.Name, Actions: actions}, nil
+}
+
+// NextRunStale reports whether more than maxAge has elapsed since lastSuccess.
+func NextRunStale(lastSuccess time.Time, maxAge time.Duration) bool {
+	return time.Since(lastSuccess) > maxAge
+}