metis: add USB scratch inventory support
This commit is contained in:
parent
cb2498b1df
commit
6d0351f4b3
@ -6,13 +6,13 @@ Metis produces fully configured recovery SD cards for any node in the lab (RPi 4
|
||||
- Cross-platform (Linux + Windows) CLI/GUI with dead-simple UX.
|
||||
- Pull class-specific golden images from Harbor (or other artifact store), inject per-node config, and write/verify SD cards.
|
||||
- Minimal image set via node classes; inject per-node deltas at burn time.
|
||||
- Idempotent bootstraps: hostname/IP, k3s server/agent setup, labels/taints, journald/log GC drop-ins, Longhorn mount validation, SSH keys/users.
|
||||
- Idempotent bootstraps: hostname/IP, k3s server/agent setup, labels/taints, journald/log GC drop-ins, Longhorn and USB scratch mount validation, SSH keys/users.
|
||||
- Works offline once artifacts are cached; verifies hashes/signatures before writing.
|
||||
|
||||
## Planned high-level workflow
|
||||
1) Select target node (from inventory) + target disk.
|
||||
2) Tool downloads/caches the right golden image for that node class.
|
||||
3) Injects per-node config (net, k3s tokens/roles/labels/taints, SSH keys, runtime drop-ins, Longhorn mount metadata) and writes SD.
|
||||
3) Injects per-node config (net, k3s tokens/roles/labels/taints, SSH keys, runtime drop-ins, Longhorn mount metadata, USB scratch bind layout) and writes SD.
|
||||
4) Verifies write; prints next-step: "insert and power on." No manual follow-up.
|
||||
|
||||
## Early design notes
|
||||
@ -45,7 +45,7 @@ Metis produces fully configured recovery SD cards for any node in the lab (RPi 4
|
||||
- Vault: Metis can read per-node secrets from `secret/data/nodes/<hostname>` using VAULT_ADDR plus either VAULT_TOKEN or AppRole (VAULT_ROLE_ID/VAULT_SECRET_ID). Expected fields: ssh_password, k3s_token, cloud_init, extra map.
|
||||
- Sentinel: `metis-sentinel` collects host facts and can either print them, write local history, or push them into the Metis service. The intended deployment shape is a DaemonSet on cluster nodes plus an Ariadne-triggered Metis watch that recomputes recommended class targets and drift history.
|
||||
- Facts aggregation: `metis facts --inventory inv.yaml --snapshots ./snapshots` reads sentinel snapshot JSON files and prints per-class drift summary (kernels, containerd, k3s, package samples). Use exported ConfigMaps or `METIS_SENTINEL_OUT` history as input.
|
||||
- `metis config --inventory inv.yaml --node titan-13` prints the merged node config (hostname/IP/k3s labels/taints/Longhorn UUIDs).
|
||||
- `metis config --inventory inv.yaml --node titan-13` prints the merged node config (hostname/IP/k3s labels/taints/Longhorn UUIDs and optional USB scratch metadata).
|
||||
|
||||
## Service direction
|
||||
- Deployed UI protected by Atlas SSO headers (`admin` / `maintenance`)
|
||||
|
||||
@ -5,8 +5,8 @@ Initial classes to minimize golden images while covering hardware/OS deltas:
|
||||
- `rpi5-ubuntu-worker`: Ubuntu 24.04, k3s agent, hardware=rpi5 (titan-04..11, 0a/0c minus control-plane bits)
|
||||
- `rpi5-ubuntu-control`: Ubuntu 24.04, k3s server (titan-0a/0b/0c specifics), control-plane taints, etcd snapshot hooks
|
||||
- `rpi4-armbian-longhorn`: Armbian 6.6.x, k3s agent, hardware=rpi4 with Longhorn disks (titan-13/15/17/19; astreae/asteria mounts)
|
||||
- `rpi4-armbian-worker`: Armbian 6.6.x, k3s agent, hardware=rpi4 without Longhorn disks (titan-12/14/18)
|
||||
- `rpi4-armbian-worker`: Armbian 6.6.x, k3s agent, hardware=rpi4 without Longhorn disks; `titan-16` uses the USB scratch recovery card standard
|
||||
- `amd64-agent`: Debian 13 k3s agent with GPU/node labels (titan-22/24, avoid by preference)
|
||||
- `external-hosts`: non-cluster (tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21) – per-host config over base image template
|
||||
|
||||
Per-node overlays capture hostname/IP, labels/taints, Longhorn UUID mounts, and drop-ins for logging/GC.
|
||||
Per-node overlays capture hostname/IP, labels/taints, Longhorn UUID mounts, USB scratch bind targets, and drop-ins for logging/GC.
|
||||
|
||||
@ -23,6 +23,17 @@ classes:
|
||||
longhorn: "true"
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
default_taints: []
|
||||
- name: rpi4-armbian-worker
|
||||
arch: arm64
|
||||
os: armbian-6.6
|
||||
image: https://harbor.bstein.dev/library/rpi4-armbian-worker.img
|
||||
checksum: sha256:REPLACE_ME
|
||||
boot_overlay: overlays/rpi4-boot
|
||||
root_overlay: overlays/rpi4-root
|
||||
default_labels:
|
||||
hardware: rpi4
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
default_taints: []
|
||||
- name: control-plane
|
||||
arch: arm64
|
||||
os: ubuntu-24.04
|
||||
@ -72,6 +83,21 @@ nodes:
|
||||
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
|
||||
fs: ext4
|
||||
ssh_user: root
|
||||
- name: titan-16
|
||||
class: rpi4-armbian-worker
|
||||
hostname: titan-16
|
||||
ip: 192.168.22.44
|
||||
k3s_role: agent
|
||||
labels:
|
||||
hardware: rpi4
|
||||
usb_scratch:
|
||||
mountpoint: /mnt/scratch
|
||||
label: titan-16-scratch
|
||||
fs: ext4
|
||||
bind_targets:
|
||||
- /var/lib/rancher
|
||||
- /var/log
|
||||
ssh_user: ubuntu
|
||||
- name: titan-20
|
||||
class: jetson-accelerator
|
||||
hostname: titan-20
|
||||
|
||||
@ -165,6 +165,13 @@ nodes:
|
||||
ssh_user: atlas
|
||||
ssh_authorized_keys:
|
||||
- ${METIS_SSH_KEY_BASTION}
|
||||
usb_scratch:
|
||||
mountpoint: /mnt/scratch
|
||||
label: titan-16-scratch
|
||||
fs: ext4
|
||||
bind_targets:
|
||||
- /var/lib/rancher
|
||||
- /var/log
|
||||
- name: titan-13
|
||||
class: rpi4-armbian-longhorn
|
||||
hostname: titan-13
|
||||
|
||||
@ -8,15 +8,16 @@ import (
|
||||
|
||||
// NodeConfig represents boot-time configuration to inject.
|
||||
type NodeConfig struct {
|
||||
Hostname string `json:"hostname"`
|
||||
IP string `json:"ip"`
|
||||
K3s K3sConfig `json:"k3s"`
|
||||
SSHUser string `json:"ssh_user,omitempty"`
|
||||
SSHKeys []string `json:"ssh_keys,omitempty"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Taints []string `json:"taints,omitempty"`
|
||||
Fstab []FstabEntry `json:"fstab,omitempty"`
|
||||
Secrets map[string]string `json:"secrets,omitempty"` // optional key/values for local agent use
|
||||
Hostname string `json:"hostname"`
|
||||
IP string `json:"ip"`
|
||||
K3s K3sConfig `json:"k3s"`
|
||||
SSHUser string `json:"ssh_user,omitempty"`
|
||||
SSHKeys []string `json:"ssh_keys,omitempty"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Taints []string `json:"taints,omitempty"`
|
||||
Fstab []FstabEntry `json:"fstab,omitempty"`
|
||||
USBScratch *USBScratchConfig `json:"usb_scratch,omitempty"`
|
||||
Secrets map[string]string `json:"secrets,omitempty"` // optional key/values for local agent use
|
||||
}
|
||||
|
||||
// K3sConfig includes role and token/url.
|
||||
@ -32,12 +33,23 @@ type K3sConfig struct {
|
||||
|
||||
// FstabEntry for Longhorn or other mounts.
|
||||
type FstabEntry struct {
|
||||
UUID string `json:"uuid"`
|
||||
Source string `json:"source,omitempty"`
|
||||
UUID string `json:"uuid,omitempty"`
|
||||
Label string `json:"label,omitempty"`
|
||||
Mountpoint string `json:"mountpoint"`
|
||||
FS string `json:"fs"`
|
||||
Options string `json:"options"`
|
||||
}
|
||||
|
||||
// USBScratchConfig is the boot-time description of a node's USB scratch
// disk: where it is mounted, how the disk is identified, and which paths
// are bind-mounted from it.
type USBScratchConfig struct {
	// Mountpoint is the path the scratch disk is mounted on.
	Mountpoint string `json:"mountpoint"`
	// UUID optionally identifies the disk by filesystem UUID.
	UUID string `json:"uuid,omitempty"`
	// Label optionally identifies the disk by filesystem label.
	Label string `json:"label,omitempty"`
	// FS is the filesystem type; omitted from JSON when empty.
	FS string `json:"fs,omitempty"`
	// BindTargets lists the paths that are bind-mounted from the scratch disk.
	BindTargets []string `json:"bind_targets,omitempty"`
}
|
||||
|
||||
// Build creates a NodeConfig from inventory.
|
||||
func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
|
||||
n, cls, err := inv.FindNode(nodeName)
|
||||
@ -58,6 +70,23 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
|
||||
k3sVersion = n.K3sVersion
|
||||
}
|
||||
|
||||
cfg := &NodeConfig{
|
||||
Hostname: n.Hostname,
|
||||
IP: n.IP,
|
||||
SSHUser: n.SSHUser,
|
||||
SSHKeys: n.SSHAuthorized,
|
||||
Labels: labels,
|
||||
Taints: taints,
|
||||
K3s: K3sConfig{
|
||||
Role: n.K3sRole,
|
||||
Version: k3sVersion,
|
||||
URL: n.K3sURL,
|
||||
Token: n.K3sToken,
|
||||
Labels: labels,
|
||||
Taints: taints,
|
||||
},
|
||||
}
|
||||
|
||||
fstab := []FstabEntry{}
|
||||
for _, d := range n.LonghornDisks {
|
||||
fs := d.FS
|
||||
@ -71,24 +100,35 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
|
||||
Options: "defaults,nofail",
|
||||
})
|
||||
}
|
||||
|
||||
cfg := &NodeConfig{
|
||||
Hostname: n.Hostname,
|
||||
IP: n.IP,
|
||||
SSHUser: n.SSHUser,
|
||||
SSHKeys: n.SSHAuthorized,
|
||||
Labels: labels,
|
||||
Taints: taints,
|
||||
Fstab: fstab,
|
||||
K3s: K3sConfig{
|
||||
Role: n.K3sRole,
|
||||
Version: k3sVersion,
|
||||
URL: n.K3sURL,
|
||||
Token: n.K3sToken,
|
||||
Labels: labels,
|
||||
Taints: taints,
|
||||
},
|
||||
if n.USBScratch != nil {
|
||||
scratch := USBScratchConfig{
|
||||
Mountpoint: n.USBScratch.Mountpoint,
|
||||
UUID: n.USBScratch.UUID,
|
||||
Label: n.USBScratch.Label,
|
||||
FS: n.USBScratch.FS,
|
||||
BindTargets: append([]string{}, n.USBScratch.BindTargets...),
|
||||
}
|
||||
if scratch.FS == "" {
|
||||
scratch.FS = "ext4"
|
||||
}
|
||||
cfg.USBScratch = &scratch
|
||||
fstab = append(fstab, FstabEntry{
|
||||
UUID: scratch.UUID,
|
||||
Label: scratch.Label,
|
||||
Mountpoint: scratch.Mountpoint,
|
||||
FS: scratch.FS,
|
||||
Options: "defaults,nofail",
|
||||
})
|
||||
for _, target := range scratch.BindTargets {
|
||||
fstab = append(fstab, FstabEntry{
|
||||
Source: scratch.Mountpoint,
|
||||
Mountpoint: target,
|
||||
FS: "none",
|
||||
Options: "bind,nofail",
|
||||
})
|
||||
}
|
||||
}
|
||||
cfg.Fstab = fstab
|
||||
if cfg.Hostname == "" || cfg.IP == "" {
|
||||
return nil, fmt.Errorf("hostname/ip required for node %s", nodeName)
|
||||
}
|
||||
|
||||
@ -26,6 +26,12 @@ func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) {
|
||||
SSHUser: "atlas",
|
||||
SSHAuthorized: []string{"key"},
|
||||
LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/data", UUID: "uuid-1"}},
|
||||
USBScratch: &inventory.USBScratchDisk{
|
||||
Mountpoint: "/mnt/scratch",
|
||||
Label: "titan-13-scratch",
|
||||
FS: "ext4",
|
||||
BindTargets: []string{"/var/lib/rancher", "/var/log"},
|
||||
},
|
||||
}},
|
||||
}
|
||||
cfg, err := Build(&inv, "n1")
|
||||
@ -38,6 +44,18 @@ func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) {
|
||||
if got := cfg.Fstab[0].FS; got != "ext4" {
|
||||
t.Fatalf("expected default filesystem ext4, got %q", got)
|
||||
}
|
||||
if cfg.USBScratch == nil || cfg.USBScratch.Label != "titan-13-scratch" {
|
||||
t.Fatalf("usb scratch missing: %#v", cfg.USBScratch)
|
||||
}
|
||||
if got := len(cfg.Fstab); got != 4 {
|
||||
t.Fatalf("expected longhorn plus scratch fstab entries, got %d", got)
|
||||
}
|
||||
if got := cfg.Fstab[1].Label; got != "titan-13-scratch" {
|
||||
t.Fatalf("usb scratch label = %q", got)
|
||||
}
|
||||
if got := cfg.Fstab[2].Source; got != "/mnt/scratch" || cfg.Fstab[2].Mountpoint != "/var/lib/rancher" {
|
||||
t.Fatalf("usb bind mount = %#v", cfg.Fstab[2])
|
||||
}
|
||||
if got := cfg.Labels["role"]; got != "worker" {
|
||||
t.Fatalf("label merge lost default label: %q", got)
|
||||
}
|
||||
|
||||
@ -31,15 +31,19 @@ func TestBuildBranches(t *testing.T) {
|
||||
SSHUser: "atlas",
|
||||
SSHAuthorized: []string{"ssh-ed25519 AAA"},
|
||||
LonghornDisks: []inventory.LonghornDisk{{UUID: "u1", Mountpoint: "/var/lib/longhorn"}},
|
||||
USBScratch: &inventory.USBScratchDisk{Mountpoint: "/mnt/scratch", UUID: "usb-1", BindTargets: []string{"/var/lib/rancher"}},
|
||||
}},
|
||||
}
|
||||
cfg, err := Build(inv, "titan-15")
|
||||
if err != nil {
|
||||
t.Fatalf("Build: %v", err)
|
||||
}
|
||||
if cfg.K3s.Version != "v1.31.5+k3s2" || len(cfg.Fstab) != 1 || cfg.Fstab[0].FS != "ext4" {
|
||||
if cfg.K3s.Version != "v1.31.5+k3s2" || len(cfg.Fstab) != 3 || cfg.Fstab[0].FS != "ext4" {
|
||||
t.Fatalf("unexpected config: %#v", cfg)
|
||||
}
|
||||
if cfg.USBScratch == nil || cfg.USBScratch.Mountpoint != "/mnt/scratch" {
|
||||
t.Fatalf("expected usb scratch config: %#v", cfg.USBScratch)
|
||||
}
|
||||
if _, err := Build(&inventory.Inventory{}, "missing"); err == nil {
|
||||
t.Fatal("expected Build to fail for missing node")
|
||||
}
|
||||
|
||||
@ -6,13 +6,17 @@ import (
|
||||
|
||||
// ClassSummary captures aggregated sentinel facts per class.
|
||||
type ClassSummary struct {
|
||||
Class string `json:"class"`
|
||||
Nodes []string `json:"nodes"`
|
||||
Kernels map[string]int `json:"kernels,omitempty"`
|
||||
OSImages map[string]int `json:"os_images,omitempty"`
|
||||
Containerd map[string]int `json:"containerd,omitempty"`
|
||||
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
|
||||
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
|
||||
Class string `json:"class"`
|
||||
Nodes []string `json:"nodes"`
|
||||
Kernels map[string]int `json:"kernels,omitempty"`
|
||||
OSImages map[string]int `json:"os_images,omitempty"`
|
||||
Containerd map[string]int `json:"containerd,omitempty"`
|
||||
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
|
||||
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
|
||||
USBMountHealth map[string]int `json:"usb_mount_health,omitempty"`
|
||||
USBUUIDHealth map[string]int `json:"usb_uuid_health,omitempty"`
|
||||
USBLabelHealth map[string]int `json:"usb_label_health,omitempty"`
|
||||
USBBindHealth map[string]int `json:"usb_bind_health,omitempty"`
|
||||
}
|
||||
|
||||
// Aggregate groups snapshots by inventory class and tallies version drift.
|
||||
@ -20,20 +24,28 @@ func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSumm
|
||||
result := map[string]*ClassSummary{}
|
||||
for _, s := range snaps {
|
||||
class := "unknown"
|
||||
var scratch *inventory.USBScratchDisk
|
||||
if inv != nil {
|
||||
if node, cls, err := inv.FindNode(s.Hostname); err == nil && cls != nil && node != nil {
|
||||
if node, cls, err := inv.FindNode(s.Hostname); node != nil && cls != nil && err == nil {
|
||||
class = cls.Name
|
||||
scratch = node.USBScratch
|
||||
} else if node != nil {
|
||||
scratch = node.USBScratch
|
||||
}
|
||||
}
|
||||
sum, ok := result[class]
|
||||
if !ok {
|
||||
sum = &ClassSummary{
|
||||
Class: class,
|
||||
Kernels: map[string]int{},
|
||||
OSImages: map[string]int{},
|
||||
Containerd: map[string]int{},
|
||||
K3sVersions: map[string]int{},
|
||||
PackageStats: map[string]map[string]int{},
|
||||
Class: class,
|
||||
Kernels: map[string]int{},
|
||||
OSImages: map[string]int{},
|
||||
Containerd: map[string]int{},
|
||||
K3sVersions: map[string]int{},
|
||||
PackageStats: map[string]map[string]int{},
|
||||
USBMountHealth: map[string]int{},
|
||||
USBUUIDHealth: map[string]int{},
|
||||
USBLabelHealth: map[string]int{},
|
||||
USBBindHealth: map[string]int{},
|
||||
}
|
||||
result[class] = sum
|
||||
}
|
||||
@ -58,6 +70,35 @@ func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSumm
|
||||
sum.PackageStats[pkg][ver]++
|
||||
}
|
||||
}
|
||||
addUSBHealth(sum, scratch, s.USBScratch)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func addUSBHealth(sum *ClassSummary, desired *inventory.USBScratchDisk, observed *USBScratch) {
|
||||
if desired == nil || sum == nil {
|
||||
return
|
||||
}
|
||||
if desired.Mountpoint != "" {
|
||||
sum.USBMountHealth[usbStatus(observed, observed != nil && observed.MountHealthy)]++
|
||||
}
|
||||
if desired.UUID != "" {
|
||||
sum.USBUUIDHealth[usbStatus(observed, observed != nil && observed.UUIDHealthy)]++
|
||||
}
|
||||
if desired.Label != "" {
|
||||
sum.USBLabelHealth[usbStatus(observed, observed != nil && observed.LabelHealthy)]++
|
||||
}
|
||||
if len(desired.BindTargets) > 0 {
|
||||
sum.USBBindHealth[usbStatus(observed, observed != nil && observed.BindHealthy)]++
|
||||
}
|
||||
}
|
||||
|
||||
func usbStatus(observed *USBScratch, ok bool) string {
|
||||
if observed == nil {
|
||||
return "missing"
|
||||
}
|
||||
if ok {
|
||||
return "ok"
|
||||
}
|
||||
return "bad"
|
||||
}
|
||||
|
||||
@ -10,12 +10,12 @@ func TestAggregateGroupsByClass(t *testing.T) {
|
||||
inv := &inventory.Inventory{
|
||||
Classes: []inventory.NodeClass{{Name: "c1"}, {Name: "c2"}},
|
||||
Nodes: []inventory.NodeSpec{
|
||||
{Name: "n1", Class: "c1"},
|
||||
{Name: "n1", Class: "c1", USBScratch: &inventory.USBScratchDisk{Mountpoint: "/mnt/scratch", Label: "scratch-1", BindTargets: []string{"/var/lib/rancher"}}},
|
||||
{Name: "n2", Class: "c2"},
|
||||
},
|
||||
}
|
||||
snaps := []Snapshot{
|
||||
{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"containerd": "2.0"}},
|
||||
{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"containerd": "2.0"}, USBScratch: &USBScratch{Mountpoint: "/mnt/scratch", Label: "scratch-1", MountHealthy: true, LabelHealthy: true, BindHealthy: true, BindTargets: []USBBindTarget{{Path: "/var/lib/rancher", Healthy: true}}}},
|
||||
{Hostname: "n2", Kernel: "k2", PackageSample: map[string]string{"containerd": "1.7"}},
|
||||
{Hostname: "n1", Kernel: "k1"},
|
||||
}
|
||||
@ -30,6 +30,9 @@ func TestAggregateGroupsByClass(t *testing.T) {
|
||||
if c1.PackageStats["containerd"]["2.0"] != 1 {
|
||||
t.Fatalf("package stats not tallied: %#v", c1.PackageStats)
|
||||
}
|
||||
if c1.USBMountHealth["ok"] != 1 || c1.USBLabelHealth["ok"] != 1 || c1.USBBindHealth["ok"] != 1 {
|
||||
t.Fatalf("usb health not tallied: %#v", c1)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAggregateKeepsUnknownHostnames(t *testing.T) {
|
||||
|
||||
@ -16,6 +16,7 @@ type Snapshot struct {
|
||||
Containerd string `json:"containerd,omitempty"`
|
||||
PackageSample map[string]string `json:"package_sample,omitempty"`
|
||||
DropInsSample map[string]string `json:"dropins_sample,omitempty"`
|
||||
USBScratch *USBScratch `json:"usb_scratch,omitempty"`
|
||||
}
|
||||
|
||||
// LoadDir reads all *.json under a directory and returns snapshots.
|
||||
|
||||
@ -8,7 +8,7 @@ import (
|
||||
|
||||
func TestLoadDirReadsSnapshots(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
snap := `{"hostname":"n1","kernel":"k","containerd":"c","package_sample":{"a":"1"}}`
|
||||
snap := `{"hostname":"n1","kernel":"k","containerd":"c","package_sample":{"a":"1"},"usb_scratch":{"mountpoint":"/mnt/scratch","label":"titan-16-scratch","mount_healthy":true,"bind_targets":[{"path":"/var/lib/rancher","healthy":true}],"bind_healthy":true}}`
|
||||
if err := os.WriteFile(filepath.Join(dir, "snap.json"), []byte(snap), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -19,6 +19,9 @@ func TestLoadDirReadsSnapshots(t *testing.T) {
|
||||
if len(got) != 1 || got[0].Hostname != "n1" || got[0].PackageSample["a"] != "1" {
|
||||
t.Fatalf("unexpected snapshot: %+v", got)
|
||||
}
|
||||
if got[0].USBScratch == nil || got[0].USBScratch.Label != "titan-16-scratch" || len(got[0].USBScratch.BindTargets) != 1 {
|
||||
t.Fatalf("unexpected usb scratch snapshot: %+v", got[0].USBScratch)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDirRejectsInvalidJSON(t *testing.T) {
|
||||
|
||||
@ -1,21 +1,45 @@
|
||||
package facts
|
||||
|
||||
// USBBindTarget records one bind-mount path together with the result of its
// health check.
type USBBindTarget struct {
	// Path is the bind-mount target path.
	Path string `json:"path,omitempty"`
	// Healthy reports whether the bind mount looked healthy; a false value is
	// omitted from the JSON encoding.
	Healthy bool `json:"healthy,omitempty"`
}
|
||||
|
||||
// USBScratch captures the desired scratch-disk configuration plus health.
|
||||
type USBScratch struct {
|
||||
Mountpoint string `json:"mountpoint,omitempty"`
|
||||
UUID string `json:"uuid,omitempty"`
|
||||
Label string `json:"label,omitempty"`
|
||||
FS string `json:"fs,omitempty"`
|
||||
MountHealthy bool `json:"mount_healthy,omitempty"`
|
||||
UUIDHealthy bool `json:"uuid_healthy,omitempty"`
|
||||
LabelHealthy bool `json:"label_healthy,omitempty"`
|
||||
BindTargets []USBBindTarget `json:"bind_targets,omitempty"`
|
||||
BindHealthy bool `json:"bind_healthy,omitempty"`
|
||||
}
|
||||
|
||||
// ClassFacts captures driftable state collected by metis-sentinel.
|
||||
type ClassFacts struct {
|
||||
ClassName string `json:"class_name"`
|
||||
Kernel string `json:"kernel,omitempty"`
|
||||
K3sVersion string `json:"k3s_version,omitempty"`
|
||||
Containerd string `json:"containerd,omitempty"`
|
||||
Packages map[string]string `json:"packages,omitempty"` // name -> version
|
||||
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
|
||||
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
|
||||
CGroupConfig map[string]string `json:"cgroup_config,omitempty"` // key -> value
|
||||
Notes string `json:"notes,omitempty"`
|
||||
ClassName string `json:"class_name"`
|
||||
Kernel string `json:"kernel,omitempty"`
|
||||
K3sVersion string `json:"k3s_version,omitempty"`
|
||||
Containerd string `json:"containerd,omitempty"`
|
||||
Packages map[string]string `json:"packages,omitempty"` // name -> version
|
||||
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
|
||||
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
|
||||
CGroupConfig map[string]string `json:"cgroup_config,omitempty"` // key -> value
|
||||
USBMountHealth map[string]int `json:"usb_mount_health,omitempty"`
|
||||
USBUUIDHealth map[string]int `json:"usb_uuid_health,omitempty"`
|
||||
USBLabelHealth map[string]int `json:"usb_label_health,omitempty"`
|
||||
USBBindHealth map[string]int `json:"usb_bind_health,omitempty"`
|
||||
Notes string `json:"notes,omitempty"`
|
||||
}
|
||||
|
||||
// NodeFacts captures per-node data (e.g., disk UUIDs) to verify drift.
|
||||
type NodeFacts struct {
|
||||
Hostname string `json:"hostname"`
|
||||
Disks map[string]string `json:"disks,omitempty"` // mount -> UUID
|
||||
Notes string `json:"notes,omitempty"`
|
||||
Hostname string `json:"hostname"`
|
||||
Disks map[string]string `json:"disks,omitempty"` // mount -> UUID
|
||||
USBScratch *USBScratch `json:"usb_scratch,omitempty"`
|
||||
Notes string `json:"notes,omitempty"`
|
||||
}
|
||||
|
||||
@ -44,6 +44,7 @@ type NodeSpec struct {
|
||||
Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"`
|
||||
Taints []string `yaml:"taints,omitempty" json:"taints,omitempty"`
|
||||
LonghornDisks []LonghornDisk `yaml:"longhorn_disks,omitempty" json:"longhorn_disks,omitempty"`
|
||||
USBScratch *USBScratchDisk `yaml:"usb_scratch,omitempty" json:"usb_scratch,omitempty"`
|
||||
SSHUser string `yaml:"ssh_user,omitempty" json:"ssh_user,omitempty"`
|
||||
SSHAuthorized []string `yaml:"ssh_authorized_keys,omitempty" json:"ssh_authorized_keys,omitempty"`
|
||||
Notes string `yaml:"notes,omitempty" json:"notes,omitempty"`
|
||||
@ -56,6 +57,15 @@ type LonghornDisk struct {
|
||||
FS string `yaml:"fs,omitempty" json:"fs,omitempty"`
|
||||
}
|
||||
|
||||
// USBScratchDisk is the inventory entry for a node's recovery USB scratch
// disk and the paths bind-mounted from it.
type USBScratchDisk struct {
	// Mountpoint is the path the scratch disk is mounted on.
	Mountpoint string `yaml:"mountpoint" json:"mountpoint"`
	// UUID optionally identifies the disk by filesystem UUID.
	UUID string `yaml:"uuid,omitempty" json:"uuid,omitempty"`
	// Label optionally identifies the disk by filesystem label.
	Label string `yaml:"label,omitempty" json:"label,omitempty"`
	// FS is the filesystem type; it may be left empty in the inventory.
	FS string `yaml:"fs,omitempty" json:"fs,omitempty"`
	// BindTargets lists the paths to bind-mount from the scratch disk.
	BindTargets []string `yaml:"bind_targets,omitempty" json:"bind_targets,omitempty"`
}
|
||||
|
||||
// Load reads and parses an inventory file.
|
||||
func Load(path string) (*Inventory, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
@ -121,6 +131,21 @@ func expandInventory(inv *Inventory) {
|
||||
inv.Nodes[idx].LonghornDisks[diskIdx].UUID = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].UUID)
|
||||
inv.Nodes[idx].LonghornDisks[diskIdx].FS = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].FS)
|
||||
}
|
||||
if inv.Nodes[idx].USBScratch != nil {
|
||||
inv.Nodes[idx].USBScratch.Mountpoint = os.ExpandEnv(inv.Nodes[idx].USBScratch.Mountpoint)
|
||||
inv.Nodes[idx].USBScratch.UUID = os.ExpandEnv(inv.Nodes[idx].USBScratch.UUID)
|
||||
inv.Nodes[idx].USBScratch.Label = os.ExpandEnv(inv.Nodes[idx].USBScratch.Label)
|
||||
inv.Nodes[idx].USBScratch.FS = os.ExpandEnv(inv.Nodes[idx].USBScratch.FS)
|
||||
bindTargets := make([]string, 0, len(inv.Nodes[idx].USBScratch.BindTargets))
|
||||
for _, value := range inv.Nodes[idx].USBScratch.BindTargets {
|
||||
expanded := strings.TrimSpace(os.ExpandEnv(value))
|
||||
if expanded == "" {
|
||||
continue
|
||||
}
|
||||
bindTargets = append(bindTargets, expanded)
|
||||
}
|
||||
inv.Nodes[idx].USBScratch.BindTargets = bindTargets
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -9,6 +9,9 @@ import (
|
||||
func TestLoadExpandsEnvironmentVariables(t *testing.T) {
|
||||
t.Setenv("METIS_IMAGE_PATH", "file:///tmp/rpi4.img")
|
||||
t.Setenv("METIS_K3S_TOKEN", "secret-token")
|
||||
t.Setenv("METIS_USB_MOUNT", "/mnt/usb")
|
||||
t.Setenv("METIS_USB_LABEL", "titan-13-scratch")
|
||||
t.Setenv("METIS_USB_BIND", "/var/lib/rancher")
|
||||
invPath := filepath.Join(t.TempDir(), "inventory.yaml")
|
||||
if err := os.WriteFile(invPath, []byte(`
|
||||
classes:
|
||||
@ -22,6 +25,12 @@ nodes:
|
||||
ip: 192.168.22.41
|
||||
k3s_role: agent
|
||||
k3s_token: ${METIS_K3S_TOKEN}
|
||||
usb_scratch:
|
||||
mountpoint: ${METIS_USB_MOUNT}
|
||||
label: ${METIS_USB_LABEL}
|
||||
fs: ext4
|
||||
bind_targets:
|
||||
- ${METIS_USB_BIND}
|
||||
`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -40,6 +49,12 @@ nodes:
|
||||
if node.K3sToken != "secret-token" {
|
||||
t.Fatalf("token not expanded: %q", node.K3sToken)
|
||||
}
|
||||
if node.USBScratch == nil || node.USBScratch.Mountpoint != "/mnt/usb" || node.USBScratch.Label != "titan-13-scratch" {
|
||||
t.Fatalf("usb scratch not expanded: %#v", node.USBScratch)
|
||||
}
|
||||
if len(node.USBScratch.BindTargets) != 1 || node.USBScratch.BindTargets[0] != "/var/lib/rancher" {
|
||||
t.Fatalf("usb bind target not expanded: %#v", node.USBScratch.BindTargets)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindNodeReturnsClassMissingError(t *testing.T) {
|
||||
|
||||
@ -344,9 +344,20 @@ LinkLocalAddressing=no
|
||||
func fstabAppendContent(cfg *config.NodeConfig) string {
|
||||
var lines []string
|
||||
for _, entry := range cfg.Fstab {
|
||||
source := entry.Source
|
||||
switch {
|
||||
case source != "":
|
||||
// Use the explicit source path for bind mounts.
|
||||
case entry.UUID != "":
|
||||
source = "UUID=" + entry.UUID
|
||||
case entry.Label != "":
|
||||
source = "LABEL=" + entry.Label
|
||||
default:
|
||||
source = "none"
|
||||
}
|
||||
lines = append(lines, fmt.Sprintf(
|
||||
"UUID=%s %s %s %s 0 0",
|
||||
entry.UUID,
|
||||
"%s %s %s %s 0 0",
|
||||
source,
|
||||
entry.Mountpoint,
|
||||
entry.FS,
|
||||
entry.Options,
|
||||
|
||||
@ -30,6 +30,18 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) {
|
||||
FS: "ext4",
|
||||
Options: "defaults,nofail",
|
||||
},
|
||||
{
|
||||
UUID: "usb-uuid",
|
||||
Mountpoint: "/mnt/scratch",
|
||||
FS: "ext4",
|
||||
Options: "defaults,nofail",
|
||||
},
|
||||
{
|
||||
Source: "/mnt/scratch",
|
||||
Mountpoint: "/var/lib/rancher",
|
||||
FS: "none",
|
||||
Options: "bind,nofail",
|
||||
},
|
||||
},
|
||||
Labels: map[string]string{"role": "worker", "zone": "a", "node-role.kubernetes.io/worker": "true"},
|
||||
Taints: []string{"gpu=true:NoSchedule"},
|
||||
@ -80,7 +92,7 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) {
|
||||
t.Fatalf("systemd-networkd config missing/incorrect: %s", networkd)
|
||||
}
|
||||
fstab, ok := pathMap["etc/metis/fstab.append"]
|
||||
if !ok || !strings.Contains(fstab, "UUID=disk-uuid /mnt/astreae ext4 defaults,nofail 0 0") {
|
||||
if !ok || !strings.Contains(fstab, "UUID=disk-uuid /mnt/astreae ext4 defaults,nofail 0 0") || !strings.Contains(fstab, "UUID=usb-uuid /mnt/scratch ext4 defaults,nofail 0 0") || !strings.Contains(fstab, "/mnt/scratch /var/lib/rancher none bind,nofail 0 0") {
|
||||
t.Fatalf("fstab append missing/incorrect: %s", fstab)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,11 +1,26 @@
|
||||
package sentinel
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"metis/pkg/facts"
|
||||
)
|
||||
|
||||
type nodeConfig struct {
|
||||
USBScratch *usbScratchConfig `json:"usb_scratch,omitempty"`
|
||||
}
|
||||
|
||||
// usbScratchConfig is the sentinel's view of the usb_scratch section of the
// node configuration: the desired mountpoint, disk identifiers, filesystem
// and bind targets that the health checks compare against.
type usbScratchConfig struct {
	// Mountpoint is the path the scratch disk should be mounted on.
	Mountpoint string `json:"mountpoint,omitempty"`
	// UUID is the expected filesystem UUID, if any.
	UUID string `json:"uuid,omitempty"`
	// Label is the expected filesystem label, if any.
	Label string `json:"label,omitempty"`
	// FS is the expected filesystem type, if any.
	FS string `json:"fs,omitempty"`
	// BindTargets lists the paths expected to be bind-mounted from the disk.
	BindTargets []string `json:"bind_targets,omitempty"`
}
|
||||
|
||||
// Snapshot captures host-level facts.
|
||||
type Snapshot struct {
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
@ -15,6 +30,7 @@ type Snapshot struct {
|
||||
Containerd string `json:"containerd,omitempty"`
|
||||
PackageSample map[string]string `json:"package_sample,omitempty"` // small subset to detect drift
|
||||
DropInsSample map[string]string `json:"dropins_sample,omitempty"` // path->content hash/sample
|
||||
USBScratch *facts.USBScratch `json:"usb_scratch,omitempty"`
|
||||
Notes string `json:"notes,omitempty"`
|
||||
}
|
||||
|
||||
@ -27,9 +43,76 @@ func Collect() *Snapshot {
|
||||
K3sVersion: runAndTrim("k3s", "version"),
|
||||
Containerd: runAndTrim("containerd", "--version"),
|
||||
PackageSample: pkgSample(),
|
||||
USBScratch: collectUSBScratch(),
|
||||
}
|
||||
}
|
||||
|
||||
func collectUSBScratch() *facts.USBScratch {
|
||||
raw, err := commandOutput("cat", "/etc/metis/node.json")
|
||||
if err != nil || len(strings.TrimSpace(string(raw))) == 0 {
|
||||
return nil
|
||||
}
|
||||
var cfg nodeConfig
|
||||
if err := json.Unmarshal(raw, &cfg); err != nil || cfg.USBScratch == nil {
|
||||
return nil
|
||||
}
|
||||
desired := cfg.USBScratch
|
||||
scratch := &facts.USBScratch{
|
||||
Mountpoint: desired.Mountpoint,
|
||||
UUID: desired.UUID,
|
||||
Label: desired.Label,
|
||||
FS: desired.FS,
|
||||
}
|
||||
|
||||
source, fsType, mounted := mountInfo(desired.Mountpoint)
|
||||
scratch.MountHealthy = mounted && strings.TrimSpace(source) != ""
|
||||
if scratch.MountHealthy && desired.FS != "" && fsType != "" {
|
||||
scratch.MountHealthy = strings.EqualFold(fsType, desired.FS)
|
||||
}
|
||||
if scratch.FS == "" && fsType != "" {
|
||||
scratch.FS = fsType
|
||||
}
|
||||
|
||||
device := source
|
||||
if device == "" && desired.UUID != "" {
|
||||
device = resolveDeviceByUUID(desired.UUID)
|
||||
}
|
||||
if device == "" && desired.Label != "" {
|
||||
device = resolveDeviceByLabel(desired.Label)
|
||||
}
|
||||
if device != "" {
|
||||
export := blkidExport(device)
|
||||
if desired.UUID != "" {
|
||||
scratch.UUIDHealthy = export["UUID"] == desired.UUID
|
||||
}
|
||||
if desired.Label != "" {
|
||||
scratch.LabelHealthy = export["LABEL"] == desired.Label
|
||||
}
|
||||
if scratch.FS == "" {
|
||||
scratch.FS = export["TYPE"]
|
||||
}
|
||||
}
|
||||
|
||||
healthy := true
|
||||
if len(desired.BindTargets) > 0 {
|
||||
scratch.BindTargets = make([]facts.USBBindTarget, 0, len(desired.BindTargets))
|
||||
for _, target := range desired.BindTargets {
|
||||
ok := bindHealthy(target, desired.Mountpoint)
|
||||
if !ok {
|
||||
healthy = false
|
||||
}
|
||||
scratch.BindTargets = append(scratch.BindTargets, facts.USBBindTarget{
|
||||
Path: target,
|
||||
Healthy: ok,
|
||||
})
|
||||
}
|
||||
scratch.BindHealthy = healthy
|
||||
} else {
|
||||
scratch.BindHealthy = true
|
||||
}
|
||||
return scratch
|
||||
}
|
||||
|
||||
func runAndTrim(cmd string, args ...string) string {
|
||||
out, err := commandOutput(cmd, args...)
|
||||
if err != nil {
|
||||
@ -78,6 +161,71 @@ func pkgVersion(name string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func mountInfo(target string) (string, string, bool) {
|
||||
target = strings.TrimSpace(target)
|
||||
if target == "" {
|
||||
return "", "", false
|
||||
}
|
||||
out, err := commandOutput("findmnt", "-P", "-n", "-T", target, "-o", "SOURCE,TARGET,FSTYPE")
|
||||
if err != nil {
|
||||
return "", "", false
|
||||
}
|
||||
fields := parseKeyValues(string(out))
|
||||
source := fields["SOURCE"]
|
||||
fsType := fields["FSTYPE"]
|
||||
return source, fsType, strings.TrimSpace(fields["TARGET"]) == target
|
||||
}
|
||||
|
||||
func bindHealthy(target, source string) bool {
|
||||
target = strings.TrimSpace(target)
|
||||
source = strings.TrimSpace(source)
|
||||
if target == "" || source == "" {
|
||||
return false
|
||||
}
|
||||
mountSource, _, mounted := mountInfo(target)
|
||||
return mounted && strings.TrimSpace(mountSource) == source
|
||||
}
|
||||
|
||||
func resolveDeviceByUUID(uuid string) string {
|
||||
uuid = strings.TrimSpace(uuid)
|
||||
if uuid == "" {
|
||||
return ""
|
||||
}
|
||||
return runAndTrim("blkid", "-U", uuid)
|
||||
}
|
||||
|
||||
func resolveDeviceByLabel(label string) string {
|
||||
label = strings.TrimSpace(label)
|
||||
if label == "" {
|
||||
return ""
|
||||
}
|
||||
return runAndTrim("blkid", "-L", label)
|
||||
}
|
||||
|
||||
func blkidExport(device string) map[string]string {
|
||||
device = strings.TrimSpace(device)
|
||||
if device == "" {
|
||||
return map[string]string{}
|
||||
}
|
||||
out, err := commandOutput("blkid", "-o", "export", device)
|
||||
if err != nil {
|
||||
return map[string]string{}
|
||||
}
|
||||
return parseKeyValues(string(out))
|
||||
}
|
||||
|
||||
// parseKeyValues turns whitespace-separated KEY=value tokens into a map. It
// is used for both `findmnt -P` output (KEY="value" pairs on one line) and
// `blkid -o export` output (one KEY=value per line).
//
// Whitespace inside a double-quoted value does not end the token, so a pair
// such as TARGET="/mnt/my scratch" keeps its full value instead of being cut
// at the first space. Surrounding double quotes are stripped from the stored
// value. Tokens without an '=' are ignored, and when a key repeats the last
// occurrence wins. An unterminated quote extends the token to the end of the
// input. Unquoted values still end at the first whitespace character.
//
// The returned map is never nil.
func parseKeyValues(out string) map[string]string {
	result := map[string]string{}
	rest := strings.TrimSpace(out)
	for rest != "" {
		// Find the end of the next token: the first whitespace byte that is
		// not enclosed in double quotes. All bytes inspected are ASCII, so
		// scanning bytewise is safe for UTF-8 input.
		end := len(rest)
		quoted := false
	scan:
		for i := 0; i < len(rest); i++ {
			switch rest[i] {
			case '"':
				quoted = !quoted
			case ' ', '\t', '\n', '\r', '\v', '\f':
				if !quoted {
					end = i
					break scan
				}
			}
		}

		field := rest[:end]
		// rest was trimmed, so end >= 1 and the loop always makes progress.
		rest = strings.TrimSpace(rest[end:])

		key, value, ok := strings.Cut(field, "=")
		if !ok {
			continue
		}
		result[key] = strings.Trim(value, `"`)
	}
	return result
}
|
||||
|
||||
func commandOutput(cmd string, args ...string) ([]byte, error) {
|
||||
if os.Getenv("METIS_SENTINEL_NSENTER") == "1" {
|
||||
nsenterArgs := []string{"-t", "1", "-m", "-u", "-n", "-i", "-p", "--", cmd}
|
||||
|
||||
@ -21,6 +21,9 @@ func TestCollectUsesCommandOutputAndPkgSample(t *testing.T) {
|
||||
if len(snap.PackageSample) != 4 || snap.PackageSample["k3s"] != "v1.31.5+k3s1" {
|
||||
t.Fatalf("unexpected package sample: %+v", snap.PackageSample)
|
||||
}
|
||||
if snap.USBScratch == nil || snap.USBScratch.Label != "titan-16-scratch" || !snap.USBScratch.MountHealthy || !snap.USBScratch.LabelHealthy || !snap.USBScratch.BindHealthy {
|
||||
t.Fatalf("unexpected usb scratch sample: %+v", snap.USBScratch)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandOutputUsesNsenterWhenRequested(t *testing.T) {
|
||||
@ -73,7 +76,30 @@ func fakeSentinelCommands(t *testing.T) string {
|
||||
write("uname", `printf '6.6.63\n'`)
|
||||
write("k3s", `printf 'v1.31.5+k3s1\n'`)
|
||||
write("containerd", `printf '1.7.99\n'`)
|
||||
write("cat", `printf 'PRETTY_NAME="Metis OS"\n'`)
|
||||
write("cat", `case "${1:-}" in
|
||||
/etc/os-release) printf 'PRETTY_NAME="Metis OS"\n' ;;
|
||||
/etc/metis/node.json) printf '%s\n' '{"usb_scratch":{"mountpoint":"/mnt/scratch","label":"titan-16-scratch","fs":"ext4","bind_targets":["/var/lib/rancher","/var/log"]}}' ;;
|
||||
*) printf 'PRETTY_NAME="Metis OS"\n' ;;
|
||||
esac`)
|
||||
write("findmnt", `target=""
|
||||
for ((i=1; i<=$#; i++)); do
|
||||
if [[ "${!i}" == "-T" ]]; then
|
||||
j=$((i + 1))
|
||||
target="${!j}"
|
||||
break
|
||||
fi
|
||||
done
|
||||
case "${target}" in
|
||||
/mnt/scratch) printf 'SOURCE="/dev/sdz1" TARGET="/mnt/scratch" FSTYPE="ext4"\n' ;;
|
||||
/var/lib/rancher) printf 'SOURCE="/mnt/scratch" TARGET="/var/lib/rancher" FSTYPE="none"\n' ;;
|
||||
/var/log) printf 'SOURCE="/mnt/scratch" TARGET="/var/log" FSTYPE="none"\n' ;;
|
||||
*) exit 1 ;;
|
||||
esac`)
|
||||
write("blkid", `case "${1:-}" in
|
||||
-U) printf '/dev/sdz1\n' ;;
|
||||
-L) printf '/dev/sdz1\n' ;;
|
||||
-o) printf 'UUID=titan-16-uuid\nLABEL=titan-16-scratch\nTYPE=ext4\n' ;;
|
||||
esac`)
|
||||
write("dpkg-query", `case "${@: -1}" in
|
||||
containerd) printf '1.7.99\n' ;;
|
||||
k3s) printf 'v1.31.5+k3s1\n' ;;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user