metis: add USB scratch inventory support

This commit is contained in:
Brad Stein 2026-04-11 01:08:08 -03:00
parent cb2498b1df
commit 6d0351f4b3
18 changed files with 470 additions and 66 deletions

View File

@ -6,13 +6,13 @@ Metis produces fully configured recovery SD cards for any node in the lab (RPi 4
- Cross-platform (Linux + Windows) CLI/GUI with dead-simple UX.
- Pull class-specific golden images from Harbor (or other artifact store), inject per-node config, and write/verify SD cards.
- Minimal image set via node classes; inject per-node deltas at burn time.
- Idempotent bootstraps: hostname/IP, k3s server/agent setup, labels/taints, journald/log GC drop-ins, Longhorn mount validation, SSH keys/users.
- Idempotent bootstraps: hostname/IP, k3s server/agent setup, labels/taints, journald/log GC drop-ins, Longhorn and USB scratch mount validation, SSH keys/users.
- Works offline once artifacts are cached; verifies hashes/signatures before writing.
## Planned high-level workflow
1) Select target node (from inventory) + target disk.
2) Tool downloads/caches the right golden image for that node class.
3) Injects per-node config (net, k3s tokens/roles/labels/taints, SSH keys, runtime drop-ins, Longhorn mount metadata) and writes SD.
3) Injects per-node config (net, k3s tokens/roles/labels/taints, SSH keys, runtime drop-ins, Longhorn mount metadata, USB scratch bind layout) and writes SD.
4) Verifies write; prints next-step: "insert and power on." No manual follow-up.
## Early design notes
@ -45,7 +45,7 @@ Metis produces fully configured recovery SD cards for any node in the lab (RPi 4
- Vault: Metis can read per-node secrets from `secret/data/nodes/<hostname>` using VAULT_ADDR plus either VAULT_TOKEN or AppRole (VAULT_ROLE_ID/VAULT_SECRET_ID). Expected fields: ssh_password, k3s_token, cloud_init, extra map.
- Sentinel: `metis-sentinel` collects host facts and can either print them, write local history, or push them into the Metis service. The intended deployment shape is a DaemonSet on cluster nodes plus an Ariadne-triggered Metis watch that recomputes recommended class targets and drift history.
- Facts aggregation: `metis facts --inventory inv.yaml --snapshots ./snapshots` reads sentinel snapshot JSON files and prints per-class drift summary (kernels, containerd, k3s, package samples). Use exported ConfigMaps or `METIS_SENTINEL_OUT` history as input.
- `metis config --inventory inv.yaml --node titan-13` prints the merged node config (hostname/IP/k3s labels/taints/Longhorn UUIDs).
- `metis config --inventory inv.yaml --node titan-13` prints the merged node config (hostname/IP/k3s labels/taints/Longhorn UUIDs and optional USB scratch metadata).
## Service direction
- Deployed UI protected by Atlas SSO headers (`admin` / `maintenance`)

View File

@ -5,8 +5,8 @@ Initial classes to minimize golden images while covering hardware/OS deltas:
- `rpi5-ubuntu-worker`: Ubuntu 24.04, k3s agent, hardware=rpi5 (titan-04..11, 0a/0c minus control-plane bits)
- `rpi5-ubuntu-control`: Ubuntu 24.04, k3s server (titan-0a/0b/0c specifics), control-plane taints, etcd snapshot hooks
- `rpi4-armbian-longhorn`: Armbian 6.6.x, k3s agent, hardware=rpi4 with Longhorn disks (titan-13/15/17/19; astreae/asteria mounts)
- `rpi4-armbian-worker`: Armbian 6.6.x, k3s agent, hardware=rpi4 without Longhorn disks (titan-12/14/18)
- `rpi4-armbian-worker`: Armbian 6.6.x, k3s agent, hardware=rpi4 without Longhorn disks; `titan-16` uses the USB scratch recovery card standard
- `amd64-agent`: Debian 13 k3s agent with GPU/node labels (titan-22/24, avoid by preference)
- `external-hosts`: non-cluster (tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21) per-host config over base image template
Per-node overlays capture hostname/IP, labels/taints, Longhorn UUID mounts, and drop-ins for logging/GC.
Per-node overlays capture hostname/IP, labels/taints, Longhorn UUID mounts, USB scratch bind targets, and drop-ins for logging/GC.

View File

@ -23,6 +23,17 @@ classes:
longhorn: "true"
node-role.kubernetes.io/worker: "true"
default_taints: []
- name: rpi4-armbian-worker
arch: arm64
os: armbian-6.6
image: https://harbor.bstein.dev/library/rpi4-armbian-worker.img
checksum: sha256:REPLACE_ME
boot_overlay: overlays/rpi4-boot
root_overlay: overlays/rpi4-root
default_labels:
hardware: rpi4
node-role.kubernetes.io/worker: "true"
default_taints: []
- name: control-plane
arch: arm64
os: ubuntu-24.04
@ -72,6 +83,21 @@ nodes:
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
fs: ext4
ssh_user: root
- name: titan-16
class: rpi4-armbian-worker
hostname: titan-16
ip: 192.168.22.44
k3s_role: agent
labels:
hardware: rpi4
usb_scratch:
mountpoint: /mnt/scratch
label: titan-16-scratch
fs: ext4
bind_targets:
- /var/lib/rancher
- /var/log
ssh_user: ubuntu
- name: titan-20
class: jetson-accelerator
hostname: titan-20

View File

@ -165,6 +165,13 @@ nodes:
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BASTION}
usb_scratch:
mountpoint: /mnt/scratch
label: titan-16-scratch
fs: ext4
bind_targets:
- /var/lib/rancher
- /var/log
- name: titan-13
class: rpi4-armbian-longhorn
hostname: titan-13

View File

@ -16,6 +16,7 @@ type NodeConfig struct {
Labels map[string]string `json:"labels,omitempty"`
Taints []string `json:"taints,omitempty"`
Fstab []FstabEntry `json:"fstab,omitempty"`
USBScratch *USBScratchConfig `json:"usb_scratch,omitempty"`
Secrets map[string]string `json:"secrets,omitempty"` // optional key/values for local agent use
}
@ -32,12 +33,23 @@ type K3sConfig struct {
// FstabEntry for Longhorn or other mounts.
type FstabEntry struct {
UUID string `json:"uuid"`
Source string `json:"source,omitempty"`
UUID string `json:"uuid,omitempty"`
Label string `json:"label,omitempty"`
Mountpoint string `json:"mountpoint"`
FS string `json:"fs"`
Options string `json:"options"`
}
// USBScratchConfig describes a recovery USB disk and its bind mounts.
//
// Build fills it from the node's inventory usb_scratch section and also
// expands it into fstab entries: one for the disk itself plus one bind entry
// per bind target.
type USBScratchConfig struct {
// Mountpoint is where the scratch filesystem is mounted; Build also uses it
// as the source path of every bind entry.
Mountpoint string `json:"mountpoint"`
// UUID identifies the filesystem; the fstab writer prefers it over Label
// when both are set.
UUID string `json:"uuid,omitempty"`
// Label identifies the filesystem when no UUID is given.
Label string `json:"label,omitempty"`
// FS is the filesystem type; Build substitutes "ext4" when it is empty.
FS string `json:"fs,omitempty"`
// BindTargets lists the paths that get a bind mount of Mountpoint.
BindTargets []string `json:"bind_targets,omitempty"`
}
// Build creates a NodeConfig from inventory.
func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
n, cls, err := inv.FindNode(nodeName)
@ -58,6 +70,23 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
k3sVersion = n.K3sVersion
}
cfg := &NodeConfig{
Hostname: n.Hostname,
IP: n.IP,
SSHUser: n.SSHUser,
SSHKeys: n.SSHAuthorized,
Labels: labels,
Taints: taints,
K3s: K3sConfig{
Role: n.K3sRole,
Version: k3sVersion,
URL: n.K3sURL,
Token: n.K3sToken,
Labels: labels,
Taints: taints,
},
}
fstab := []FstabEntry{}
for _, d := range n.LonghornDisks {
fs := d.FS
@ -71,24 +100,35 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
Options: "defaults,nofail",
})
}
cfg := &NodeConfig{
Hostname: n.Hostname,
IP: n.IP,
SSHUser: n.SSHUser,
SSHKeys: n.SSHAuthorized,
Labels: labels,
Taints: taints,
Fstab: fstab,
K3s: K3sConfig{
Role: n.K3sRole,
Version: k3sVersion,
URL: n.K3sURL,
Token: n.K3sToken,
Labels: labels,
Taints: taints,
},
if n.USBScratch != nil {
scratch := USBScratchConfig{
Mountpoint: n.USBScratch.Mountpoint,
UUID: n.USBScratch.UUID,
Label: n.USBScratch.Label,
FS: n.USBScratch.FS,
BindTargets: append([]string{}, n.USBScratch.BindTargets...),
}
if scratch.FS == "" {
scratch.FS = "ext4"
}
cfg.USBScratch = &scratch
fstab = append(fstab, FstabEntry{
UUID: scratch.UUID,
Label: scratch.Label,
Mountpoint: scratch.Mountpoint,
FS: scratch.FS,
Options: "defaults,nofail",
})
for _, target := range scratch.BindTargets {
fstab = append(fstab, FstabEntry{
Source: scratch.Mountpoint,
Mountpoint: target,
FS: "none",
Options: "bind,nofail",
})
}
}
cfg.Fstab = fstab
if cfg.Hostname == "" || cfg.IP == "" {
return nil, fmt.Errorf("hostname/ip required for node %s", nodeName)
}

View File

@ -26,6 +26,12 @@ func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) {
SSHUser: "atlas",
SSHAuthorized: []string{"key"},
LonghornDisks: []inventory.LonghornDisk{{Mountpoint: "/mnt/data", UUID: "uuid-1"}},
USBScratch: &inventory.USBScratchDisk{
Mountpoint: "/mnt/scratch",
Label: "titan-13-scratch",
FS: "ext4",
BindTargets: []string{"/var/lib/rancher", "/var/log"},
},
}},
}
cfg, err := Build(&inv, "n1")
@ -38,6 +44,18 @@ func TestBuildUsesNodeOverridesAndDefaultFilesystem(t *testing.T) {
if got := cfg.Fstab[0].FS; got != "ext4" {
t.Fatalf("expected default filesystem ext4, got %q", got)
}
if cfg.USBScratch == nil || cfg.USBScratch.Label != "titan-13-scratch" {
t.Fatalf("usb scratch missing: %#v", cfg.USBScratch)
}
if got := len(cfg.Fstab); got != 4 {
t.Fatalf("expected longhorn plus scratch fstab entries, got %d", got)
}
if got := cfg.Fstab[1].Label; got != "titan-13-scratch" {
t.Fatalf("usb scratch label = %q", got)
}
if got := cfg.Fstab[2].Source; got != "/mnt/scratch" || cfg.Fstab[2].Mountpoint != "/var/lib/rancher" {
t.Fatalf("usb bind mount = %#v", cfg.Fstab[2])
}
if got := cfg.Labels["role"]; got != "worker" {
t.Fatalf("label merge lost default label: %q", got)
}

View File

@ -31,15 +31,19 @@ func TestBuildBranches(t *testing.T) {
SSHUser: "atlas",
SSHAuthorized: []string{"ssh-ed25519 AAA"},
LonghornDisks: []inventory.LonghornDisk{{UUID: "u1", Mountpoint: "/var/lib/longhorn"}},
USBScratch: &inventory.USBScratchDisk{Mountpoint: "/mnt/scratch", UUID: "usb-1", BindTargets: []string{"/var/lib/rancher"}},
}},
}
cfg, err := Build(inv, "titan-15")
if err != nil {
t.Fatalf("Build: %v", err)
}
if cfg.K3s.Version != "v1.31.5+k3s2" || len(cfg.Fstab) != 1 || cfg.Fstab[0].FS != "ext4" {
if cfg.K3s.Version != "v1.31.5+k3s2" || len(cfg.Fstab) != 3 || cfg.Fstab[0].FS != "ext4" {
t.Fatalf("unexpected config: %#v", cfg)
}
if cfg.USBScratch == nil || cfg.USBScratch.Mountpoint != "/mnt/scratch" {
t.Fatalf("expected usb scratch config: %#v", cfg.USBScratch)
}
if _, err := Build(&inventory.Inventory{}, "missing"); err == nil {
t.Fatal("expected Build to fail for missing node")
}

View File

@ -13,6 +13,10 @@ type ClassSummary struct {
Containerd map[string]int `json:"containerd,omitempty"`
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
USBMountHealth map[string]int `json:"usb_mount_health,omitempty"`
USBUUIDHealth map[string]int `json:"usb_uuid_health,omitempty"`
USBLabelHealth map[string]int `json:"usb_label_health,omitempty"`
USBBindHealth map[string]int `json:"usb_bind_health,omitempty"`
}
// Aggregate groups snapshots by inventory class and tallies version drift.
@ -20,9 +24,13 @@ func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSumm
result := map[string]*ClassSummary{}
for _, s := range snaps {
class := "unknown"
var scratch *inventory.USBScratchDisk
if inv != nil {
if node, cls, err := inv.FindNode(s.Hostname); err == nil && cls != nil && node != nil {
if node, cls, err := inv.FindNode(s.Hostname); node != nil && cls != nil && err == nil {
class = cls.Name
scratch = node.USBScratch
} else if node != nil {
scratch = node.USBScratch
}
}
sum, ok := result[class]
@ -34,6 +42,10 @@ func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSumm
Containerd: map[string]int{},
K3sVersions: map[string]int{},
PackageStats: map[string]map[string]int{},
USBMountHealth: map[string]int{},
USBUUIDHealth: map[string]int{},
USBLabelHealth: map[string]int{},
USBBindHealth: map[string]int{},
}
result[class] = sum
}
@ -58,6 +70,35 @@ func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSumm
sum.PackageStats[pkg][ver]++
}
}
addUSBHealth(sum, scratch, s.USBScratch)
}
return result
}
// addUSBHealth tallies one snapshot's USB scratch health into sum.
//
// Only aspects the inventory actually declares (mountpoint, UUID, label, bind
// targets) are counted. Each counted aspect adds one to the "ok", "bad" or
// "missing" bucket of the matching ClassSummary map, where "missing" means the
// snapshot carried no USB scratch data at all. Nothing happens when sum or
// desired is nil.
func addUSBHealth(sum *ClassSummary, desired *inventory.USBScratchDisk, observed *USBScratch) {
	if sum == nil || desired == nil {
		return
	}

	// Read the observed flags once; they all stay false when the snapshot has
	// no USB scratch section.
	var mountOK, uuidOK, labelOK, bindOK bool
	if observed != nil {
		mountOK = observed.MountHealthy
		uuidOK = observed.UUIDHealthy
		labelOK = observed.LabelHealthy
		bindOK = observed.BindHealthy
	}

	checks := []struct {
		declared bool
		tally    map[string]int
		healthy  bool
	}{
		{desired.Mountpoint != "", sum.USBMountHealth, mountOK},
		{desired.UUID != "", sum.USBUUIDHealth, uuidOK},
		{desired.Label != "", sum.USBLabelHealth, labelOK},
		{len(desired.BindTargets) > 0, sum.USBBindHealth, bindOK},
	}
	for _, check := range checks {
		if !check.declared {
			continue
		}
		check.tally[usbStatus(observed, check.healthy)]++
	}
}
// usbStatus maps an observation to its tally bucket: "missing" when the
// snapshot has no USB scratch data, otherwise "ok" or "bad" according to ok.
func usbStatus(observed *USBScratch, ok bool) string {
	switch {
	case observed == nil:
		return "missing"
	case !ok:
		return "bad"
	default:
		return "ok"
	}
}

View File

@ -10,12 +10,12 @@ func TestAggregateGroupsByClass(t *testing.T) {
inv := &inventory.Inventory{
Classes: []inventory.NodeClass{{Name: "c1"}, {Name: "c2"}},
Nodes: []inventory.NodeSpec{
{Name: "n1", Class: "c1"},
{Name: "n1", Class: "c1", USBScratch: &inventory.USBScratchDisk{Mountpoint: "/mnt/scratch", Label: "scratch-1", BindTargets: []string{"/var/lib/rancher"}}},
{Name: "n2", Class: "c2"},
},
}
snaps := []Snapshot{
{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"containerd": "2.0"}},
{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"containerd": "2.0"}, USBScratch: &USBScratch{Mountpoint: "/mnt/scratch", Label: "scratch-1", MountHealthy: true, LabelHealthy: true, BindHealthy: true, BindTargets: []USBBindTarget{{Path: "/var/lib/rancher", Healthy: true}}}},
{Hostname: "n2", Kernel: "k2", PackageSample: map[string]string{"containerd": "1.7"}},
{Hostname: "n1", Kernel: "k1"},
}
@ -30,6 +30,9 @@ func TestAggregateGroupsByClass(t *testing.T) {
if c1.PackageStats["containerd"]["2.0"] != 1 {
t.Fatalf("package stats not tallied: %#v", c1.PackageStats)
}
if c1.USBMountHealth["ok"] != 1 || c1.USBLabelHealth["ok"] != 1 || c1.USBBindHealth["ok"] != 1 {
t.Fatalf("usb health not tallied: %#v", c1)
}
}
func TestAggregateKeepsUnknownHostnames(t *testing.T) {

View File

@ -16,6 +16,7 @@ type Snapshot struct {
Containerd string `json:"containerd,omitempty"`
PackageSample map[string]string `json:"package_sample,omitempty"`
DropInsSample map[string]string `json:"dropins_sample,omitempty"`
USBScratch *USBScratch `json:"usb_scratch,omitempty"`
}
// LoadDir reads all *.json under a directory and returns snapshots.

View File

@ -8,7 +8,7 @@ import (
func TestLoadDirReadsSnapshots(t *testing.T) {
dir := t.TempDir()
snap := `{"hostname":"n1","kernel":"k","containerd":"c","package_sample":{"a":"1"}}`
snap := `{"hostname":"n1","kernel":"k","containerd":"c","package_sample":{"a":"1"},"usb_scratch":{"mountpoint":"/mnt/scratch","label":"titan-16-scratch","mount_healthy":true,"bind_targets":[{"path":"/var/lib/rancher","healthy":true}],"bind_healthy":true}}`
if err := os.WriteFile(filepath.Join(dir, "snap.json"), []byte(snap), 0o644); err != nil {
t.Fatal(err)
}
@ -19,6 +19,9 @@ func TestLoadDirReadsSnapshots(t *testing.T) {
if len(got) != 1 || got[0].Hostname != "n1" || got[0].PackageSample["a"] != "1" {
t.Fatalf("unexpected snapshot: %+v", got)
}
if got[0].USBScratch == nil || got[0].USBScratch.Label != "titan-16-scratch" || len(got[0].USBScratch.BindTargets) != 1 {
t.Fatalf("unexpected usb scratch snapshot: %+v", got[0].USBScratch)
}
}
func TestLoadDirRejectsInvalidJSON(t *testing.T) {

View File

@ -1,5 +1,24 @@
package facts
// USBBindTarget captures a bind mount and whether it looked healthy.
type USBBindTarget struct {
// Path is the bind mount's target path.
Path string `json:"path,omitempty"`
// Healthy records the result of the health check for Path; false is omitted
// from the JSON because of omitempty.
Healthy bool `json:"healthy,omitempty"`
}
// USBScratch captures the desired scratch-disk configuration plus health.
//
// Every boolean uses omitempty, so a false flag is absent from the encoded
// JSON rather than written as false.
type USBScratch struct {
// Mountpoint, UUID, Label and FS echo the desired scratch-disk settings.
Mountpoint string `json:"mountpoint,omitempty"`
UUID string `json:"uuid,omitempty"`
Label string `json:"label,omitempty"`
FS string `json:"fs,omitempty"`
// MountHealthy reports whether the mountpoint check passed.
MountHealthy bool `json:"mount_healthy,omitempty"`
// UUIDHealthy reports whether the observed filesystem UUID matched.
UUIDHealthy bool `json:"uuid_healthy,omitempty"`
// LabelHealthy reports whether the observed filesystem label matched.
LabelHealthy bool `json:"label_healthy,omitempty"`
// BindTargets holds the per-target bind mount results.
BindTargets []USBBindTarget `json:"bind_targets,omitempty"`
// BindHealthy summarises the bind mount results as a single flag.
BindHealthy bool `json:"bind_healthy,omitempty"`
}
// ClassFacts captures driftable state collected by metis-sentinel.
type ClassFacts struct {
ClassName string `json:"class_name"`
@ -10,6 +29,10 @@ type ClassFacts struct {
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
CGroupConfig map[string]string `json:"cgroup_config,omitempty"` // key -> value
USBMountHealth map[string]int `json:"usb_mount_health,omitempty"`
USBUUIDHealth map[string]int `json:"usb_uuid_health,omitempty"`
USBLabelHealth map[string]int `json:"usb_label_health,omitempty"`
USBBindHealth map[string]int `json:"usb_bind_health,omitempty"`
Notes string `json:"notes,omitempty"`
}
@ -17,5 +40,6 @@ type ClassFacts struct {
type NodeFacts struct {
Hostname string `json:"hostname"`
Disks map[string]string `json:"disks,omitempty"` // mount -> UUID
USBScratch *USBScratch `json:"usb_scratch,omitempty"`
Notes string `json:"notes,omitempty"`
}

View File

@ -44,6 +44,7 @@ type NodeSpec struct {
Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"`
Taints []string `yaml:"taints,omitempty" json:"taints,omitempty"`
LonghornDisks []LonghornDisk `yaml:"longhorn_disks,omitempty" json:"longhorn_disks,omitempty"`
USBScratch *USBScratchDisk `yaml:"usb_scratch,omitempty" json:"usb_scratch,omitempty"`
SSHUser string `yaml:"ssh_user,omitempty" json:"ssh_user,omitempty"`
SSHAuthorized []string `yaml:"ssh_authorized_keys,omitempty" json:"ssh_authorized_keys,omitempty"`
Notes string `yaml:"notes,omitempty" json:"notes,omitempty"`
@ -56,6 +57,15 @@ type LonghornDisk struct {
FS string `yaml:"fs,omitempty" json:"fs,omitempty"`
}
// USBScratchDisk describes the recovery USB disk and its bind targets.
//
// It is the inventory (YAML/JSON) form of a node's usb_scratch section. When
// the inventory is expanded, environment variables are substituted in every
// field and bind targets that end up blank are dropped.
type USBScratchDisk struct {
// Mountpoint is where the scratch disk is mounted.
Mountpoint string `yaml:"mountpoint" json:"mountpoint"`
// UUID optionally identifies the filesystem by UUID.
UUID string `yaml:"uuid,omitempty" json:"uuid,omitempty"`
// Label optionally identifies the filesystem by label.
Label string `yaml:"label,omitempty" json:"label,omitempty"`
// FS is the filesystem type; it may be left empty.
FS string `yaml:"fs,omitempty" json:"fs,omitempty"`
// BindTargets lists the paths that should be bind-mounted from the disk.
BindTargets []string `yaml:"bind_targets,omitempty" json:"bind_targets,omitempty"`
}
// Load reads and parses an inventory file.
func Load(path string) (*Inventory, error) {
data, err := os.ReadFile(path)
@ -121,6 +131,21 @@ func expandInventory(inv *Inventory) {
inv.Nodes[idx].LonghornDisks[diskIdx].UUID = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].UUID)
inv.Nodes[idx].LonghornDisks[diskIdx].FS = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].FS)
}
if inv.Nodes[idx].USBScratch != nil {
inv.Nodes[idx].USBScratch.Mountpoint = os.ExpandEnv(inv.Nodes[idx].USBScratch.Mountpoint)
inv.Nodes[idx].USBScratch.UUID = os.ExpandEnv(inv.Nodes[idx].USBScratch.UUID)
inv.Nodes[idx].USBScratch.Label = os.ExpandEnv(inv.Nodes[idx].USBScratch.Label)
inv.Nodes[idx].USBScratch.FS = os.ExpandEnv(inv.Nodes[idx].USBScratch.FS)
bindTargets := make([]string, 0, len(inv.Nodes[idx].USBScratch.BindTargets))
for _, value := range inv.Nodes[idx].USBScratch.BindTargets {
expanded := strings.TrimSpace(os.ExpandEnv(value))
if expanded == "" {
continue
}
bindTargets = append(bindTargets, expanded)
}
inv.Nodes[idx].USBScratch.BindTargets = bindTargets
}
}
}

View File

@ -9,6 +9,9 @@ import (
func TestLoadExpandsEnvironmentVariables(t *testing.T) {
t.Setenv("METIS_IMAGE_PATH", "file:///tmp/rpi4.img")
t.Setenv("METIS_K3S_TOKEN", "secret-token")
t.Setenv("METIS_USB_MOUNT", "/mnt/usb")
t.Setenv("METIS_USB_LABEL", "titan-13-scratch")
t.Setenv("METIS_USB_BIND", "/var/lib/rancher")
invPath := filepath.Join(t.TempDir(), "inventory.yaml")
if err := os.WriteFile(invPath, []byte(`
classes:
@ -22,6 +25,12 @@ nodes:
ip: 192.168.22.41
k3s_role: agent
k3s_token: ${METIS_K3S_TOKEN}
usb_scratch:
mountpoint: ${METIS_USB_MOUNT}
label: ${METIS_USB_LABEL}
fs: ext4
bind_targets:
- ${METIS_USB_BIND}
`), 0o644); err != nil {
t.Fatal(err)
}
@ -40,6 +49,12 @@ nodes:
if node.K3sToken != "secret-token" {
t.Fatalf("token not expanded: %q", node.K3sToken)
}
if node.USBScratch == nil || node.USBScratch.Mountpoint != "/mnt/usb" || node.USBScratch.Label != "titan-13-scratch" {
t.Fatalf("usb scratch not expanded: %#v", node.USBScratch)
}
if len(node.USBScratch.BindTargets) != 1 || node.USBScratch.BindTargets[0] != "/var/lib/rancher" {
t.Fatalf("usb bind target not expanded: %#v", node.USBScratch.BindTargets)
}
}
func TestFindNodeReturnsClassMissingError(t *testing.T) {

View File

@ -344,9 +344,20 @@ LinkLocalAddressing=no
func fstabAppendContent(cfg *config.NodeConfig) string {
var lines []string
for _, entry := range cfg.Fstab {
source := entry.Source
switch {
case source != "":
// Use the explicit source path for bind mounts.
case entry.UUID != "":
source = "UUID=" + entry.UUID
case entry.Label != "":
source = "LABEL=" + entry.Label
default:
source = "none"
}
lines = append(lines, fmt.Sprintf(
"UUID=%s %s %s %s 0 0",
entry.UUID,
"%s %s %s %s 0 0",
source,
entry.Mountpoint,
entry.FS,
entry.Options,

View File

@ -30,6 +30,18 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) {
FS: "ext4",
Options: "defaults,nofail",
},
{
UUID: "usb-uuid",
Mountpoint: "/mnt/scratch",
FS: "ext4",
Options: "defaults,nofail",
},
{
Source: "/mnt/scratch",
Mountpoint: "/var/lib/rancher",
FS: "none",
Options: "bind,nofail",
},
},
Labels: map[string]string{"role": "worker", "zone": "a", "node-role.kubernetes.io/worker": "true"},
Taints: []string{"gpu=true:NoSchedule"},
@ -80,7 +92,7 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) {
t.Fatalf("systemd-networkd config missing/incorrect: %s", networkd)
}
fstab, ok := pathMap["etc/metis/fstab.append"]
if !ok || !strings.Contains(fstab, "UUID=disk-uuid /mnt/astreae ext4 defaults,nofail 0 0") {
if !ok || !strings.Contains(fstab, "UUID=disk-uuid /mnt/astreae ext4 defaults,nofail 0 0") || !strings.Contains(fstab, "UUID=usb-uuid /mnt/scratch ext4 defaults,nofail 0 0") || !strings.Contains(fstab, "/mnt/scratch /var/lib/rancher none bind,nofail 0 0") {
t.Fatalf("fstab append missing/incorrect: %s", fstab)
}
}

View File

@ -1,11 +1,26 @@
package sentinel
import (
	"encoding/json"
	"os"
	"os/exec"
	"strings"
	"unicode"

	"metis/pkg/facts"
)
// nodeConfig is the subset of /etc/metis/node.json that the sentinel decodes;
// only the usb_scratch section is read.
type nodeConfig struct {
// USBScratch is nil when the node config has no usb_scratch section.
USBScratch *usbScratchConfig `json:"usb_scratch,omitempty"`
}
// usbScratchConfig is the desired USB scratch layout as written in the node
// config; collectUSBScratch compares the live system against it.
type usbScratchConfig struct {
// Mountpoint is the path the scratch disk should be mounted at.
Mountpoint string `json:"mountpoint,omitempty"`
// UUID, when set, is compared with the UUID blkid reports for the device.
UUID string `json:"uuid,omitempty"`
// Label, when set, is compared with the LABEL blkid reports for the device.
Label string `json:"label,omitempty"`
// FS, when set, is compared case-insensitively with the mounted filesystem type.
FS string `json:"fs,omitempty"`
// BindTargets lists the paths expected to be bind mounts of Mountpoint.
BindTargets []string `json:"bind_targets,omitempty"`
}
// Snapshot captures host-level facts.
type Snapshot struct {
Hostname string `json:"hostname,omitempty"`
@ -15,6 +30,7 @@ type Snapshot struct {
Containerd string `json:"containerd,omitempty"`
PackageSample map[string]string `json:"package_sample,omitempty"` // small subset to detect drift
DropInsSample map[string]string `json:"dropins_sample,omitempty"` // path->content hash/sample
USBScratch *facts.USBScratch `json:"usb_scratch,omitempty"`
Notes string `json:"notes,omitempty"`
}
@ -27,9 +43,76 @@ func Collect() *Snapshot {
K3sVersion: runAndTrim("k3s", "version"),
Containerd: runAndTrim("containerd", "--version"),
PackageSample: pkgSample(),
USBScratch: collectUSBScratch(),
}
}
// collectUSBScratch reports the health of the USB scratch disk described in
// /etc/metis/node.json.
//
// It returns nil when the file cannot be read, is blank, does not parse, or
// has no usb_scratch section. Otherwise the result echoes the desired
// settings and records whether the mount, UUID, label and bind targets match
// what the host currently shows.
func collectUSBScratch() *facts.USBScratch {
	raw, err := commandOutput("cat", "/etc/metis/node.json")
	if err != nil {
		return nil
	}
	if strings.TrimSpace(string(raw)) == "" {
		return nil
	}
	var cfg nodeConfig
	if json.Unmarshal(raw, &cfg) != nil {
		return nil
	}
	want := cfg.USBScratch
	if want == nil {
		return nil
	}

	report := &facts.USBScratch{
		Mountpoint: want.Mountpoint,
		UUID:       want.UUID,
		Label:      want.Label,
		FS:         want.FS,
		// With no bind targets there is nothing that can be unhealthy.
		BindHealthy: true,
	}

	// Mount check: something must be mounted at the mountpoint, and its
	// filesystem type must match when both sides state one.
	device, mountedFS, mounted := mountInfo(want.Mountpoint)
	mountOK := mounted && strings.TrimSpace(device) != ""
	if mountOK && want.FS != "" && mountedFS != "" {
		mountOK = strings.EqualFold(mountedFS, want.FS)
	}
	report.MountHealthy = mountOK
	if report.FS == "" {
		report.FS = mountedFS
	}

	// When nothing is mounted, fall back to locating the device by UUID and
	// then by label so identity can still be checked.
	if device == "" && want.UUID != "" {
		device = resolveDeviceByUUID(want.UUID)
	}
	if device == "" && want.Label != "" {
		device = resolveDeviceByLabel(want.Label)
	}
	if device != "" {
		attrs := blkidExport(device)
		report.UUIDHealthy = want.UUID != "" && attrs["UUID"] == want.UUID
		report.LabelHealthy = want.Label != "" && attrs["LABEL"] == want.Label
		if report.FS == "" {
			report.FS = attrs["TYPE"]
		}
	}

	// Bind check: every target must pass for the overall flag to stay true.
	if count := len(want.BindTargets); count > 0 {
		report.BindTargets = make([]facts.USBBindTarget, 0, count)
	}
	for _, target := range want.BindTargets {
		ok := bindHealthy(target, want.Mountpoint)
		report.BindHealthy = report.BindHealthy && ok
		report.BindTargets = append(report.BindTargets, facts.USBBindTarget{
			Path:    target,
			Healthy: ok,
		})
	}
	return report
}
func runAndTrim(cmd string, args ...string) string {
out, err := commandOutput(cmd, args...)
if err != nil {
@ -78,6 +161,71 @@ func pkgVersion(name string) string {
return ""
}
// mountInfo asks findmnt about target and returns the reported SOURCE, the
// reported FSTYPE, and whether the reported TARGET equals the (trimmed)
// target path. It returns empty strings and false for a blank target or when
// findmnt fails.
func mountInfo(target string) (string, string, bool) {
	path := strings.TrimSpace(target)
	if path == "" {
		return "", "", false
	}
	raw, err := commandOutput("findmnt", "-P", "-n", "-T", path, "-o", "SOURCE,TARGET,FSTYPE")
	if err != nil {
		return "", "", false
	}
	pairs := parseKeyValues(string(raw))
	// The path counts as mounted only when it is itself the reported TARGET.
	mountedHere := strings.TrimSpace(pairs["TARGET"]) == path
	return pairs["SOURCE"], pairs["FSTYPE"], mountedHere
}
// bindHealthy reports whether target is a mount point backed by the scratch
// filesystem mounted at source.
//
// For a bind mount, findmnt prints the backing device in SOURCE (with a
// "[/dir]" suffix when a subdirectory was bound), not the directory that was
// bound. Comparing SOURCE only with the source path would therefore never
// match a real bind mount. The target is accepted when its SOURCE equals the
// SOURCE findmnt reports for the scratch mountpoint itself; a literal match
// against the source path is still accepted as before.
//
// Blank arguments, an unmounted target, or an unmounted scratch mountpoint
// all yield false.
func bindHealthy(target, source string) bool {
	target = strings.TrimSpace(target)
	source = strings.TrimSpace(source)
	if target == "" || source == "" {
		return false
	}
	mountSource, _, mounted := mountInfo(target)
	if !mounted {
		return false
	}
	mountSource = strings.TrimSpace(mountSource)
	if mountSource == source {
		return true
	}
	// Binding the scratch mountpoint exposes the same device and filesystem
	// root, so both mounts report an identical SOURCE.
	backing, _, sourceMounted := mountInfo(source)
	backing = strings.TrimSpace(backing)
	return sourceMounted && backing != "" && mountSource == backing
}
// resolveDeviceByUUID returns the trimmed output of `blkid -U <uuid>`, or ""
// when uuid is blank.
func resolveDeviceByUUID(uuid string) string {
	if wanted := strings.TrimSpace(uuid); wanted != "" {
		return runAndTrim("blkid", "-U", wanted)
	}
	return ""
}
// resolveDeviceByLabel returns the trimmed output of `blkid -L <label>`, or ""
// when label is blank.
func resolveDeviceByLabel(label string) string {
	if wanted := strings.TrimSpace(label); wanted != "" {
		return runAndTrim("blkid", "-L", wanted)
	}
	return ""
}
// blkidExport runs `blkid -o export <device>` and returns its key/value
// pairs. The result is an empty, non-nil map when device is blank or the
// command fails.
func blkidExport(device string) map[string]string {
	if dev := strings.TrimSpace(device); dev != "" {
		if raw, err := commandOutput("blkid", "-o", "export", dev); err == nil {
			return parseKeyValues(string(raw))
		}
	}
	return map[string]string{}
}
// parseKeyValues parses whitespace-separated KEY=value pairs, as printed by
// `findmnt -P` (KEY="value") and `blkid -o export` (KEY=value per line).
//
// Tokens without '=' are ignored, surrounding double quotes are stripped from
// values, and a later duplicate key overwrites an earlier one. A value that
// opens with a double quote is kept together up to its closing quote even
// when it contains whitespace, so TARGET="/mnt/my disk" yields "/mnt/my disk"
// instead of being cut at the first space. An opening quote with no closing
// quote falls back to plain whitespace splitting.
func parseKeyValues(out string) map[string]string {
	result := map[string]string{}

	// nextSpace returns the index of the first whitespace at or after from,
	// or len(s) when there is none.
	nextSpace := func(s string, from int) int {
		if i := strings.IndexFunc(s[from:], unicode.IsSpace); i >= 0 {
			return from + i
		}
		return len(s)
	}

	rest := strings.TrimSpace(out)
	for rest != "" {
		end := nextSpace(rest, 0)
		if eq := strings.IndexByte(rest[:end], '='); eq >= 0 {
			value := rest[eq+1 : end]
			// An opening quote that is not closed inside this token means the
			// value contains whitespace; extend the token past the closing quote.
			if strings.HasPrefix(value, `"`) && !strings.Contains(value[1:], `"`) {
				if closing := strings.IndexByte(rest[end:], '"'); closing >= 0 {
					end = nextSpace(rest, end+closing+1)
					value = rest[eq+1 : end]
				}
			}
			result[rest[:eq]] = strings.Trim(value, `"`)
		}
		rest = strings.TrimLeftFunc(rest[end:], unicode.IsSpace)
	}
	return result
}
func commandOutput(cmd string, args ...string) ([]byte, error) {
if os.Getenv("METIS_SENTINEL_NSENTER") == "1" {
nsenterArgs := []string{"-t", "1", "-m", "-u", "-n", "-i", "-p", "--", cmd}

View File

@ -21,6 +21,9 @@ func TestCollectUsesCommandOutputAndPkgSample(t *testing.T) {
if len(snap.PackageSample) != 4 || snap.PackageSample["k3s"] != "v1.31.5+k3s1" {
t.Fatalf("unexpected package sample: %+v", snap.PackageSample)
}
if snap.USBScratch == nil || snap.USBScratch.Label != "titan-16-scratch" || !snap.USBScratch.MountHealthy || !snap.USBScratch.LabelHealthy || !snap.USBScratch.BindHealthy {
t.Fatalf("unexpected usb scratch sample: %+v", snap.USBScratch)
}
}
func TestCommandOutputUsesNsenterWhenRequested(t *testing.T) {
@ -73,7 +76,30 @@ func fakeSentinelCommands(t *testing.T) string {
write("uname", `printf '6.6.63\n'`)
write("k3s", `printf 'v1.31.5+k3s1\n'`)
write("containerd", `printf '1.7.99\n'`)
write("cat", `printf 'PRETTY_NAME="Metis OS"\n'`)
write("cat", `case "${1:-}" in
/etc/os-release) printf 'PRETTY_NAME="Metis OS"\n' ;;
/etc/metis/node.json) printf '%s\n' '{"usb_scratch":{"mountpoint":"/mnt/scratch","label":"titan-16-scratch","fs":"ext4","bind_targets":["/var/lib/rancher","/var/log"]}}' ;;
*) printf 'PRETTY_NAME="Metis OS"\n' ;;
esac`)
write("findmnt", `target=""
for ((i=1; i<=$#; i++)); do
if [[ "${!i}" == "-T" ]]; then
j=$((i + 1))
target="${!j}"
break
fi
done
case "${target}" in
/mnt/scratch) printf 'SOURCE="/dev/sdz1" TARGET="/mnt/scratch" FSTYPE="ext4"\n' ;;
/var/lib/rancher) printf 'SOURCE="/mnt/scratch" TARGET="/var/lib/rancher" FSTYPE="none"\n' ;;
/var/log) printf 'SOURCE="/mnt/scratch" TARGET="/var/log" FSTYPE="none"\n' ;;
*) exit 1 ;;
esac`)
write("blkid", `case "${1:-}" in
-U) printf '/dev/sdz1\n' ;;
-L) printf '/dev/sdz1\n' ;;
-o) printf 'UUID=titan-16-uuid\nLABEL=titan-16-scratch\nTYPE=ext4\n' ;;
esac`)
write("dpkg-query", `case "${@: -1}" in
containerd) printf '1.7.99\n' ;;
k3s) printf 'v1.31.5+k3s1\n' ;;