From 791d528a99519c69e549f7c65660302bab48e652 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 31 Mar 2026 18:46:13 -0300 Subject: [PATCH] service: tighten metis access and recovery ui --- pkg/inventory/types.go | 64 ++--- pkg/service/app.go | 203 +++++++++++++-- pkg/service/server.go | 489 +++++++++++++++++++++++++++---------- pkg/service/server_test.go | 23 +- pkg/service/settings.go | 2 - 5 files changed, 591 insertions(+), 190 deletions(-) diff --git a/pkg/inventory/types.go b/pkg/inventory/types.go index bd6641f..a486bb6 100644 --- a/pkg/inventory/types.go +++ b/pkg/inventory/types.go @@ -9,50 +9,50 @@ import ( // Inventory is the root document defining node classes and per-node specs. type Inventory struct { - Classes []NodeClass `yaml:"classes"` - Nodes []NodeSpec `yaml:"nodes"` + Classes []NodeClass `yaml:"classes" json:"classes"` + Nodes []NodeSpec `yaml:"nodes" json:"nodes"` } // NodeClass defines a reusable image/config for a group of nodes. type NodeClass struct { - Name string `yaml:"name"` - Arch string `yaml:"arch"` - OS string `yaml:"os"` - Image string `yaml:"image"` - Checksum string `yaml:"checksum,omitempty"` - K3sVersion string `yaml:"k3s_version,omitempty"` - BootloaderNote string `yaml:"bootloader_note,omitempty"` - DefaultLabels map[string]string `yaml:"default_labels,omitempty"` - DefaultTaints []string `yaml:"default_taints,omitempty"` - CloudInit string `yaml:"cloud_init,omitempty"` - BootOverlay string `yaml:"boot_overlay,omitempty"` // path to overlay files for boot partition - RootOverlay string `yaml:"root_overlay,omitempty"` // path to overlay files for rootfs + Name string `yaml:"name" json:"name"` + Arch string `yaml:"arch" json:"arch"` + OS string `yaml:"os" json:"os"` + Image string `yaml:"image" json:"image"` + Checksum string `yaml:"checksum,omitempty" json:"checksum,omitempty"` + K3sVersion string `yaml:"k3s_version,omitempty" json:"k3s_version,omitempty"` + BootloaderNote string `yaml:"bootloader_note,omitempty" json:"bootloader_note,omitempty"` + DefaultLabels map[string]string `yaml:"default_labels,omitempty" json:"default_labels,omitempty"` + DefaultTaints []string `yaml:"default_taints,omitempty" json:"default_taints,omitempty"` + CloudInit string `yaml:"cloud_init,omitempty" json:"cloud_init,omitempty"` + BootOverlay string `yaml:"boot_overlay,omitempty" json:"boot_overlay,omitempty"` // path to overlay files for boot partition + RootOverlay string `yaml:"root_overlay,omitempty" json:"root_overlay,omitempty"` // path to overlay files for rootfs } // NodeSpec captures per-node overrides and identity. type NodeSpec struct { - Name string `yaml:"name"` - Class string `yaml:"class"` - Hostname string `yaml:"hostname"` - IP string `yaml:"ip"` - MAC string `yaml:"mac,omitempty"` - K3sRole string `yaml:"k3s_role"` - K3sVersion string `yaml:"k3s_version,omitempty"` - K3sToken string `yaml:"k3s_token,omitempty"` - K3sURL string `yaml:"k3s_url,omitempty"` - Labels map[string]string `yaml:"labels,omitempty"` - Taints []string `yaml:"taints,omitempty"` - LonghornDisks []LonghornDisk `yaml:"longhorn_disks,omitempty"` - SSHUser string `yaml:"ssh_user,omitempty"` - SSHAuthorized []string `yaml:"ssh_authorized_keys,omitempty"` - Notes string `yaml:"notes,omitempty"` + Name string `yaml:"name" json:"name"` + Class string `yaml:"class" json:"class"` + Hostname string `yaml:"hostname" json:"hostname"` + IP string `yaml:"ip" json:"ip"` + MAC string `yaml:"mac,omitempty" json:"mac,omitempty"` + K3sRole string `yaml:"k3s_role" json:"k3s_role"` + K3sVersion string `yaml:"k3s_version,omitempty" json:"k3s_version,omitempty"` + K3sToken string `yaml:"k3s_token,omitempty" json:"k3s_token,omitempty"` + K3sURL string `yaml:"k3s_url,omitempty" json:"k3s_url,omitempty"` + Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"` + Taints []string `yaml:"taints,omitempty" json:"taints,omitempty"` + LonghornDisks []LonghornDisk `yaml:"longhorn_disks,omitempty" json:"longhorn_disks,omitempty"` + SSHUser string `yaml:"ssh_user,omitempty" json:"ssh_user,omitempty"` + SSHAuthorized []string `yaml:"ssh_authorized_keys,omitempty" json:"ssh_authorized_keys,omitempty"` + Notes string `yaml:"notes,omitempty" json:"notes,omitempty"` } // LonghornDisk describes an attached disk to mount for Longhorn. type LonghornDisk struct { - Mountpoint string `yaml:"mountpoint"` - UUID string `yaml:"uuid"` - FS string `yaml:"fs,omitempty"` + Mountpoint string `yaml:"mountpoint" json:"mountpoint"` + UUID string `yaml:"uuid" json:"uuid"` + FS string `yaml:"fs,omitempty" json:"fs,omitempty"` } // Load reads and parses an inventory file. diff --git a/pkg/service/app.go b/pkg/service/app.go index 292ca94..7c9f5a3 100644 --- a/pkg/service/app.go +++ b/pkg/service/app.go @@ -86,10 +86,13 @@ type SnapshotRecord struct { type PageState struct { LocalHost string `json:"local_host"` DefaultFlashHost string `json:"default_flash_host"` + SelectedHost string `json:"selected_host"` FlashHosts []string `json:"flash_hosts"` Nodes []inventory.NodeSpec `json:"nodes"` Jobs []*Job `json:"jobs"` Devices []Device `json:"devices"` + PreferredDevice string `json:"preferred_device,omitempty"` + DeviceError string `json:"device_error,omitempty"` Events []Event `json:"events"` Snapshots []SnapshotRecord `json:"snapshots"` Targets map[string]facts.Targets `json:"targets"` @@ -145,6 +148,9 @@ func NewApp(settings Settings) (*App, error) { // State returns the current UI/API snapshot. func (a *App) State(deviceHost string) PageState { + if strings.TrimSpace(deviceHost) == "" { + deviceHost = a.settings.DefaultFlashHost + } a.mu.RLock() jobs := make([]*Job, 0, len(a.jobs)) for _, job := range a.jobs { @@ -169,14 +175,19 @@ func (a *App) State(deviceHost string) PageState { return snaps[i].Node < snaps[j].Node }) - devices, _ := a.ListDevices(deviceHost) + flashHosts := a.flashHosts() + devices, deviceErr := a.ListDevices(deviceHost) + preferredDevice := preferredDevice(devices) return PageState{ LocalHost: a.settings.LocalHost, DefaultFlashHost: a.settings.DefaultFlashHost, - FlashHosts: append([]string{}, a.settings.FlashHosts...), + SelectedHost: deviceHost, + FlashHosts: flashHosts, Nodes: append([]inventory.NodeSpec{}, a.inventory.Nodes...), Jobs: jobs, Devices: devices, + PreferredDevice: preferredDevice, + DeviceError: errorString(deviceErr), Events: a.recentEvents(40), Snapshots: snaps, Targets: aTargets, @@ -199,13 +210,10 @@ func (a *App) Replace(node, host, device string) (*Job, error) { if host == "" { host = a.settings.DefaultFlashHost } - if host != a.settings.LocalHost && host != a.settings.DefaultFlashHost { - return nil, fmt.Errorf("flash host %s is not available on this Metis instance", host) - } if _, _, err := a.inventory.FindNode(node); err != nil { return nil, err } - if _, err := a.ensureDevice(device); err != nil { + if _, err := a.ensureDevice(host, device); err != nil { return nil, err } job := a.newJob("replace", node, host, device) @@ -299,8 +307,8 @@ func (a *App) ListDevices(host string) ([]Device, error) { if host == "" { host = a.settings.DefaultFlashHost } - if host != a.settings.LocalHost && host != a.settings.DefaultFlashHost { - return nil, fmt.Errorf("flash host %s is not attached to this Metis instance", host) + if !a.supportsLocalMedia(host) { + return nil, fmt.Errorf("flash host %s is listed for planning, but this Metis instance only has direct removable-media access on %s", host, a.settings.LocalHost) } cmd := exec.Command("lsblk", "-J", "-b", "-o", "NAME,PATH,RM,HOTPLUG,SIZE,MODEL,TRAN,TYPE") out, err := cmd.Output() @@ -351,7 +359,17 @@ func (a *App) ListDevices(host string) ([]Device, error) { SizeBytes: size, }) } - sort.Slice(devices, func(i, j int) bool { return devices[i].Path < devices[j].Path }) + sort.Slice(devices, func(i, j int) bool { + left := deviceScore(devices[i]) + right := deviceScore(devices[j]) + if left != right { + return left > right + } + if devices[i].SizeBytes != devices[j].SizeBytes { + return devices[i].SizeBytes < devices[j].SizeBytes + } + return devices[i].Path < devices[j].Path + }) return devices, nil } @@ -443,7 +461,7 @@ func (a *App) runBuild(job *Job, flash bool) { j.ProgressPct = 78 j.Artifact = output }) - if _, err := a.ensureDevice(job.Device); err != nil { + if _, err := a.ensureDevice(job.Host, job.Device); err != nil { a.failJob(job.ID, err) a.metrics.RecordFlash(job.Node, job.Host, "error") return @@ -502,8 +520,11 @@ func (a *App) flashArtifact(jobID, artifact string) error { return err } -func (a *App) ensureDevice(path string) (*Device, error) { - devices, err := a.ListDevices(a.settings.DefaultFlashHost) +func (a *App) ensureDevice(host, path string) (*Device, error) { + if strings.TrimSpace(path) == "" { + return nil, fmt.Errorf("select removable media before starting a flash run") + } + devices, err := a.ListDevices(host) if err != nil { return nil, err } @@ -517,14 +538,14 @@ func (a *App) ensureDevice(path string) (*Device, error) { func (a *App) newJob(kind, node, host, device string) *Job { job := &Job{ - ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()), - Kind: kind, - Node: node, - Host: host, - Device: device, - Status: JobQueued, + ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()), + Kind: kind, + Node: node, + Host: host, + Device: device, + Status: JobQueued, ProgressPct: 0, - StartedAt: time.Now().UTC(), + StartedAt: time.Now().UTC(), } a.mu.Lock() a.jobs[job.ID] = job @@ -628,6 +649,32 @@ func (a *App) artifactPath(node string) string { return filepath.Join(a.settings.ArtifactDir, fmt.Sprintf("%s.img", node)) } +func (a *App) flashHosts() []string { + hosts := map[string]struct{}{} + for _, host := range a.settings.FlashHosts { + if value := strings.TrimSpace(host); value != "" { + hosts[value] = struct{}{} + } + } + for _, host := range []string{a.settings.DefaultFlashHost, a.settings.LocalHost} { + if value := strings.TrimSpace(host); value != "" { + hosts[value] = struct{}{} + } + } + for _, host := range clusterNodeNames() { + hosts[host] = struct{}{} + } + out := make([]string, 0, len(hosts)) + for host := range hosts { + out = append(out, host) + } + sort.Strings(out) + if a.settings.DefaultFlashHost == "" { + return out + } + return moveToFront(out, a.settings.DefaultFlashHost) +} + func (a *App) loadSnapshots() error { data, err := os.ReadFile(a.settings.SnapshotsPath) if err != nil { @@ -742,6 +789,67 @@ func firstLine(value string) string { return value } +func preferredDevice(devices []Device) string { + if len(devices) == 0 { + return "" + } + return devices[0].Path +} + +func errorString(err error) string { + if err == nil { + return "" + } + return err.Error() +} + +func (a *App) supportsLocalMedia(host string) bool { + host = strings.TrimSpace(host) + return host == "" || host == a.settings.LocalHost || host == a.settings.DefaultFlashHost +} + +func deviceScore(device Device) int { + score := 0 + model := strings.ToLower(strings.TrimSpace(device.Model)) + switch { + case strings.Contains(model, "microsd"), strings.Contains(model, "micro sd"): + score += 60 + case strings.Contains(model, "sdxc"), strings.Contains(model, "sdhc"), strings.Contains(model, "sd "): + score += 50 + case strings.Contains(model, "card"), strings.Contains(model, "reader"): + score += 40 + } + if device.Removable { + score += 20 + } + if device.Hotplug { + score += 10 + } + if device.Transport == "usb" { + score += 5 + } + if strings.HasPrefix(device.Name, "mmcblk") { + score += 25 + } + return score +} + +func moveToFront(values []string, preferred string) []string { + if preferred == "" || len(values) < 2 { + return values + } + out := append([]string{}, values...) + for idx, value := range out { + if value != preferred { + continue + } + copy(out[1:idx+1], out[:idx]) + out[0] = preferred + return out + } + return out +} + func deleteNodeObject(node string) error { if err := deleteNodeObjectInCluster(node); err == nil { return nil @@ -793,3 +901,60 @@ func deleteNodeObjectInCluster(node string) error { body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) return fmt.Errorf("delete node %s failed: %s: %s", node, resp.Status, strings.TrimSpace(string(body))) } + +func clusterNodeNames() []string { + host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST")) + port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT")) + if host == "" || port == "" { + return nil + } + token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return nil + } + caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if err != nil { + return nil + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil + } + client := &http.Client{ + Timeout: 10 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{RootCAs: pool}, + }, + } + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://%s:%s/api/v1/nodes", host, port), nil) + if err != nil { + return nil + } + req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token))) + resp, err := client.Do(req) + if err != nil { + return nil + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil + } + var payload struct { + Items []struct { + Metadata struct { + Name string `json:"name"` + } `json:"metadata"` + } `json:"items"` + } + if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&payload); err != nil { + return nil + } + names := make([]string, 0, len(payload.Items)) + for _, item := range payload.Items { + if name := strings.TrimSpace(item.Metadata.Name); name != "" { + names = append(names, name) + } + } + sort.Strings(names) + return names +} diff --git a/pkg/service/server.go b/pkg/service/server.go index 0a731a2..9f794c0 100644 --- a/pkg/service/server.go +++ b/pkg/service/server.go @@ -161,15 +161,9 @@ func (a *App) withUIAuth(next http.HandlerFunc) http.HandlerFunc { func (a *App) authorize(r *http.Request) (userContext, bool) { user := firstNonEmptyHeader(r, "X-Auth-Request-User", "X-Forwarded-User", "X-Auth-Request-Email", "X-Forwarded-Email") - if user == "" { - return userContext{}, false - } groups := splitHeaderList(firstNonEmptyHeader(r, "X-Auth-Request-Groups", "X-Forwarded-Groups")) - normalizedUser := normalizeUserValue(user) - for _, allowedUser := range a.settings.AllowedUsers { - if normalizeUserValue(allowedUser) == normalizedUser { - return userContext{Name: user, Groups: groups}, true - } + if len(groups) == 0 { + return userContext{Name: user, Groups: groups}, false } for _, group := range groups { for _, allowed := range a.settings.AllowedGroups { @@ -205,10 +199,6 @@ func splitHeaderList(raw string) []string { return out } -func normalizeUserValue(raw string) string { - return strings.ToLower(strings.TrimSpace(raw)) -} - func normalizeGroupValue(raw string) string { value := strings.ToLower(strings.TrimSpace(raw)) return strings.TrimPrefix(value, "/") @@ -243,17 +233,21 @@ var metisPage = template.Must(template.New("metis").Parse(` Metis Control @@ -457,6 +493,13 @@ var metisPage = template.Must(template.New("metis").Parse(`
Default flash host: {{.State.DefaultFlashHost}}
+ +
@@ -473,6 +516,8 @@ var metisPage = template.Must(template.New("metis").Parse(`
+
+
@@ -507,8 +552,12 @@ var metisPage = template.Must(template.New("metis").Parse(`
-

Recent Changes

-

This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.

+
+
+

Recent Changes

+

This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.

+
+
@@ -518,6 +567,8 @@ var metisPage = template.Must(template.New("metis").Parse(` `)) diff --git a/pkg/service/server_test.go b/pkg/service/server_test.go index 8629109..50fef26 100644 --- a/pkg/service/server_test.go +++ b/pkg/service/server_test.go @@ -50,18 +50,35 @@ func TestUIAuthAcceptsForwardedSlashGroups(t *testing.T) { } } -func TestUIAuthAcceptsForwardedEmailForAllowedUser(t *testing.T) { +func TestUIAuthRejectsUserWithoutAllowedGroup(t *testing.T) { app := newTestApp(t) - app.settings.AllowedUsers = []string{"brad.stein@gmail.com"} handler := app.Handler() req := httptest.NewRequest(http.MethodGet, "/api/state", nil) req.Header.Set("X-Forwarded-Email", "Brad.Stein@gmail.com") resp := httptest.NewRecorder() handler.ServeHTTP(resp, req) + if resp.Code != http.StatusForbidden { + t.Fatalf("expected forbidden, got %d: %s", resp.Code, resp.Body.String()) + } +} + +func TestStateJSONUsesLowerCaseNodeFields(t *testing.T) { + app := newTestApp(t) + handler := app.Handler() + + req := httptest.NewRequest(http.MethodGet, "/api/state", nil) + req.Header.Set("X-Auth-Request-User", "brad") + req.Header.Set("X-Auth-Request-Groups", "admin") + resp := httptest.NewRecorder() + handler.ServeHTTP(resp, req) if resp.Code != http.StatusOK { t.Fatalf("expected ok, got %d: %s", resp.Code, resp.Body.String()) } + body := resp.Body.String() + if !strings.Contains(body, `"name":"titan-15"`) { + t.Fatalf("expected lowercase node name field in json, got %s", body) + } } func TestInternalSnapshotAndWatch(t *testing.T) { @@ -155,7 +172,7 @@ nodes: if err := app.StoreSnapshot(SnapshotRecord{ Node: "titan-17", CollectedAt: time.Now().UTC().Add(-10 * time.Minute), - Snapshot: sentinelSnapshot("titan-17", "6.6.63"), + Snapshot: sentinelSnapshot("titan-17", "6.6.63"), }); err != nil { t.Fatalf("seed snapshot: %v", err) } diff --git a/pkg/service/settings.go b/pkg/service/settings.go index b671ff1..78e58f6 100644 --- a/pkg/service/settings.go +++ b/pkg/service/settings.go @@ -19,7 +19,6 @@ type Settings struct { DefaultFlashHost string FlashHosts []string LocalHost string - AllowedUsers []string AllowedGroups []string MaxDeviceBytes int64 } @@ -41,7 +40,6 @@ func FromEnv() Settings { DefaultFlashHost: defaultFlashHost, FlashHosts: flashHosts, LocalHost: localHost, - AllowedUsers: splitList(getenvDefault("METIS_ALLOWED_USERS", "")), AllowedGroups: splitList(getenvDefault("METIS_ALLOWED_GROUPS", "admin,maintainer")), MaxDeviceBytes: getenvInt64("METIS_MAX_DEVICE_BYTES", 300000000000), }