diff --git a/overlays/rpi4-armbian-longhorn-root/usr/local/sbin/metis-rpi4-longhorn-firstboot.sh b/overlays/rpi4-armbian-longhorn-root/usr/local/sbin/metis-rpi4-longhorn-firstboot.sh index 43a1f00..c1bd093 100755 --- a/overlays/rpi4-armbian-longhorn-root/usr/local/sbin/metis-rpi4-longhorn-firstboot.sh +++ b/overlays/rpi4-armbian-longhorn-root/usr/local/sbin/metis-rpi4-longhorn-firstboot.sh @@ -5,6 +5,7 @@ marker="/var/lib/metis/rpi4-longhorn-firstboot.done" env_file="/etc/metis/firstboot.env" key_file="/etc/metis/authorized_keys" fstab_append="/etc/metis/fstab.append" +sudoers_file="/etc/metis/sudoers-hecate" default_groups=(tty disk dialout sudo audio video plugdev games users systemd-journal input render netdev) exec > >(tee -a /var/log/metis-rpi4-longhorn-firstboot.log) 2>&1 @@ -49,15 +50,16 @@ if command -v nmcli >/dev/null 2>&1; then retry_cmd 10 sh -c 'nmcli general status >/dev/null 2>&1' nmcli connection reload || true while IFS=: read -r name type device; do - [ "${device}" = "end0" ] || continue + [ "${device}" = "end0" ] || [ "${device}" = "eth0" ] || continue [ "${name}" = "end0-static" ] && continue + [ "${name}" = "eth0-static" ] && continue case "${type}" in ethernet|802-3-ethernet) nmcli connection modify "${name}" connection.autoconnect no || true ;; esac done < <(nmcli -t -f NAME,TYPE,DEVICE connection show 2>/dev/null || true) - nmcli connection up end0-static || true + nmcli connection up end0-static || nmcli connection up eth0-static || true elif [ -f /etc/systemd/network/10-end0-static.network ]; then systemctl enable systemd-networkd.service || true systemctl restart systemd-networkd.service || true @@ -83,11 +85,20 @@ fi if ! command -v iscsiadm >/dev/null 2>&1; then packages+=("open-iscsi") fi +if ! command -v iptables >/dev/null 2>&1; then + packages+=("iptables") +fi if [ "${#packages[@]}" -gt 0 ]; then export DEBIAN_FRONTEND=noninteractive - ensure_network_access - retry_cmd 5 apt-get update - retry_cmd 5 apt-get install -y --no-install-recommends "${packages[@]}" + if ensure_network_access; then + if ! retry_cmd 5 apt-get update; then + echo "WARN: apt-get update failed; continuing without package install." + elif ! retry_cmd 5 apt-get install -y --no-install-recommends "${packages[@]}"; then + echo "WARN: apt-get install failed for: ${packages[*]}; continuing." + fi + else + echo "WARN: outbound network check failed; skipping package install for: ${packages[*]}" + fi fi systemctl daemon-reload @@ -132,6 +143,17 @@ if [ -s "${key_file}" ]; then fi fi +if [ -s "${sudoers_file}" ]; then + install -d -m 755 /etc/sudoers.d + install -m 440 "${sudoers_file}" /etc/sudoers.d/90-hecate-atlas + if command -v visudo >/dev/null 2>&1; then + if ! visudo -cf /etc/sudoers.d/90-hecate-atlas >/dev/null 2>&1; then + echo "WARN: invalid /etc/sudoers.d/90-hecate-atlas generated by metis; removing it." + rm -f /etc/sudoers.d/90-hecate-atlas + fi + fi +fi + rm -f /root/.not_logged_in_yet if ! command -v k3s >/dev/null 2>&1; then diff --git a/pkg/plan/inject.go b/pkg/plan/inject.go index a035f3c..7633d5d 100644 --- a/pkg/plan/inject.go +++ b/pkg/plan/inject.go @@ -116,7 +116,13 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File if cfg.IP != "" { files = append(files, inject.FileSpec{ Path: "etc/NetworkManager/system-connections/end0-static.nmconnection", - Content: []byte(networkManagerConnectionContent(cfg.IP)), + Content: []byte(networkManagerConnectionContent("end0-static", "end0", cfg.IP)), + Mode: 0o600, + RootFS: true, + }) + files = append(files, inject.FileSpec{ + Path: "etc/NetworkManager/system-connections/eth0-static.nmconnection", + Content: []byte(networkManagerConnectionContent("eth0-static", "eth0", cfg.IP)), Mode: 0o600, RootFS: true, }) @@ -142,6 +148,21 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File RootFS: true, }) } + if cfg.SSHUser == "atlas" { + sudoers := hecateSudoersContent(cfg.SSHUser) + files = append(files, inject.FileSpec{ + Path: "etc/sudoers.d/90-hecate-atlas", + Content: []byte(sudoers), + Mode: 0o440, + RootFS: true, + }) + files = append(files, inject.FileSpec{ + Path: "etc/metis/sudoers-hecate", + Content: []byte(sudoers), + Mode: 0o440, + RootFS: true, + }) + } if len(cfg.Fstab) > 0 { files = append(files, inject.FileSpec{ Path: "etc/metis/fstab.append", @@ -201,6 +222,9 @@ func hostsContent(hostname string) string { func k3sConfigContent(cfg *config.NodeConfig) string { var labelList []string for k, v := range cfg.Labels { + if !allowK3sNodeLabel(cfg.K3s.Role, k) { + continue + } labelList = append(labelList, fmt.Sprintf("%s=%s", k, v)) } sort.Strings(labelList) @@ -208,7 +232,9 @@ func k3sConfigContent(cfg *config.NodeConfig) string { sort.Strings(taints) var b bytes.Buffer - b.WriteString("write-kubeconfig-mode: \"0644\"\n") + if cfg.K3s.Role != "agent" { + b.WriteString("write-kubeconfig-mode: \"0644\"\n") + } if cfg.K3s.URL != "" { b.WriteString(fmt.Sprintf("server: %s\n", cfg.K3s.URL)) } @@ -234,6 +260,13 @@ func k3sConfigContent(cfg *config.NodeConfig) string { return b.String() } +func allowK3sNodeLabel(role, key string) bool { + if role != "agent" { + return true + } + return !strings.HasPrefix(key, "node-role.kubernetes.io/") +} + func cloudInitUserData(cfg *config.NodeConfig, sec *secrets.NodeSecrets) string { if cfg == nil { return "" @@ -261,15 +294,15 @@ func firstbootEnvContent(cfg *config.NodeConfig) string { return b.String() } -func networkManagerConnectionContent(ip string) string { +func networkManagerConnectionContent(id, iface, ip string) string { gateway := ip if lastDot := strings.LastIndex(gateway, "."); lastDot >= 0 { gateway = gateway[:lastDot+1] + "1" } return fmt.Sprintf(`[connection] -id=end0-static +id=%s type=ethernet -interface-name=end0 +interface-name=%s autoconnect=true autoconnect-priority=100 @@ -286,7 +319,7 @@ may-fail=false method=ignore [proxy] -`, ip, gateway, gateway) +`, id, iface, ip, gateway, gateway) } func systemdNetworkContent(ip string) string { @@ -295,7 +328,7 @@ func systemdNetworkContent(ip string) string { gateway = gateway[:lastDot+1] + "1" } return fmt.Sprintf(`[Match] -Name=end0 +Name=end0 eth0 [Network] Address=%s/24 @@ -323,6 +356,10 @@ func fstabAppendContent(cfg *config.NodeConfig) string { return strings.Join(lines, "\n") + "\n" } +func hecateSudoersContent(user string) string { + return fmt.Sprintf("%s ALL=(ALL) NOPASSWD: /usr/bin/systemctl, /usr/sbin/poweroff, /sbin/poweroff, /usr/local/bin/hecate\n", user) +} + func shellQuote(value string) string { if value == "" { return "''" diff --git a/pkg/plan/inject_test.go b/pkg/plan/inject_test.go index 0e79de5..df67551 100644 --- a/pkg/plan/inject_test.go +++ b/pkg/plan/inject_test.go @@ -17,6 +17,12 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) { IP: "10.0.0.10", SSHUser: "pi", SSHKeys: []string{"ssh-rsa AAA"}, + K3s: config.K3sConfig{ + Role: "agent", + URL: "https://server:6443", + Token: "secret", + Version: "v1.31.5+k3s1", + }, Fstab: []config.FstabEntry{ { UUID: "disk-uuid", @@ -25,13 +31,8 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) { Options: "defaults,nofail", }, }, - Labels: map[string]string{"role": "worker", "zone": "a"}, + Labels: map[string]string{"role": "worker", "zone": "a", "node-role.kubernetes.io/worker": "true"}, Taints: []string{"gpu=true:NoSchedule"}, - K3s: config.K3sConfig{ - URL: "https://server:6443", - Token: "secret", - Version: "v1.31.5+k3s1", - }, } files, err := buildFiles(cfg, nil) if err != nil { @@ -48,6 +49,12 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) { if !strings.Contains(k3s, "server: https://server:6443") || !strings.Contains(k3s, "node-name: n1") { t.Fatalf("unexpected k3s config: %s", k3s) } + if strings.Contains(k3s, "write-kubeconfig-mode") { + t.Fatalf("agent config should not include write-kubeconfig-mode: %s", k3s) + } + if strings.Contains(k3s, "node-role.kubernetes.io/worker") { + t.Fatalf("agent config should skip reserved node-role label: %s", k3s) + } hostFile, ok := pathMap["etc/hostname"] if !ok || strings.TrimSpace(hostFile) != "n1" { t.Fatalf("hostname file missing/incorrect: %q", hostFile) @@ -64,8 +71,12 @@ func TestBuildFilesProducesK3sConfig(t *testing.T) { if !ok || !strings.Contains(network, "address1=10.0.0.10/24,10.0.0.1") { t.Fatalf("networkmanager config missing/incorrect: %s", network) } + networkEth0, ok := pathMap["etc/NetworkManager/system-connections/eth0-static.nmconnection"] + if !ok || !strings.Contains(networkEth0, "interface-name=eth0") { + t.Fatalf("eth0 networkmanager config missing/incorrect: %s", networkEth0) + } networkd, ok := pathMap["etc/systemd/network/10-end0-static.network"] - if !ok || !strings.Contains(networkd, "Address=10.0.0.10/24") || !strings.Contains(networkd, "Gateway=10.0.0.1") { + if !ok || !strings.Contains(networkd, "Name=end0 eth0") || !strings.Contains(networkd, "Address=10.0.0.10/24") || !strings.Contains(networkd, "Gateway=10.0.0.1") { t.Fatalf("systemd-networkd config missing/incorrect: %s", networkd) } fstab, ok := pathMap["etc/metis/fstab.append"] @@ -123,3 +134,31 @@ func TestSecretsWrite(t *testing.T) { t.Fatalf("secrets file not written") } } + +func TestBuildFilesAddsHecateSudoersForAtlas(t *testing.T) { + cfg := &config.NodeConfig{ + Hostname: "n1", + IP: "10.0.0.10", + SSHUser: "atlas", + SSHKeys: []string{"ssh-ed25519 AAA test"}, + K3s: config.K3sConfig{ + Role: "agent", + }, + } + files, err := buildFiles(cfg, nil) + if err != nil { + t.Fatalf("buildFiles: %v", err) + } + pathMap := map[string]string{} + for _, f := range files { + pathMap[f.Path] = string(f.Content) + } + sudoers, ok := pathMap["etc/sudoers.d/90-hecate-atlas"] + if !ok || !strings.Contains(sudoers, "atlas ALL=(ALL) NOPASSWD: /usr/bin/systemctl") { + t.Fatalf("sudoers file missing/incorrect: %s", sudoers) + } + backup, ok := pathMap["etc/metis/sudoers-hecate"] + if !ok || backup != sudoers { + t.Fatalf("metis sudoers backup missing/incorrect: %s", backup) + } +}