recovery(metis): restore node identity on rebuilt images
This commit is contained in:
parent
ebaa367efd
commit
17069e4677
@ -154,6 +154,10 @@ if [ -s "${sudoers_file}" ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -x /usr/local/sbin/metis-apply-node-identity.sh ]; then
|
||||||
|
/usr/local/sbin/metis-apply-node-identity.sh || true
|
||||||
|
fi
|
||||||
|
|
||||||
rm -f /root/.not_logged_in_yet
|
rm -f /root/.not_logged_in_yet
|
||||||
|
|
||||||
if ! command -v k3s >/dev/null 2>&1; then
|
if ! command -v k3s >/dev/null 2>&1; then
|
||||||
|
|||||||
@ -2,8 +2,6 @@ package plan
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -64,6 +62,7 @@ func Files(inv *inventory.Inventory, nodeName string) ([]inject.FileSpec, error)
|
|||||||
cfg.Secrets = sec.Extra
|
cfg.Secrets = sec.Extra
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
applyNodeMetadataEnv(cfg)
|
||||||
files, err := buildFiles(cfg, sec)
|
files, err := buildFiles(cfg, sec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -111,7 +110,9 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File
|
|||||||
{Path: "etc/hostname", Content: []byte(cfg.Hostname + "\n"), Mode: 0o644, RootFS: true},
|
{Path: "etc/hostname", Content: []byte(cfg.Hostname + "\n"), Mode: 0o644, RootFS: true},
|
||||||
{Path: "etc/hosts", Content: []byte(hostsContent(cfg.Hostname)), Mode: 0o644, RootFS: true},
|
{Path: "etc/hosts", Content: []byte(hostsContent(cfg.Hostname)), Mode: 0o644, RootFS: true},
|
||||||
{Path: "etc/rancher/k3s/config.yaml", Content: []byte(k3sConfigContent(cfg)), Mode: 0o644, RootFS: true},
|
{Path: "etc/rancher/k3s/config.yaml", Content: []byte(k3sConfigContent(cfg)), Mode: 0o644, RootFS: true},
|
||||||
{Path: "etc/metis/firstboot.env", Content: []byte(firstbootEnvContent(cfg)), Mode: 0o600, RootFS: true},
|
{Path: "etc/metis/firstboot.env", Content: []byte(firstbootEnvContent(cfg, sec)), Mode: 0o600, RootFS: true},
|
||||||
|
{Path: "usr/local/sbin/metis-apply-node-identity.sh", Content: []byte(nodeIdentityScriptContent()), Mode: 0o755, RootFS: true},
|
||||||
|
{Path: "etc/cloud/cloud.cfg.d/90-metis-recovery.cfg", Content: []byte(cloudInitRootFSContent(sec)), Mode: 0o644, RootFS: true},
|
||||||
}
|
}
|
||||||
if cfg.IP != "" {
|
if cfg.IP != "" {
|
||||||
files = append(files, inject.FileSpec{
|
files = append(files, inject.FileSpec{
|
||||||
@ -148,6 +149,14 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File
|
|||||||
RootFS: true,
|
RootFS: true,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
if passwordAuth := sshPasswordConfigContent(sec); passwordAuth != "" {
|
||||||
|
files = append(files, inject.FileSpec{
|
||||||
|
Path: "etc/ssh/sshd_config.d/90-metis-password-auth.conf",
|
||||||
|
Content: []byte(passwordAuth),
|
||||||
|
Mode: 0o644,
|
||||||
|
RootFS: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
if cfg.SSHUser == "atlas" {
|
if cfg.SSHUser == "atlas" {
|
||||||
sudoers := hecateSudoersContent(cfg.SSHUser)
|
sudoers := hecateSudoersContent(cfg.SSHUser)
|
||||||
files = append(files, inject.FileSpec{
|
files = append(files, inject.FileSpec{
|
||||||
@ -172,8 +181,7 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the raw config for debugging/ops.
|
raw, err := jsonMarshalIndent(cfg)
|
||||||
raw, err := json.MarshalIndent(cfg, "", " ")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -184,7 +192,7 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File
|
|||||||
RootFS: true,
|
RootFS: true,
|
||||||
})
|
})
|
||||||
if sec != nil {
|
if sec != nil {
|
||||||
secRaw, err := json.MarshalIndent(sec, "", " ")
|
secRaw, err := jsonMarshalIndent(redactedSecretsForImage(sec))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -196,7 +204,6 @@ func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.File
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optional cloud-init for images that honor NoCloud.
|
|
||||||
userData := cloudInitUserData(cfg, sec)
|
userData := cloudInitUserData(cfg, sec)
|
||||||
if userData != "" {
|
if userData != "" {
|
||||||
files = append(files, inject.FileSpec{
|
files = append(files, inject.FileSpec{
|
||||||
@ -267,33 +274,6 @@ func allowK3sNodeLabel(role, key string) bool {
|
|||||||
return !strings.HasPrefix(key, "node-role.kubernetes.io/")
|
return !strings.HasPrefix(key, "node-role.kubernetes.io/")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cloudInitUserData(cfg *config.NodeConfig, sec *secrets.NodeSecrets) string {
|
|
||||||
if cfg == nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
if sec != nil && sec.CloudInit != "" {
|
|
||||||
return sec.CloudInit
|
|
||||||
}
|
|
||||||
var b bytes.Buffer
|
|
||||||
b.WriteString("#cloud-config\n")
|
|
||||||
b.WriteString(fmt.Sprintf("hostname: %s\n", cfg.Hostname))
|
|
||||||
if len(cfg.SSHKeys) > 0 {
|
|
||||||
b.WriteString("ssh_authorized_keys:\n")
|
|
||||||
for _, k := range cfg.SSHKeys {
|
|
||||||
b.WriteString(fmt.Sprintf(" - %s\n", k))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return b.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
func firstbootEnvContent(cfg *config.NodeConfig) string {
|
|
||||||
var b bytes.Buffer
|
|
||||||
b.WriteString(fmt.Sprintf("METIS_HOSTNAME=%s\n", shellQuote(cfg.Hostname)))
|
|
||||||
b.WriteString(fmt.Sprintf("METIS_SSH_USER=%s\n", shellQuote(cfg.SSHUser)))
|
|
||||||
b.WriteString(fmt.Sprintf("METIS_K3S_VERSION=%s\n", shellQuote(cfg.K3s.Version)))
|
|
||||||
return b.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
func networkManagerConnectionContent(id, iface, ip string) string {
|
func networkManagerConnectionContent(id, iface, ip string) string {
|
||||||
gateway := ip
|
gateway := ip
|
||||||
if lastDot := strings.LastIndex(gateway, "."); lastDot >= 0 {
|
if lastDot := strings.LastIndex(gateway, "."); lastDot >= 0 {
|
||||||
@ -347,7 +327,6 @@ func fstabAppendContent(cfg *config.NodeConfig) string {
|
|||||||
source := entry.Source
|
source := entry.Source
|
||||||
switch {
|
switch {
|
||||||
case source != "":
|
case source != "":
|
||||||
// Use the explicit source path for bind mounts.
|
|
||||||
case entry.UUID != "":
|
case entry.UUID != "":
|
||||||
source = "UUID=" + entry.UUID
|
source = "UUID=" + entry.UUID
|
||||||
case entry.Label != "":
|
case entry.Label != "":
|
||||||
@ -374,25 +353,6 @@ func hecateSudoersContent(user string) string {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func shellQuote(value string) string {
|
|
||||||
if value == "" {
|
|
||||||
return "''"
|
|
||||||
}
|
|
||||||
return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
|
|
||||||
}
|
|
||||||
|
|
||||||
func fetchSecrets(hostname string) *secrets.NodeSecrets {
|
|
||||||
if os.Getenv("VAULT_ADDR") == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
cli := secrets.NewFromEnv()
|
|
||||||
sec, err := cli.FetchNode(context.Background(), hostname)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return sec
|
|
||||||
}
|
|
||||||
|
|
||||||
func collectOverlays(class *inventory.NodeClass) ([]inject.FileSpec, error) {
|
func collectOverlays(class *inventory.NodeClass) ([]inject.FileSpec, error) {
|
||||||
var files []inject.FileSpec
|
var files []inject.FileSpec
|
||||||
if class == nil {
|
if class == nil {
|
||||||
|
|||||||
@ -174,3 +174,80 @@ func TestBuildFilesAddsHecateSudoersForAtlas(t *testing.T) {
|
|||||||
t.Fatalf("metis sudoers backup missing/incorrect: %s", backup)
|
t.Fatalf("metis sudoers backup missing/incorrect: %s", backup)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildFilesAddsPasswordArtifactsAndRedactsSecrets(t *testing.T) {
|
||||||
|
cfg := &config.NodeConfig{
|
||||||
|
Hostname: "titan-15",
|
||||||
|
IP: "192.168.22.43",
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHKeys: []string{"ssh-ed25519 AAA test"},
|
||||||
|
K3s: config.K3sConfig{
|
||||||
|
Role: "agent",
|
||||||
|
Version: "v1.31.5+k3s1",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
sec := &secrets.NodeSecrets{
|
||||||
|
SSHPassword: "atlas-pass",
|
||||||
|
RootPassword: "root-pass",
|
||||||
|
K3sToken: "super-secret-token",
|
||||||
|
Extra: map[string]string{"api_key": "secret"},
|
||||||
|
}
|
||||||
|
files, err := buildFiles(cfg, sec)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("buildFiles: %v", err)
|
||||||
|
}
|
||||||
|
pathMap := map[string]string{}
|
||||||
|
for _, file := range files {
|
||||||
|
pathMap[file.Path] = string(file.Content)
|
||||||
|
}
|
||||||
|
firstboot := pathMap["etc/metis/firstboot.env"]
|
||||||
|
if !strings.Contains(firstboot, "METIS_ATLAS_PASSWORD='atlas-pass'") || !strings.Contains(firstboot, "METIS_ROOT_PASSWORD='root-pass'") {
|
||||||
|
t.Fatalf("firstboot env missing password material: %s", firstboot)
|
||||||
|
}
|
||||||
|
if sshd := pathMap["etc/ssh/sshd_config.d/90-metis-password-auth.conf"]; !strings.Contains(sshd, "PasswordAuthentication yes") || !strings.Contains(sshd, "PermitRootLogin yes") {
|
||||||
|
t.Fatalf("password auth config missing: %s", sshd)
|
||||||
|
}
|
||||||
|
if script := pathMap["usr/local/sbin/metis-apply-node-identity.sh"]; !strings.Contains(script, "apply_password root") || !strings.Contains(script, "METIS_ATLAS_PASSWORD") {
|
||||||
|
t.Fatalf("node identity script missing password application: %s", script)
|
||||||
|
}
|
||||||
|
if cloudCfg := pathMap["etc/cloud/cloud.cfg.d/90-metis-recovery.cfg"]; !strings.Contains(cloudCfg, "ssh_pwauth: true") {
|
||||||
|
t.Fatalf("cloud recovery config missing ssh_pwauth: %s", cloudCfg)
|
||||||
|
}
|
||||||
|
if userData := pathMap["user-data"]; !strings.Contains(userData, "ssh_pwauth: true") || !strings.Contains(userData, "metis-apply-node-identity.sh") {
|
||||||
|
t.Fatalf("cloud-init user-data missing recovery hooks: %s", userData)
|
||||||
|
}
|
||||||
|
secretsJSON := pathMap["etc/metis/secrets.json"]
|
||||||
|
if strings.Contains(secretsJSON, "atlas-pass") || strings.Contains(secretsJSON, "root-pass") || strings.Contains(secretsJSON, "super-secret-token") {
|
||||||
|
t.Fatalf("secrets.json should be redacted: %s", secretsJSON)
|
||||||
|
}
|
||||||
|
if !strings.Contains(secretsJSON, `"has_ssh_password": true`) || !strings.Contains(secretsJSON, `"extra_keys": [`) {
|
||||||
|
t.Fatalf("secrets.json should keep redacted debug metadata: %s", secretsJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyNodeMetadataEnv(t *testing.T) {
|
||||||
|
cfg := &config.NodeConfig{
|
||||||
|
Labels: map[string]string{"hardware": "rpi4"},
|
||||||
|
Taints: []string{"flash=true:NoSchedule"},
|
||||||
|
K3s: config.K3sConfig{
|
||||||
|
Labels: map[string]string{"hardware": "rpi4"},
|
||||||
|
Taints: []string{"flash=true:NoSchedule"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
t.Setenv("METIS_NODE_LABELS_JSON", `{"hardware":"rpi5","maintenance.bstein.dev/role":"recovery"}`)
|
||||||
|
t.Setenv("METIS_NODE_TAINTS_JSON", `["dedicated=recovery:NoSchedule","flash=true:NoSchedule"]`)
|
||||||
|
applyNodeMetadataEnv(cfg)
|
||||||
|
if cfg.Labels["hardware"] != "rpi5" || cfg.Labels["maintenance.bstein.dev/role"] != "recovery" {
|
||||||
|
t.Fatalf("applyNodeMetadataEnv labels = %#v", cfg.Labels)
|
||||||
|
}
|
||||||
|
if !strings.Contains(strings.Join(cfg.Taints, ","), "dedicated=recovery:NoSchedule") {
|
||||||
|
t.Fatalf("applyNodeMetadataEnv taints = %#v", cfg.Taints)
|
||||||
|
}
|
||||||
|
cfg = &config.NodeConfig{}
|
||||||
|
t.Setenv("METIS_NODE_LABELS_JSON", `{bad-json`)
|
||||||
|
t.Setenv("METIS_NODE_TAINTS_JSON", `{bad-json`)
|
||||||
|
applyNodeMetadataEnv(cfg)
|
||||||
|
if cfg.Labels != nil || cfg.Taints != nil {
|
||||||
|
t.Fatalf("invalid env JSON should be ignored: %#v", cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
262
pkg/plan/node_identity.go
Normal file
262
pkg/plan/node_identity.go
Normal file
@ -0,0 +1,262 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
"metis/pkg/secrets"
|
||||||
|
)
|
||||||
|
|
||||||
|
func cloudInitUserData(cfg *config.NodeConfig, sec *secrets.NodeSecrets) string {
|
||||||
|
if cfg == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if sec != nil && sec.CloudInit != "" {
|
||||||
|
return sec.CloudInit
|
||||||
|
}
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString("#cloud-config\n")
|
||||||
|
b.WriteString(fmt.Sprintf("hostname: %s\n", cfg.Hostname))
|
||||||
|
if len(cfg.SSHKeys) > 0 {
|
||||||
|
b.WriteString("ssh_authorized_keys:\n")
|
||||||
|
for _, k := range cfg.SSHKeys {
|
||||||
|
b.WriteString(fmt.Sprintf(" - %s\n", k))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if hasNodePasswords(sec) {
|
||||||
|
b.WriteString("ssh_pwauth: true\n")
|
||||||
|
b.WriteString("disable_root: false\n")
|
||||||
|
}
|
||||||
|
b.WriteString("runcmd:\n")
|
||||||
|
b.WriteString(" - [bash, -lc, \"/usr/local/sbin/metis-apply-node-identity.sh\"]\n")
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstbootEnvContent(cfg *config.NodeConfig, sec *secrets.NodeSecrets) string {
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_HOSTNAME=%s\n", shellQuote(cfg.Hostname)))
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_SSH_USER=%s\n", shellQuote(cfg.SSHUser)))
|
||||||
|
b.WriteString("METIS_ATLAS_USER='atlas'\n")
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_K3S_VERSION=%s\n", shellQuote(cfg.K3s.Version)))
|
||||||
|
if sec != nil {
|
||||||
|
if value := effectiveAtlasPassword(sec); value != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_ATLAS_PASSWORD=%s\n", shellQuote(value)))
|
||||||
|
}
|
||||||
|
if value := effectiveAtlasPasswordHash(sec); value != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_ATLAS_PASSWORD_HASH=%s\n", shellQuote(value)))
|
||||||
|
}
|
||||||
|
if value := strings.TrimSpace(sec.RootPassword); value != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_ROOT_PASSWORD=%s\n", shellQuote(value)))
|
||||||
|
}
|
||||||
|
if value := strings.TrimSpace(sec.RootPasswordHash); value != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_ROOT_PASSWORD_HASH=%s\n", shellQuote(value)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func cloudInitRootFSContent(sec *secrets.NodeSecrets) string {
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString("#cloud-config\n")
|
||||||
|
if hasNodePasswords(sec) {
|
||||||
|
b.WriteString("ssh_pwauth: true\n")
|
||||||
|
b.WriteString("disable_root: false\n")
|
||||||
|
}
|
||||||
|
b.WriteString("runcmd:\n")
|
||||||
|
b.WriteString(" - [bash, -lc, \"/usr/local/sbin/metis-apply-node-identity.sh\"]\n")
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// nodeIdentityScriptContent returns the bash script installed as
// /usr/local/sbin/metis-apply-node-identity.sh.
//
// The script runs at most once (guarded by a marker file under
// /var/lib/metis). It sources /etc/metis/firstboot.env when present, ensures
// the atlas user (and a distinct SSH user, if configured) exists with the
// default supplementary groups that are present on the system, applies root
// and atlas passwords (a hash takes precedence over a plaintext password),
// installs /etc/metis/authorized_keys for root and those users, installs and
// validates the hecate sudoers drop-in, restarts the SSH service, and finally
// writes the marker.
//
// Two details matter because the script runs under `set -euo pipefail`:
//   - The home-directory lookup in install_keys ends in `|| true`. Without it,
//     getent's non-zero exit for a missing passwd entry would abort the whole
//     script and the /root and /home/<user> fallback could never run.
//   - chown uses the "user:" form so the user's login group is applied, rather
//     than assuming a group named after the user exists.
func nodeIdentityScriptContent() string {
	return `#!/usr/bin/env bash
set -euo pipefail

marker="/var/lib/metis/node-identity-applied.done"
env_file="/etc/metis/firstboot.env"
key_file="/etc/metis/authorized_keys"
sudoers_file="/etc/metis/sudoers-hecate"
default_groups=(adm sudo tty disk dialout audio video plugdev games users systemd-journal input render netdev)

if [ -f "${marker}" ]; then
  exit 0
fi

mkdir -p /var/lib/metis
if [ -f "${env_file}" ]; then
  # shellcheck disable=SC1090
  . "${env_file}"
fi

atlas_user="${METIS_ATLAS_USER:-atlas}"
ssh_user="${METIS_SSH_USER:-${atlas_user}}"
atlas_password="${METIS_ATLAS_PASSWORD:-}"
atlas_password_hash="${METIS_ATLAS_PASSWORD_HASH:-}"
root_password="${METIS_ROOT_PASSWORD:-}"
root_password_hash="${METIS_ROOT_PASSWORD_HASH:-}"

group_list=()
for group_name in "${default_groups[@]}"; do
  if getent group "${group_name}" >/dev/null 2>&1; then
    group_list+=("${group_name}")
  fi
done
if [ "${#group_list[@]}" -gt 0 ]; then
  group_csv="$(IFS=,; printf '%s' "${group_list[*]}")"
else
  group_csv=""
fi

ensure_user() {
  local user_name="$1"
  [ -n "${user_name}" ] || return 0
  if ! id "${user_name}" >/dev/null 2>&1; then
    if [ -n "${group_csv}" ]; then
      useradd -m -s /bin/bash -G "${group_csv}" "${user_name}"
    else
      useradd -m -s /bin/bash "${user_name}"
    fi
  elif [ -n "${group_csv}" ]; then
    usermod -a -G "${group_csv}" "${user_name}" || true
  fi
}

apply_password() {
  local user_name="$1"
  local plain_password="$2"
  local hash_password="$3"
  if ! id "${user_name}" >/dev/null 2>&1; then
    return 0
  fi
  if [ -n "${hash_password}" ]; then
    usermod -p "${hash_password}" "${user_name}"
    passwd -u "${user_name}" >/dev/null 2>&1 || true
    return 0
  fi
  if [ -n "${plain_password}" ]; then
    printf '%s:%s\n' "${user_name}" "${plain_password}" | chpasswd
    passwd -u "${user_name}" >/dev/null 2>&1 || true
  fi
}

install_keys() {
  local user_name="$1"
  [ -n "${user_name}" ] || return 0
  [ -s "${key_file}" ] || return 0
  local home_dir
  home_dir="$(getent passwd "${user_name}" | cut -d: -f6 || true)"
  if [ -z "${home_dir}" ]; then
    if [ "${user_name}" = "root" ]; then
      home_dir="/root"
    else
      home_dir="/home/${user_name}"
    fi
  fi
  install -d -m 700 "${home_dir}/.ssh"
  install -m 600 "${key_file}" "${home_dir}/.ssh/authorized_keys"
  chown -R "${user_name}:" "${home_dir}/.ssh" 2>/dev/null || true
}

ensure_user "${atlas_user}"
if [ -n "${ssh_user}" ] && [ "${ssh_user}" != "root" ] && [ "${ssh_user}" != "${atlas_user}" ]; then
  ensure_user "${ssh_user}"
fi

apply_password root "${root_password}" "${root_password_hash}"
apply_password "${atlas_user}" "${atlas_password}" "${atlas_password_hash}"
if [ -n "${ssh_user}" ] && [ "${ssh_user}" != "root" ] && [ "${ssh_user}" != "${atlas_user}" ]; then
  apply_password "${ssh_user}" "${atlas_password}" "${atlas_password_hash}"
fi

if [ -s "${key_file}" ]; then
  install_keys root
  install_keys "${atlas_user}"
  if [ -n "${ssh_user}" ] && [ "${ssh_user}" != "root" ] && [ "${ssh_user}" != "${atlas_user}" ]; then
    install_keys "${ssh_user}"
  fi
fi

if [ -s "${sudoers_file}" ]; then
  install -d -m 755 /etc/sudoers.d
  install -m 440 "${sudoers_file}" /etc/sudoers.d/90-hecate-atlas
  if command -v visudo >/dev/null 2>&1; then
    visudo -cf /etc/sudoers.d/90-hecate-atlas >/dev/null 2>&1 || rm -f /etc/sudoers.d/90-hecate-atlas
  fi
fi

systemctl restart ssh.service >/dev/null 2>&1 || systemctl restart sshd.service >/dev/null 2>&1 || systemctl restart ssh.socket >/dev/null 2>&1 || true
touch "${marker}"
`
}
|
||||||
|
|
||||||
|
func sshPasswordConfigContent(sec *secrets.NodeSecrets) string {
|
||||||
|
if !hasNodePasswords(sec) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return "PasswordAuthentication yes\nKbdInteractiveAuthentication no\nChallengeResponseAuthentication no\nPermitRootLogin yes\nUsePAM yes\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasNodePasswords(sec *secrets.NodeSecrets) bool {
|
||||||
|
if sec == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return effectiveAtlasPassword(sec) != "" || effectiveAtlasPasswordHash(sec) != "" || firstNonEmptyString(sec.RootPassword, sec.RootPasswordHash) != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func effectiveAtlasPassword(sec *secrets.NodeSecrets) string {
|
||||||
|
if sec == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return firstNonEmptyString(sec.AtlasPassword, sec.SSHPassword)
|
||||||
|
}
|
||||||
|
|
||||||
|
func effectiveAtlasPasswordHash(sec *secrets.NodeSecrets) string {
|
||||||
|
if sec == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return firstNonEmptyString(sec.AtlasPasswordHash, sec.SSHPasswordHash)
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstNonEmptyString returns the first argument that is non-empty after
// surrounding whitespace is trimmed. The trimmed form is what gets returned.
// It yields "" when no argument qualifies, including when called with no
// arguments.
func firstNonEmptyString(values ...string) string {
	for i := range values {
		candidate := strings.TrimSpace(values[i])
		if candidate == "" {
			continue
		}
		return candidate
	}
	return ""
}
|
||||||
|
|
||||||
|
func redactedSecretsForImage(sec *secrets.NodeSecrets) map[string]any {
|
||||||
|
if sec == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
debug := map[string]any{
|
||||||
|
"has_ssh_password": firstNonEmptyString(sec.SSHPassword, sec.SSHPasswordHash) != "",
|
||||||
|
"has_atlas_password": firstNonEmptyString(sec.AtlasPassword, sec.AtlasPasswordHash) != "",
|
||||||
|
"has_root_password": firstNonEmptyString(sec.RootPassword, sec.RootPasswordHash) != "",
|
||||||
|
"has_k3s_token": strings.TrimSpace(sec.K3sToken) != "",
|
||||||
|
"has_cloud_init_override": strings.TrimSpace(sec.CloudInit) != "",
|
||||||
|
}
|
||||||
|
if len(sec.Extra) > 0 {
|
||||||
|
keys := make([]string, 0, len(sec.Extra))
|
||||||
|
for key := range sec.Extra {
|
||||||
|
key = strings.TrimSpace(key)
|
||||||
|
if key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
debug["extra_keys"] = keys
|
||||||
|
}
|
||||||
|
return debug
|
||||||
|
}
|
||||||
|
|
||||||
|
// shellQuote wraps value in single quotes for safe use in a POSIX shell
// assignment. Embedded single quotes are emitted as '"'"' (close the quote,
// add a double-quoted single quote, reopen). An empty value becomes ''.
func shellQuote(value string) string {
	const quote = "'"
	escaped := strings.ReplaceAll(value, quote, `'"'"'`)
	return quote + escaped + quote
}
|
||||||
133
pkg/plan/node_metadata.go
Normal file
133
pkg/plan/node_metadata.go
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
"metis/pkg/secrets"
|
||||||
|
)
|
||||||
|
|
||||||
|
func fetchSecrets(hostname string) *secrets.NodeSecrets {
|
||||||
|
envSecrets := nodeSecretsFromEnv()
|
||||||
|
if os.Getenv("VAULT_ADDR") == "" {
|
||||||
|
return envSecrets
|
||||||
|
}
|
||||||
|
cli := secrets.NewFromEnv()
|
||||||
|
sec, err := cli.FetchNode(context.Background(), hostname)
|
||||||
|
if err != nil {
|
||||||
|
return envSecrets
|
||||||
|
}
|
||||||
|
return mergeNodeSecrets(sec, envSecrets)
|
||||||
|
}
|
||||||
|
|
||||||
|
func nodeSecretsFromEnv() *secrets.NodeSecrets {
|
||||||
|
sec := &secrets.NodeSecrets{
|
||||||
|
SSHPassword: strings.TrimSpace(os.Getenv("METIS_NODE_SSH_PASSWORD")),
|
||||||
|
SSHPasswordHash: strings.TrimSpace(os.Getenv("METIS_NODE_SSH_PASSWORD_HASH")),
|
||||||
|
AtlasPassword: strings.TrimSpace(os.Getenv("METIS_NODE_ATLAS_PASSWORD")),
|
||||||
|
AtlasPasswordHash: strings.TrimSpace(os.Getenv("METIS_NODE_ATLAS_PASSWORD_HASH")),
|
||||||
|
RootPassword: strings.TrimSpace(os.Getenv("METIS_NODE_ROOT_PASSWORD")),
|
||||||
|
RootPasswordHash: strings.TrimSpace(os.Getenv("METIS_NODE_ROOT_PASSWORD_HASH")),
|
||||||
|
}
|
||||||
|
if sec.SSHPassword == "" && sec.SSHPasswordHash == "" && sec.AtlasPassword == "" && sec.AtlasPasswordHash == "" && sec.RootPassword == "" && sec.RootPasswordHash == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return sec
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeNodeSecrets(base, override *secrets.NodeSecrets) *secrets.NodeSecrets {
|
||||||
|
if base == nil {
|
||||||
|
return override
|
||||||
|
}
|
||||||
|
if override == nil {
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
merged := *base
|
||||||
|
merged.SSHPassword = firstNonEmptyString(override.SSHPassword, base.SSHPassword)
|
||||||
|
merged.SSHPasswordHash = firstNonEmptyString(override.SSHPasswordHash, base.SSHPasswordHash)
|
||||||
|
merged.AtlasPassword = firstNonEmptyString(override.AtlasPassword, base.AtlasPassword)
|
||||||
|
merged.AtlasPasswordHash = firstNonEmptyString(override.AtlasPasswordHash, base.AtlasPasswordHash)
|
||||||
|
merged.RootPassword = firstNonEmptyString(override.RootPassword, base.RootPassword)
|
||||||
|
merged.RootPasswordHash = firstNonEmptyString(override.RootPasswordHash, base.RootPasswordHash)
|
||||||
|
merged.K3sToken = firstNonEmptyString(override.K3sToken, base.K3sToken)
|
||||||
|
merged.CloudInit = firstNonEmptyString(override.CloudInit, base.CloudInit)
|
||||||
|
if len(base.Extra) > 0 || len(override.Extra) > 0 {
|
||||||
|
merged.Extra = map[string]string{}
|
||||||
|
for key, value := range base.Extra {
|
||||||
|
merged.Extra[key] = value
|
||||||
|
}
|
||||||
|
for key, value := range override.Extra {
|
||||||
|
merged.Extra[key] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyNodeMetadataEnv(cfg *config.NodeConfig) {
|
||||||
|
if cfg == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if labels := parseEnvJSONMap(os.Getenv("METIS_NODE_LABELS_JSON")); len(labels) > 0 {
|
||||||
|
if cfg.Labels == nil {
|
||||||
|
cfg.Labels = map[string]string{}
|
||||||
|
}
|
||||||
|
for key, value := range labels {
|
||||||
|
cfg.Labels[key] = value
|
||||||
|
}
|
||||||
|
cfg.K3s.Labels = cfg.Labels
|
||||||
|
}
|
||||||
|
if taints := parseEnvJSONList(os.Getenv("METIS_NODE_TAINTS_JSON")); len(taints) > 0 {
|
||||||
|
cfg.Taints = uniqueStrings(append(cfg.Taints, taints...))
|
||||||
|
cfg.K3s.Taints = append([]string{}, cfg.Taints...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseEnvJSONMap decodes raw as a JSON object with string values. It returns
// nil when raw is blank or does not decode cleanly into map[string]string.
func parseEnvJSONMap(raw string) map[string]string {
	payload := []byte(strings.TrimSpace(raw))
	if len(payload) == 0 {
		return nil
	}
	var decoded map[string]string
	if json.Unmarshal(payload, &decoded) != nil {
		// Discard anything that was partially decoded before the error.
		return nil
	}
	return decoded
}
|
||||||
|
|
||||||
|
// parseEnvJSONList decodes raw as a JSON array of strings. It returns nil when
// raw is blank or does not decode cleanly into []string.
func parseEnvJSONList(raw string) []string {
	payload := []byte(strings.TrimSpace(raw))
	if len(payload) == 0 {
		return nil
	}
	var decoded []string
	if json.Unmarshal(payload, &decoded) != nil {
		// Discard anything that was partially decoded before the error.
		return nil
	}
	return decoded
}
|
||||||
|
|
||||||
|
// uniqueStrings returns the distinct, non-blank entries of values with
// surrounding whitespace trimmed, sorted in ascending order. The result is
// always a non-nil slice, even for nil or empty input.
func uniqueStrings(values []string) []string {
	cleaned := make([]string, 0, len(values))
	for _, raw := range values {
		if item := strings.TrimSpace(raw); item != "" {
			cleaned = append(cleaned, item)
		}
	}
	sort.Strings(cleaned)

	// After sorting, duplicates are adjacent; compact them in place.
	kept := cleaned[:0]
	for _, item := range cleaned {
		if n := len(kept); n > 0 && kept[n-1] == item {
			continue
		}
		kept = append(kept, item)
	}
	return kept
}
|
||||||
|
|
||||||
|
// jsonMarshalIndent encodes value as indented JSON using the package's
// standard layout: no line prefix and the indent string below.
func jsonMarshalIndent(value any) ([]byte, error) {
	const (
		prefix = ""
		indent = " "
	)
	return json.MarshalIndent(value, prefix, indent)
}
|
||||||
127
pkg/plan/node_secrets_test.go
Normal file
127
pkg/plan/node_secrets_test.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
"metis/pkg/secrets"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNodeSecretHelpers(t *testing.T) {
|
||||||
|
if got := effectiveAtlasPassword(nil); got != "" {
|
||||||
|
t.Fatalf("effectiveAtlasPassword(nil) = %q", got)
|
||||||
|
}
|
||||||
|
if got := effectiveAtlasPasswordHash(nil); got != "" {
|
||||||
|
t.Fatalf("effectiveAtlasPasswordHash(nil) = %q", got)
|
||||||
|
}
|
||||||
|
sec := &secrets.NodeSecrets{SSHPassword: "ssh-pass", SSHPasswordHash: "$ssh$hash"}
|
||||||
|
if got := effectiveAtlasPassword(sec); got != "ssh-pass" {
|
||||||
|
t.Fatalf("effectiveAtlasPassword fallback = %q", got)
|
||||||
|
}
|
||||||
|
if got := effectiveAtlasPasswordHash(sec); got != "$ssh$hash" {
|
||||||
|
t.Fatalf("effectiveAtlasPasswordHash fallback = %q", got)
|
||||||
|
}
|
||||||
|
sec.AtlasPassword = "atlas-pass"
|
||||||
|
sec.AtlasPasswordHash = "$atlas$hash"
|
||||||
|
if got := effectiveAtlasPassword(sec); got != "atlas-pass" {
|
||||||
|
t.Fatalf("effectiveAtlasPassword explicit = %q", got)
|
||||||
|
}
|
||||||
|
if got := effectiveAtlasPasswordHash(sec); got != "$atlas$hash" {
|
||||||
|
t.Fatalf("effectiveAtlasPasswordHash explicit = %q", got)
|
||||||
|
}
|
||||||
|
if got := firstNonEmptyString("", " value ", "ignored"); got != "value" {
|
||||||
|
t.Fatalf("firstNonEmptyString = %q", got)
|
||||||
|
}
|
||||||
|
if !hasNodePasswords(&secrets.NodeSecrets{RootPasswordHash: "$root$hash"}) {
|
||||||
|
t.Fatal("expected root password hash to count as password material")
|
||||||
|
}
|
||||||
|
if hasNodePasswords(&secrets.NodeSecrets{}) {
|
||||||
|
t.Fatal("empty node secrets should not count as password material")
|
||||||
|
}
|
||||||
|
debug := redactedSecretsForImage(&secrets.NodeSecrets{Extra: map[string]string{"b": "2", "a": "1"}})
|
||||||
|
if !reflect.DeepEqual(debug["extra_keys"], []string{"a", "b"}) {
|
||||||
|
t.Fatalf("redactedSecretsForImage extra_keys = %#v", debug)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNodeSecretsFromEnvAndMergeNodeSecrets(t *testing.T) {
|
||||||
|
t.Setenv("METIS_NODE_SSH_PASSWORD", "ssh-pass")
|
||||||
|
t.Setenv("METIS_NODE_SSH_PASSWORD_HASH", "$ssh$hash")
|
||||||
|
t.Setenv("METIS_NODE_ATLAS_PASSWORD", "atlas-pass")
|
||||||
|
t.Setenv("METIS_NODE_ATLAS_PASSWORD_HASH", "$atlas$hash")
|
||||||
|
t.Setenv("METIS_NODE_ROOT_PASSWORD", "root-pass")
|
||||||
|
t.Setenv("METIS_NODE_ROOT_PASSWORD_HASH", "$root$hash")
|
||||||
|
envSecrets := nodeSecretsFromEnv()
|
||||||
|
if envSecrets == nil || envSecrets.RootPassword != "root-pass" || envSecrets.AtlasPasswordHash != "$atlas$hash" {
|
||||||
|
t.Fatalf("nodeSecretsFromEnv = %#v", envSecrets)
|
||||||
|
}
|
||||||
|
merged := mergeNodeSecrets(&secrets.NodeSecrets{
|
||||||
|
SSHPassword: "base-ssh",
|
||||||
|
K3sToken: "base-token",
|
||||||
|
CloudInit: "base-cloud",
|
||||||
|
Extra: map[string]string{"base": "1"},
|
||||||
|
}, &secrets.NodeSecrets{
|
||||||
|
AtlasPassword: "override-atlas",
|
||||||
|
RootPassword: "override-root",
|
||||||
|
K3sToken: "override-token",
|
||||||
|
CloudInit: "override-cloud",
|
||||||
|
Extra: map[string]string{"override": "2"},
|
||||||
|
})
|
||||||
|
if merged.K3sToken != "override-token" || merged.CloudInit != "override-cloud" || merged.AtlasPassword != "override-atlas" || merged.RootPassword != "override-root" {
|
||||||
|
t.Fatalf("mergeNodeSecrets = %#v", merged)
|
||||||
|
}
|
||||||
|
if merged.Extra["base"] != "1" || merged.Extra["override"] != "2" {
|
||||||
|
t.Fatalf("mergeNodeSecrets extras = %#v", merged.Extra)
|
||||||
|
}
|
||||||
|
if got := mergeNodeSecrets(nil, envSecrets); got.RootPasswordHash != "$root$hash" {
|
||||||
|
t.Fatalf("mergeNodeSecrets nil base = %#v", got)
|
||||||
|
}
|
||||||
|
if got := mergeNodeSecrets(envSecrets, nil); got.SSHPassword != "ssh-pass" {
|
||||||
|
t.Fatalf("mergeNodeSecrets nil override = %#v", got)
|
||||||
|
}
|
||||||
|
t.Setenv("METIS_NODE_SSH_PASSWORD", "")
|
||||||
|
t.Setenv("METIS_NODE_SSH_PASSWORD_HASH", "")
|
||||||
|
t.Setenv("METIS_NODE_ATLAS_PASSWORD", "")
|
||||||
|
t.Setenv("METIS_NODE_ATLAS_PASSWORD_HASH", "")
|
||||||
|
t.Setenv("METIS_NODE_ROOT_PASSWORD", "")
|
||||||
|
t.Setenv("METIS_NODE_ROOT_PASSWORD_HASH", "")
|
||||||
|
if got := nodeSecretsFromEnv(); got != nil {
|
||||||
|
t.Fatalf("expected empty env secrets to collapse to nil, got %#v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFirstbootEnvContentIncludesHashes(t *testing.T) {
|
||||||
|
cfg := &config.NodeConfig{
|
||||||
|
Hostname: "titan-15",
|
||||||
|
SSHUser: "atlas",
|
||||||
|
K3s: config.K3sConfig{Version: "v1.31.5+k3s1"},
|
||||||
|
}
|
||||||
|
content := firstbootEnvContent(cfg, &secrets.NodeSecrets{
|
||||||
|
AtlasPasswordHash: "$atlas$hash",
|
||||||
|
RootPasswordHash: "$root$hash",
|
||||||
|
})
|
||||||
|
if !reflect.DeepEqual(parseEnvLines(content), map[string]string{
|
||||||
|
"METIS_HOSTNAME": "'titan-15'",
|
||||||
|
"METIS_SSH_USER": "'atlas'",
|
||||||
|
"METIS_ATLAS_USER": "'atlas'",
|
||||||
|
"METIS_K3S_VERSION": "'v1.31.5+k3s1'",
|
||||||
|
"METIS_ATLAS_PASSWORD_HASH": "'$atlas$hash'",
|
||||||
|
"METIS_ROOT_PASSWORD_HASH": "'$root$hash'",
|
||||||
|
}) {
|
||||||
|
t.Fatalf("firstbootEnvContent = %q", content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseEnvLines turns newline-separated KEY=VALUE text into a map. Each line is
// split at its first "="; lines without "=" are ignored. Keys and values are
// stored exactly as written (surrounding quotes are kept).
func parseEnvLines(raw string) map[string]string {
	parsed := make(map[string]string)
	trimmed := strings.TrimSpace(raw)
	for _, entry := range strings.Split(trimmed, "\n") {
		if name, val, found := strings.Cut(entry, "="); found {
			parsed[name] = val
		}
	}
	return parsed
}
|
||||||
@ -15,10 +15,15 @@ import (
|
|||||||
// NodeSecrets holds per-node secret material to inject at burn time.
|
// NodeSecrets holds per-node secret material to inject at burn time.
|
||||||
// These should live in Vault at secret/data/nodes/<hostname>.
|
// These should live in Vault at secret/data/nodes/<hostname>.
|
||||||
type NodeSecrets struct {
|
type NodeSecrets struct {
|
||||||
SSHPassword string `json:"ssh_password,omitempty"`
|
SSHPassword string `json:"ssh_password,omitempty"`
|
||||||
K3sToken string `json:"k3s_token,omitempty"`
|
SSHPasswordHash string `json:"ssh_password_hash,omitempty"`
|
||||||
CloudInit string `json:"cloud_init,omitempty"`
|
AtlasPassword string `json:"atlas_password,omitempty"`
|
||||||
Extra map[string]string `json:"extra,omitempty"`
|
AtlasPasswordHash string `json:"atlas_password_hash,omitempty"`
|
||||||
|
RootPassword string `json:"root_password,omitempty"`
|
||||||
|
RootPasswordHash string `json:"root_password_hash,omitempty"`
|
||||||
|
K3sToken string `json:"k3s_token,omitempty"`
|
||||||
|
CloudInit string `json:"cloud_init,omitempty"`
|
||||||
|
Extra map[string]string `json:"extra,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Client fetches node secrets from Vault using either a token or AppRole.
|
// Client fetches node secrets from Vault using either a token or AppRole.
|
||||||
|
|||||||
@ -16,9 +16,11 @@ func TestFetchNodeReturnsData(t *testing.T) {
|
|||||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
"data": map[string]any{
|
"data": map[string]any{
|
||||||
"data": map[string]any{
|
"data": map[string]any{
|
||||||
"ssh_password": "p1",
|
"ssh_password": "p1",
|
||||||
"k3s_token": "t1",
|
"atlas_password_hash": "$atlas$hash",
|
||||||
"cloud_init": "ci",
|
"root_password": "root-pw",
|
||||||
|
"k3s_token": "t1",
|
||||||
|
"cloud_init": "ci",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@ -33,7 +35,7 @@ func TestFetchNodeReturnsData(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("fetch: %v", err)
|
t.Fatalf("fetch: %v", err)
|
||||||
}
|
}
|
||||||
if sec.SSHPassword != "p1" || sec.K3sToken != "t1" || sec.CloudInit != "ci" {
|
if sec.SSHPassword != "p1" || sec.AtlasPasswordHash != "$atlas$hash" || sec.RootPassword != "root-pw" || sec.K3sToken != "t1" || sec.CloudInit != "ci" {
|
||||||
t.Fatalf("unexpected secrets: %+v", sec)
|
t.Fatalf("unexpected secrets: %+v", sec)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -117,16 +117,24 @@ type App struct {
|
|||||||
inventory *inventory.Inventory
|
inventory *inventory.Inventory
|
||||||
metrics *Metrics
|
metrics *Metrics
|
||||||
|
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
jobs map[string]*Job
|
jobs map[string]*Job
|
||||||
snapshots map[string]SnapshotRecord
|
snapshots map[string]SnapshotRecord
|
||||||
targets map[string]facts.Targets
|
targets map[string]facts.Targets
|
||||||
artifactStore map[string]ArtifactSummary
|
artifactStore map[string]ArtifactSummary
|
||||||
deviceStore map[string]deviceSnapshot
|
deviceStore map[string]deviceSnapshot
|
||||||
|
desiredMetadata map[string]DesiredNodeMetadata
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewApp creates a Metis service app instance.
|
// NewApp creates a Metis service app instance.
|
||||||
func NewApp(settings Settings) (*App, error) {
|
func NewApp(settings Settings) (*App, error) {
|
||||||
|
if strings.TrimSpace(settings.DesiredMetadataPath) == "" {
|
||||||
|
baseDir := filepath.Dir(settings.SnapshotsPath)
|
||||||
|
if strings.TrimSpace(baseDir) == "" || baseDir == "." {
|
||||||
|
baseDir = filepath.Dir(settings.HistoryPath)
|
||||||
|
}
|
||||||
|
settings.DesiredMetadataPath = filepath.Join(baseDir, "desired-node-metadata.json")
|
||||||
|
}
|
||||||
if err := os.MkdirAll(settings.CacheDir, 0o755); err != nil {
|
if err := os.MkdirAll(settings.CacheDir, 0o755); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -141,18 +149,20 @@ func NewApp(settings Settings) (*App, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
app := &App{
|
app := &App{
|
||||||
settings: settings,
|
settings: settings,
|
||||||
inventory: inv,
|
inventory: inv,
|
||||||
metrics: NewMetrics(),
|
metrics: NewMetrics(),
|
||||||
jobs: map[string]*Job{},
|
jobs: map[string]*Job{},
|
||||||
snapshots: map[string]SnapshotRecord{},
|
snapshots: map[string]SnapshotRecord{},
|
||||||
targets: map[string]facts.Targets{},
|
targets: map[string]facts.Targets{},
|
||||||
artifactStore: map[string]ArtifactSummary{},
|
artifactStore: map[string]ArtifactSummary{},
|
||||||
deviceStore: map[string]deviceSnapshot{},
|
deviceStore: map[string]deviceSnapshot{},
|
||||||
|
desiredMetadata: map[string]DesiredNodeMetadata{},
|
||||||
}
|
}
|
||||||
_ = app.loadSnapshots()
|
_ = app.loadSnapshots()
|
||||||
_ = app.loadTargets()
|
_ = app.loadTargets()
|
||||||
_ = app.loadArtifacts()
|
_ = app.loadArtifacts()
|
||||||
|
_ = app.loadDesiredNodeMetadata()
|
||||||
return app, nil
|
return app, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -289,6 +299,9 @@ func (a *App) StoreSnapshot(record SnapshotRecord) error {
|
|||||||
if err := a.syncScratchAnnotations(record); err != nil {
|
if err := a.syncScratchAnnotations(record); err != nil {
|
||||||
a.appendEvent(annotationSyncEvent(record.Node, err))
|
a.appendEvent(annotationSyncEvent(record.Node, err))
|
||||||
}
|
}
|
||||||
|
if err := a.syncDesiredNodeMetadata(record); err != nil {
|
||||||
|
a.appendEvent(desiredNodeMetadataSyncEvent(record.Node, err))
|
||||||
|
}
|
||||||
a.appendEvent(Event{
|
a.appendEvent(Event{
|
||||||
Time: record.CollectedAt,
|
Time: record.CollectedAt,
|
||||||
Kind: "sentinel.snapshot",
|
Kind: "sentinel.snapshot",
|
||||||
|
|||||||
@ -22,6 +22,9 @@ type clusterNode struct {
|
|||||||
Worker bool
|
Worker bool
|
||||||
ControlPlane bool
|
ControlPlane bool
|
||||||
Unschedulable bool
|
Unschedulable bool
|
||||||
|
Labels map[string]string
|
||||||
|
Annotations map[string]string
|
||||||
|
Taints []string
|
||||||
USBScratchStatus string
|
USBScratchStatus string
|
||||||
USBScratchManagedPaths string
|
USBScratchManagedPaths string
|
||||||
}
|
}
|
||||||
@ -179,6 +182,11 @@ func clusterNodes() []clusterNode {
|
|||||||
} `json:"metadata"`
|
} `json:"metadata"`
|
||||||
Spec struct {
|
Spec struct {
|
||||||
Unschedulable bool `json:"unschedulable"`
|
Unschedulable bool `json:"unschedulable"`
|
||||||
|
Taints []struct {
|
||||||
|
Key string `json:"key"`
|
||||||
|
Value string `json:"value"`
|
||||||
|
Effect string `json:"effect"`
|
||||||
|
} `json:"taints"`
|
||||||
} `json:"spec"`
|
} `json:"spec"`
|
||||||
} `json:"items"`
|
} `json:"items"`
|
||||||
}
|
}
|
||||||
@ -189,6 +197,28 @@ func clusterNodes() []clusterNode {
|
|||||||
for _, item := range payload.Items {
|
for _, item := range payload.Items {
|
||||||
labels := item.Metadata.Labels
|
labels := item.Metadata.Labels
|
||||||
annotations := item.Metadata.Annotations
|
annotations := item.Metadata.Annotations
|
||||||
|
if labels == nil {
|
||||||
|
labels = map[string]string{}
|
||||||
|
}
|
||||||
|
if annotations == nil {
|
||||||
|
annotations = map[string]string{}
|
||||||
|
}
|
||||||
|
taints := make([]string, 0, len(item.Spec.Taints))
|
||||||
|
for _, taint := range item.Spec.Taints {
|
||||||
|
key := strings.TrimSpace(taint.Key)
|
||||||
|
if key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
raw := key
|
||||||
|
if value := strings.TrimSpace(taint.Value); value != "" {
|
||||||
|
raw += "=" + value
|
||||||
|
}
|
||||||
|
if effect := strings.TrimSpace(taint.Effect); effect != "" {
|
||||||
|
raw += ":" + effect
|
||||||
|
}
|
||||||
|
taints = append(taints, raw)
|
||||||
|
}
|
||||||
|
sort.Strings(taints)
|
||||||
nodes = append(nodes, clusterNode{
|
nodes = append(nodes, clusterNode{
|
||||||
Name: strings.TrimSpace(item.Metadata.Name),
|
Name: strings.TrimSpace(item.Metadata.Name),
|
||||||
Arch: strings.TrimSpace(labels["kubernetes.io/arch"]),
|
Arch: strings.TrimSpace(labels["kubernetes.io/arch"]),
|
||||||
@ -196,6 +226,9 @@ func clusterNodes() []clusterNode {
|
|||||||
Worker: labels["node-role.kubernetes.io/worker"] == "true",
|
Worker: labels["node-role.kubernetes.io/worker"] == "true",
|
||||||
ControlPlane: labels["node-role.kubernetes.io/control-plane"] != "" || labels["node-role.kubernetes.io/master"] != "",
|
ControlPlane: labels["node-role.kubernetes.io/control-plane"] != "" || labels["node-role.kubernetes.io/master"] != "",
|
||||||
Unschedulable: item.Spec.Unschedulable,
|
Unschedulable: item.Spec.Unschedulable,
|
||||||
|
Labels: labels,
|
||||||
|
Annotations: annotations,
|
||||||
|
Taints: taints,
|
||||||
USBScratchStatus: strings.TrimSpace(annotations["maintenance.bstein.dev/usb-scratch-status"]),
|
USBScratchStatus: strings.TrimSpace(annotations["maintenance.bstein.dev/usb-scratch-status"]),
|
||||||
USBScratchManagedPaths: strings.TrimSpace(annotations["maintenance.bstein.dev/usb-scratch-managed-paths"]),
|
USBScratchManagedPaths: strings.TrimSpace(annotations["maintenance.bstein.dev/usb-scratch-managed-paths"]),
|
||||||
})
|
})
|
||||||
|
|||||||
@ -37,6 +37,11 @@ func TestServiceArtifactAndSnapshotPersistenceErrorBranches(t *testing.T) {
|
|||||||
t.Fatal("expected persistTargets to fail when parent is a file")
|
t.Fatal("expected persistTargets to fail when parent is a file")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
app.settings.DesiredMetadataPath = filepath.Join(fileParent, "desired-node-metadata.json")
|
||||||
|
if err := app.persistDesiredNodeMetadata(); err == nil {
|
||||||
|
t.Fatal("expected persistDesiredNodeMetadata to fail when parent is a file")
|
||||||
|
}
|
||||||
|
|
||||||
invalidArtifactState := filepath.Join(t.TempDir(), "artifacts.json")
|
invalidArtifactState := filepath.Join(t.TempDir(), "artifacts.json")
|
||||||
if err := os.WriteFile(invalidArtifactState, []byte("{bad-json"), 0o644); err != nil {
|
if err := os.WriteFile(invalidArtifactState, []byte("{bad-json"), 0o644); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -45,6 +50,15 @@ func TestServiceArtifactAndSnapshotPersistenceErrorBranches(t *testing.T) {
|
|||||||
if err := app.loadArtifacts(); err == nil {
|
if err := app.loadArtifacts(); err == nil {
|
||||||
t.Fatal("expected loadArtifacts to reject invalid json")
|
t.Fatal("expected loadArtifacts to reject invalid json")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
invalidDesiredState := filepath.Join(t.TempDir(), "desired-node-metadata.json")
|
||||||
|
if err := os.WriteFile(invalidDesiredState, []byte("{bad-json"), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
app.settings.DesiredMetadataPath = invalidDesiredState
|
||||||
|
if err := app.loadDesiredNodeMetadata(); err == nil {
|
||||||
|
t.Fatal("expected loadDesiredNodeMetadata to reject invalid json")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestServiceReplacementAndDeviceBranches(t *testing.T) {
|
func TestServiceReplacementAndDeviceBranches(t *testing.T) {
|
||||||
|
|||||||
@ -162,6 +162,7 @@ nodes:
|
|||||||
snapshotsPath := filepath.Join(dir, "snapshots.json")
|
snapshotsPath := filepath.Join(dir, "snapshots.json")
|
||||||
targetsPath := filepath.Join(dir, "targets.json")
|
targetsPath := filepath.Join(dir, "targets.json")
|
||||||
artifactStatePath := filepath.Join(dir, "artifacts.json")
|
artifactStatePath := filepath.Join(dir, "artifacts.json")
|
||||||
|
desiredMetadataPath := filepath.Join(dir, "desired-node-metadata.json")
|
||||||
|
|
||||||
seedSnapshots := map[string]SnapshotRecord{
|
seedSnapshots := map[string]SnapshotRecord{
|
||||||
"titan-15": {
|
"titan-15": {
|
||||||
@ -190,19 +191,33 @@ nodes:
|
|||||||
if err := os.WriteFile(artifactStatePath, data, 0o644); err != nil {
|
if err := os.WriteFile(artifactStatePath, data, 0o644); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
seedDesiredMetadata := map[string]DesiredNodeMetadata{
|
||||||
|
"titan-15": {
|
||||||
|
Node: "titan-15",
|
||||||
|
Hostname: "titan-15",
|
||||||
|
CapturedAt: testTime(t),
|
||||||
|
Labels: map[string]string{"hardware": "rpi5"},
|
||||||
|
Taints: []string{"dedicated=recovery:NoSchedule"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
data, _ = json.MarshalIndent(seedDesiredMetadata, "", " ")
|
||||||
|
if err := os.WriteFile(desiredMetadataPath, data, 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
app, err := NewApp(Settings{
|
app, err := NewApp(Settings{
|
||||||
InventoryPath: invPath,
|
InventoryPath: invPath,
|
||||||
CacheDir: filepath.Join(dir, "cache"),
|
CacheDir: filepath.Join(dir, "cache"),
|
||||||
ArtifactDir: filepath.Join(dir, "artifacts"),
|
ArtifactDir: filepath.Join(dir, "artifacts"),
|
||||||
ArtifactStatePath: artifactStatePath,
|
ArtifactStatePath: artifactStatePath,
|
||||||
HistoryPath: filepath.Join(dir, "history.jsonl"),
|
HistoryPath: filepath.Join(dir, "history.jsonl"),
|
||||||
SnapshotsPath: snapshotsPath,
|
SnapshotsPath: snapshotsPath,
|
||||||
TargetsPath: targetsPath,
|
TargetsPath: targetsPath,
|
||||||
DefaultFlashHost: "titan-22",
|
DesiredMetadataPath: desiredMetadataPath,
|
||||||
FlashHosts: []string{"titan-22"},
|
DefaultFlashHost: "titan-22",
|
||||||
LocalHost: "titan-22",
|
FlashHosts: []string{"titan-22"},
|
||||||
AllowedGroups: []string{"admin"},
|
LocalHost: "titan-22",
|
||||||
|
AllowedGroups: []string{"admin"},
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("NewApp: %v", err)
|
t.Fatalf("NewApp: %v", err)
|
||||||
@ -211,6 +226,9 @@ nodes:
|
|||||||
if got := app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:latest" {
|
if got := app.artifacts()["titan-15"].Ref; got != "reg/proj/titan-15:latest" {
|
||||||
t.Fatalf("artifacts() = %q", got)
|
t.Fatalf("artifacts() = %q", got)
|
||||||
}
|
}
|
||||||
|
if desired, ok := app.desiredMetadataForNode("titan-15"); !ok || desired.Labels["hardware"] != "rpi5" {
|
||||||
|
t.Fatalf("desiredMetadataForNode() = %#v ok=%v", desired, ok)
|
||||||
|
}
|
||||||
if err := app.recordArtifact(ArtifactSummary{Node: "titan-15", Ref: "reg/proj/titan-15:v2"}); err != nil {
|
if err := app.recordArtifact(ArtifactSummary{Node: "titan-15", Ref: "reg/proj/titan-15:v2"}); err != nil {
|
||||||
t.Fatalf("recordArtifact: %v", err)
|
t.Fatalf("recordArtifact: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
483
pkg/service/node_recovery.go
Normal file
483
pkg/service/node_recovery.go
Normal file
@ -0,0 +1,483 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DesiredNodeMetadata captures the node identity Metis should preserve through
// recovery builds and re-assert after the node rejoins the cluster.
type DesiredNodeMetadata struct {
	// Node is the node name the record is stored under.
	Node string `json:"node"`
	// Hostname is the hostname taken from the inventory node spec.
	Hostname string `json:"hostname,omitempty"`
	// CapturedAt is the UTC time the record was staged. encoding/json's
	// omitempty never omits a struct value, so a zero time is still written.
	CapturedAt time.Time `json:"captured_at,omitempty"`
	// Labels holds the label key/value pairs to restore on the node.
	Labels map[string]string `json:"labels,omitempty"`
	// Annotations holds the annotation key/value pairs to restore on the node.
	Annotations map[string]string `json:"annotations,omitempty"`
	// Taints lists taints encoded as "key[=value][:effect]".
	Taints []string `json:"taints,omitempty"`
	// Unschedulable mirrors the node's spec.unschedulable flag.
	Unschedulable bool `json:"unschedulable,omitempty"`
}
|
||||||
|
|
||||||
|
func (a *App) loadDesiredNodeMetadata() error {
|
||||||
|
data, err := os.ReadFile(a.settings.DesiredMetadataPath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var desired map[string]DesiredNodeMetadata
|
||||||
|
if err := json.Unmarshal(data, &desired); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
a.mu.Lock()
|
||||||
|
a.desiredMetadata = desired
|
||||||
|
a.mu.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) persistDesiredNodeMetadata() error {
|
||||||
|
a.mu.RLock()
|
||||||
|
data, err := json.MarshalIndent(a.desiredMetadata, "", " ")
|
||||||
|
a.mu.RUnlock()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(a.settings.DesiredMetadataPath), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(a.settings.DesiredMetadataPath, data, 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) desiredMetadataForNode(node string) (DesiredNodeMetadata, bool) {
|
||||||
|
node = strings.TrimSpace(node)
|
||||||
|
if node == "" {
|
||||||
|
return DesiredNodeMetadata{}, false
|
||||||
|
}
|
||||||
|
a.mu.RLock()
|
||||||
|
defer a.mu.RUnlock()
|
||||||
|
desired, ok := a.desiredMetadata[node]
|
||||||
|
if !ok {
|
||||||
|
return DesiredNodeMetadata{}, false
|
||||||
|
}
|
||||||
|
return cloneDesiredNodeMetadata(desired), true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) stageDesiredNodeMetadata(nodeName string) (DesiredNodeMetadata, error) {
|
||||||
|
nodeName = strings.TrimSpace(nodeName)
|
||||||
|
if nodeName == "" {
|
||||||
|
return DesiredNodeMetadata{}, fmt.Errorf("node metadata requires a node name")
|
||||||
|
}
|
||||||
|
nodeSpec, _, err := a.inventory.FindNode(nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return DesiredNodeMetadata{}, err
|
||||||
|
}
|
||||||
|
cfg, err := config.Build(a.inventory, nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return DesiredNodeMetadata{}, err
|
||||||
|
}
|
||||||
|
desired := DesiredNodeMetadata{
|
||||||
|
Node: nodeName,
|
||||||
|
Hostname: strings.TrimSpace(nodeSpec.Hostname),
|
||||||
|
CapturedAt: time.Now().UTC(),
|
||||||
|
Labels: filteredRestorableLabels(cfg.Labels),
|
||||||
|
Taints: restorableTaints(cfg.Taints),
|
||||||
|
}
|
||||||
|
if existing, ok := a.desiredMetadataForNode(nodeName); ok {
|
||||||
|
desired = mergeDesiredNodeMetadata(desired, existing)
|
||||||
|
}
|
||||||
|
if live, ok := liveClusterNode(nodeName); ok {
|
||||||
|
desired = mergeDesiredNodeMetadata(desired, desiredMetadataFromCluster(*live))
|
||||||
|
}
|
||||||
|
desired.Labels = normalizeStringMap(desired.Labels)
|
||||||
|
desired.Annotations = normalizeStringMap(desired.Annotations)
|
||||||
|
desired.Taints = normalizeTaints(desired.Taints)
|
||||||
|
a.mu.Lock()
|
||||||
|
if a.desiredMetadata == nil {
|
||||||
|
a.desiredMetadata = map[string]DesiredNodeMetadata{}
|
||||||
|
}
|
||||||
|
a.desiredMetadata[nodeName] = desired
|
||||||
|
a.mu.Unlock()
|
||||||
|
if err := a.persistDesiredNodeMetadata(); err != nil {
|
||||||
|
return DesiredNodeMetadata{}, err
|
||||||
|
}
|
||||||
|
return cloneDesiredNodeMetadata(desired), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) syncDesiredNodeMetadata(record SnapshotRecord) error {
|
||||||
|
desired, ok := a.desiredMetadataForNode(record.Node)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
live, ok := liveClusterNode(record.Node)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return patchDesiredNodeMetadata(*live, desired)
|
||||||
|
}
|
||||||
|
|
||||||
|
func desiredMetadataFromCluster(node clusterNode) DesiredNodeMetadata {
|
||||||
|
return DesiredNodeMetadata{
|
||||||
|
Node: strings.TrimSpace(node.Name),
|
||||||
|
Labels: filteredRestorableLabels(node.Labels),
|
||||||
|
Annotations: filteredRestorableAnnotations(node.Annotations),
|
||||||
|
Taints: restorableTaints(node.Taints),
|
||||||
|
Unschedulable: node.Unschedulable,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeDesiredNodeMetadata(base, overlay DesiredNodeMetadata) DesiredNodeMetadata {
|
||||||
|
merged := cloneDesiredNodeMetadata(base)
|
||||||
|
if hostname := strings.TrimSpace(overlay.Hostname); hostname != "" {
|
||||||
|
merged.Hostname = hostname
|
||||||
|
}
|
||||||
|
if !overlay.CapturedAt.IsZero() {
|
||||||
|
merged.CapturedAt = overlay.CapturedAt
|
||||||
|
}
|
||||||
|
if merged.Labels == nil {
|
||||||
|
merged.Labels = map[string]string{}
|
||||||
|
}
|
||||||
|
for key, value := range overlay.Labels {
|
||||||
|
if key = strings.TrimSpace(key); key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
merged.Labels[key] = strings.TrimSpace(value)
|
||||||
|
}
|
||||||
|
if merged.Annotations == nil {
|
||||||
|
merged.Annotations = map[string]string{}
|
||||||
|
}
|
||||||
|
for key, value := range overlay.Annotations {
|
||||||
|
if key = strings.TrimSpace(key); key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
merged.Annotations[key] = strings.TrimSpace(value)
|
||||||
|
}
|
||||||
|
if len(overlay.Taints) > 0 {
|
||||||
|
merged.Taints = normalizeTaints(overlay.Taints)
|
||||||
|
}
|
||||||
|
merged.Unschedulable = overlay.Unschedulable
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func patchDesiredNodeMetadata(live clusterNode, desired DesiredNodeMetadata) error {
|
||||||
|
node := strings.TrimSpace(desired.Node)
|
||||||
|
if node == "" {
|
||||||
|
node = strings.TrimSpace(live.Name)
|
||||||
|
}
|
||||||
|
if node == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
labelPatch := metadataStringPatch(live.Labels, desired.Labels, isRestorableLabel)
|
||||||
|
annotationPatch := metadataStringPatch(live.Annotations, desired.Annotations, isRestorableAnnotation)
|
||||||
|
mergedTaints := mergeLiveAndDesiredTaints(live.Taints, desired.Taints)
|
||||||
|
body := map[string]any{}
|
||||||
|
metadata := map[string]any{}
|
||||||
|
if len(labelPatch) > 0 {
|
||||||
|
metadata["labels"] = labelPatch
|
||||||
|
}
|
||||||
|
if len(annotationPatch) > 0 {
|
||||||
|
metadata["annotations"] = annotationPatch
|
||||||
|
}
|
||||||
|
if len(metadata) > 0 {
|
||||||
|
body["metadata"] = metadata
|
||||||
|
}
|
||||||
|
spec := map[string]any{}
|
||||||
|
if live.Unschedulable != desired.Unschedulable {
|
||||||
|
spec["unschedulable"] = desired.Unschedulable
|
||||||
|
}
|
||||||
|
if !sameTaints(live.Taints, mergedTaints) {
|
||||||
|
spec["taints"] = taintPatchPayload(mergedTaints)
|
||||||
|
}
|
||||||
|
if len(spec) > 0 {
|
||||||
|
body["spec"] = spec
|
||||||
|
}
|
||||||
|
if len(body) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
kube, err := kubeClientFactory()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return kube.mergePatch("/api/v1/nodes/"+node, body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// metadataStringPatch computes the merge-patch entries that turn the live
// label/annotation map into the desired one, restricted to keys accepted by
// allow.
//
// For every allowed desired key the trimmed desired value is emitted when the
// key is missing on the live object or its trimmed live value differs. Every
// allowed live key that has no desired counterpart is emitted with a nil
// value, which removes it in a JSON merge patch. Blank keys are ignored. The
// returned map is never nil.
func metadataStringPatch(live, desired map[string]string, allow func(string) bool) map[string]any {
	patch := map[string]any{}
	// wanted records the trimmed, allowed desired keys so the removal pass
	// below cannot delete a key the first pass decided to keep or set.
	wanted := make(map[string]struct{}, len(desired))
	for key, value := range desired {
		key = strings.TrimSpace(key)
		if key == "" || !allow(key) {
			continue
		}
		wanted[key] = struct{}{}
		value = strings.TrimSpace(value)
		// Check presence explicitly: a missing key reads as "", which would
		// otherwise hide a desired empty-valued key such as a node-role label.
		current, present := live[key]
		if !present || strings.TrimSpace(current) != value {
			patch[key] = value
		}
	}
	for key := range live {
		key = strings.TrimSpace(key)
		if key == "" || !allow(key) {
			continue
		}
		if _, ok := wanted[key]; !ok {
			patch[key] = nil
		}
	}
	return patch
}
|
||||||
|
|
||||||
|
func liveClusterNode(node string) (*clusterNode, bool) {
|
||||||
|
node = strings.TrimSpace(node)
|
||||||
|
if node == "" {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
for _, live := range clusterNodes() {
|
||||||
|
if strings.TrimSpace(live.Name) == node {
|
||||||
|
copyNode := live
|
||||||
|
return ©Node, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func filteredRestorableLabels(values map[string]string) map[string]string {
|
||||||
|
filtered := map[string]string{}
|
||||||
|
for key, value := range values {
|
||||||
|
key = strings.TrimSpace(key)
|
||||||
|
if key == "" || !isRestorableLabel(key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered[key] = strings.TrimSpace(value)
|
||||||
|
}
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
func filteredRestorableAnnotations(values map[string]string) map[string]string {
|
||||||
|
filtered := map[string]string{}
|
||||||
|
for key, value := range values {
|
||||||
|
key = strings.TrimSpace(key)
|
||||||
|
if key == "" || !isRestorableAnnotation(key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered[key] = strings.TrimSpace(value)
|
||||||
|
}
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeStringMap returns a fresh copy of values with keys and values
// trimmed and blank keys dropped. It returns nil when nothing remains, so an
// empty result is omitted by `omitempty` JSON tags.
func normalizeStringMap(values map[string]string) map[string]string {
	var cleaned map[string]string
	for rawKey, rawValue := range values {
		name := strings.TrimSpace(rawKey)
		if name == "" {
			continue
		}
		if cleaned == nil {
			// Allocate lazily so an all-blank input still yields nil.
			cleaned = make(map[string]string, len(values))
		}
		cleaned[name] = strings.TrimSpace(rawValue)
	}
	return cleaned
}
|
||||||
|
|
||||||
|
func restorableTaints(values []string) []string {
|
||||||
|
filtered := make([]string, 0, len(values))
|
||||||
|
for _, value := range values {
|
||||||
|
value = normalizeTaint(value)
|
||||||
|
if value == "" || !isRestorableTaint(value) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, value)
|
||||||
|
}
|
||||||
|
return normalizeTaints(filtered)
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeTaints(values []string) []string {
|
||||||
|
if len(values) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
out := make([]string, 0, len(values))
|
||||||
|
for _, value := range values {
|
||||||
|
value = normalizeTaint(value)
|
||||||
|
if value == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := seen[value]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[value] = struct{}{}
|
||||||
|
out = append(out, value)
|
||||||
|
}
|
||||||
|
sort.Strings(out)
|
||||||
|
if len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTaint returns the canonical form of a taint string, which is the
// input with leading and trailing whitespace removed.
func normalizeTaint(value string) string {
	canonical := strings.TrimSpace(value)
	return canonical
}
|
||||||
|
|
||||||
|
func sameTaints(left, right []string) bool {
|
||||||
|
left = normalizeTaints(left)
|
||||||
|
right = normalizeTaints(right)
|
||||||
|
if len(left) != len(right) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for idx := range left {
|
||||||
|
if left[idx] != right[idx] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeLiveAndDesiredTaints(live, desired []string) []string {
|
||||||
|
merged := make([]string, 0, len(live)+len(desired))
|
||||||
|
for _, taint := range live {
|
||||||
|
taint = normalizeTaint(taint)
|
||||||
|
if taint == "" || isRestorableTaint(taint) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
merged = append(merged, taint)
|
||||||
|
}
|
||||||
|
merged = append(merged, restorableTaints(desired)...)
|
||||||
|
return normalizeTaints(merged)
|
||||||
|
}
|
||||||
|
|
||||||
|
func taintPatchPayload(values []string) []map[string]string {
|
||||||
|
payload := make([]map[string]string, 0, len(values))
|
||||||
|
for _, value := range normalizeTaints(values) {
|
||||||
|
key, taintValue, effect := splitTaint(value)
|
||||||
|
if key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entry := map[string]string{"key": key}
|
||||||
|
if taintValue != "" {
|
||||||
|
entry["value"] = taintValue
|
||||||
|
}
|
||||||
|
if effect != "" {
|
||||||
|
entry["effect"] = effect
|
||||||
|
}
|
||||||
|
payload = append(payload, entry)
|
||||||
|
}
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitTaint decodes a taint written as "key[=value][:effect]" and returns its
// key, value, and effect, each trimmed. The effect is whatever follows the
// LAST ":"; the value is whatever follows the FIRST "=" in the remainder.
// Blank input yields three empty strings.
func splitTaint(raw string) (string, string, string) {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return "", "", ""
	}

	keyValue, effect := trimmed, ""
	if colon := strings.LastIndex(trimmed, ":"); colon >= 0 {
		keyValue = strings.TrimSpace(trimmed[:colon])
		effect = strings.TrimSpace(trimmed[colon+1:])
	}

	// Cut leaves value empty and key untouched when there is no "=".
	key, value, _ := strings.Cut(keyValue, "=")
	return strings.TrimSpace(key), strings.TrimSpace(value), effect
}
|
||||||
|
|
||||||
|
func isRestorableTaint(raw string) bool {
|
||||||
|
key, _, _ := splitTaint(raw)
|
||||||
|
if key == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, prefix := range []string{
|
||||||
|
"node.kubernetes.io/",
|
||||||
|
"node.cloudprovider.kubernetes.io/",
|
||||||
|
"ToBeDeletedByClusterAutoscaler",
|
||||||
|
} {
|
||||||
|
if strings.HasPrefix(key, prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// isRestorableLabel reports whether a label key is one Metis captures and
// re-applies. Blank keys are rejected, node-role.kubernetes.io/ keys are always
// accepted, and keys under any of the excluded prefixes below are rejected.
// Every other key is accepted.
func isRestorableLabel(key string) bool {
	name := strings.TrimSpace(key)
	switch {
	case name == "":
		return false
	case strings.HasPrefix(name, "node-role.kubernetes.io/"):
		return true
	}

	excluded := [...]string{
		"kubernetes.io/",
		"beta.kubernetes.io/",
		"node.kubernetes.io/",
		"topology.kubernetes.io/",
		"feature.node.kubernetes.io/",
		"failure-domain.beta.kubernetes.io/",
		"nvidia.com/",
		"k3s.io/",
		"rke2.io/",
		"volumes.kubernetes.io/",
		"node.cloudprovider.kubernetes.io/",
	}
	for i := range excluded {
		if strings.HasPrefix(name, excluded[i]) {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// isRestorableAnnotation reports whether an annotation key is one Metis
// captures and re-applies. Blank keys and keys under any of the excluded
// prefixes below are rejected; every other key is accepted.
func isRestorableAnnotation(key string) bool {
	name := strings.TrimSpace(key)
	if name == "" {
		return false
	}

	excluded := [...]string{
		"kubectl.kubernetes.io/",
		"kubeadm.alpha.kubernetes.io/",
		"kubernetes.io/",
		"node.alpha.kubernetes.io/",
		"node.kubernetes.io/",
		"volumes.kubernetes.io/",
		"csi.volume.kubernetes.io/",
		"csi.storage.k8s.io/",
		"flannel.alpha.coreos.com/",
		"projectcalico.org/",
		"rke2.io/",
		"k3s.io/",
		"nvidia.com/",
	}
	for i := range excluded {
		if strings.HasPrefix(name, excluded[i]) {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
func cloneDesiredNodeMetadata(value DesiredNodeMetadata) DesiredNodeMetadata {
|
||||||
|
clone := value
|
||||||
|
clone.Labels = normalizeStringMap(value.Labels)
|
||||||
|
clone.Annotations = normalizeStringMap(value.Annotations)
|
||||||
|
clone.Taints = normalizeTaints(value.Taints)
|
||||||
|
return clone
|
||||||
|
}
|
||||||
|
|
||||||
|
func desiredNodeMetadataSyncEvent(node string, err error) Event {
|
||||||
|
return Event{
|
||||||
|
Time: time.Now().UTC(),
|
||||||
|
Kind: "sentinel.node-metadata",
|
||||||
|
Summary: fmt.Sprintf("Could not restore desired node metadata for %s", node),
|
||||||
|
Details: map[string]any{
|
||||||
|
"node": node,
|
||||||
|
"error": err.Error(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
254
pkg/service/node_recovery_test.go
Normal file
254
pkg/service/node_recovery_test.go
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/sentinel"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestStageDesiredNodeMetadataMergesInventoryAndLiveCluster(t *testing.T) {
|
||||||
|
kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"items": []any{
|
||||||
|
map[string]any{
|
||||||
|
"metadata": map[string]any{
|
||||||
|
"name": "titan-15",
|
||||||
|
"labels": map[string]string{
|
||||||
|
"hardware": "rpi5",
|
||||||
|
"rack": "a1",
|
||||||
|
"maintenance.bstein.dev/color": "blue",
|
||||||
|
"kubernetes.io/arch": "arm64",
|
||||||
|
"node-role.kubernetes.io/worker": "true",
|
||||||
|
},
|
||||||
|
"annotations": map[string]string{
|
||||||
|
"maintenance.bstein.dev/owner": "atlas",
|
||||||
|
"volumes.kubernetes.io/controller-managed-attach-detach": "true",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"spec": map[string]any{
|
||||||
|
"unschedulable": true,
|
||||||
|
"taints": []any{
|
||||||
|
map[string]any{"key": "dedicated", "value": "recovery", "effect": "NoSchedule"},
|
||||||
|
map[string]any{"key": "node.kubernetes.io/unreachable", "effect": "NoExecute"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kube.Close()
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
|
||||||
|
app := newTestApp(t)
|
||||||
|
app.inventory.Nodes[0].Labels = map[string]string{"hardware": "rpi4", "rack": "a1"}
|
||||||
|
app.inventory.Nodes[0].Taints = []string{"flash=true:NoSchedule"}
|
||||||
|
app.desiredMetadata["titan-15"] = DesiredNodeMetadata{
|
||||||
|
Node: "titan-15",
|
||||||
|
Annotations: map[string]string{"maintenance.bstein.dev/legacy": "keep"},
|
||||||
|
}
|
||||||
|
|
||||||
|
desired, err := app.stageDesiredNodeMetadata("titan-15")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("stageDesiredNodeMetadata: %v", err)
|
||||||
|
}
|
||||||
|
if desired.Hostname != "titan-15" || !desired.Unschedulable {
|
||||||
|
t.Fatalf("unexpected desired metadata header: %#v", desired)
|
||||||
|
}
|
||||||
|
if desired.Labels["hardware"] != "rpi5" || desired.Labels["rack"] != "a1" || desired.Labels["maintenance.bstein.dev/color"] != "blue" {
|
||||||
|
t.Fatalf("unexpected desired labels: %#v", desired.Labels)
|
||||||
|
}
|
||||||
|
if _, ok := desired.Labels["kubernetes.io/arch"]; ok {
|
||||||
|
t.Fatalf("system labels should not be persisted: %#v", desired.Labels)
|
||||||
|
}
|
||||||
|
if desired.Annotations["maintenance.bstein.dev/owner"] != "atlas" || desired.Annotations["maintenance.bstein.dev/legacy"] != "keep" {
|
||||||
|
t.Fatalf("unexpected desired annotations: %#v", desired.Annotations)
|
||||||
|
}
|
||||||
|
if _, ok := desired.Annotations["volumes.kubernetes.io/controller-managed-attach-detach"]; ok {
|
||||||
|
t.Fatalf("controller annotations should not be persisted: %#v", desired.Annotations)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(desired.Taints, []string{"dedicated=recovery:NoSchedule"}) {
|
||||||
|
t.Fatalf("unexpected desired taints: %#v", desired.Taints)
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(app.settings.DesiredMetadataPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read desired metadata file: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(string(data), "titan-15") {
|
||||||
|
t.Fatalf("desired metadata file missing titan-15: %s", string(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStoreSnapshotRestoresDesiredNodeMetadata(t *testing.T) {
|
||||||
|
var patchBody map[string]any
|
||||||
|
kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"items": []any{
|
||||||
|
map[string]any{
|
||||||
|
"metadata": map[string]any{
|
||||||
|
"name": "titan-15",
|
||||||
|
"labels": map[string]string{
|
||||||
|
"hardware": "rpi4",
|
||||||
|
"maintenance.bstein.dev/old": "1",
|
||||||
|
},
|
||||||
|
"annotations": map[string]string{
|
||||||
|
"maintenance.bstein.dev/mode": "old",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"spec": map[string]any{
|
||||||
|
"unschedulable": true,
|
||||||
|
"taints": []any{
|
||||||
|
map[string]any{"key": "dedicated", "value": "old", "effect": "NoSchedule"},
|
||||||
|
map[string]any{"key": "node.kubernetes.io/unreachable", "effect": "NoExecute"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
case r.Method == http.MethodPatch && r.URL.Path == "/api/v1/nodes/titan-15":
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&patchBody); err != nil {
|
||||||
|
t.Fatalf("decode patch: %v", err)
|
||||||
|
}
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"status": "ok"})
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kube.Close()
|
||||||
|
installKubeFactory(t, kube)
|
||||||
|
|
||||||
|
app := newTestApp(t)
|
||||||
|
app.desiredMetadata["titan-15"] = DesiredNodeMetadata{
|
||||||
|
Node: "titan-15",
|
||||||
|
Hostname: "titan-15",
|
||||||
|
Labels: map[string]string{"hardware": "rpi5"},
|
||||||
|
Annotations: map[string]string{"maintenance.bstein.dev/mode": "recovery"},
|
||||||
|
Taints: []string{"dedicated=recovery:NoSchedule"},
|
||||||
|
Unschedulable: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := app.StoreSnapshot(SnapshotRecord{
|
||||||
|
Node: "titan-15",
|
||||||
|
CollectedAt: time.Date(2026, 4, 24, 6, 0, 0, 0, time.UTC),
|
||||||
|
Snapshot: sentinel.Snapshot{Hostname: "titan-15"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("StoreSnapshot: %v", err)
|
||||||
|
}
|
||||||
|
if patchBody == nil {
|
||||||
|
t.Fatal("expected desired metadata patch")
|
||||||
|
}
|
||||||
|
metadata := patchBody["metadata"].(map[string]any)
|
||||||
|
labels := metadata["labels"].(map[string]any)
|
||||||
|
if labels["hardware"] != "rpi5" || labels["maintenance.bstein.dev/old"] != nil {
|
||||||
|
t.Fatalf("unexpected label patch: %#v", labels)
|
||||||
|
}
|
||||||
|
annotations := metadata["annotations"].(map[string]any)
|
||||||
|
if annotations["maintenance.bstein.dev/mode"] != "recovery" {
|
||||||
|
t.Fatalf("unexpected annotation patch: %#v", annotations)
|
||||||
|
}
|
||||||
|
spec := patchBody["spec"].(map[string]any)
|
||||||
|
if spec["unschedulable"] != false {
|
||||||
|
t.Fatalf("unexpected spec patch: %#v", spec)
|
||||||
|
}
|
||||||
|
taints := spec["taints"].([]any)
|
||||||
|
if len(taints) != 2 {
|
||||||
|
t.Fatalf("unexpected taint payload: %#v", taints)
|
||||||
|
}
|
||||||
|
entries := map[string]map[string]any{}
|
||||||
|
for _, raw := range taints {
|
||||||
|
entry := raw.(map[string]any)
|
||||||
|
key := entry["key"].(string)
|
||||||
|
entries[key] = entry
|
||||||
|
}
|
||||||
|
if entries["dedicated"]["value"] != "recovery" || entries["dedicated"]["effect"] != "NoSchedule" {
|
||||||
|
t.Fatalf("missing desired taint replacement: %#v", entries)
|
||||||
|
}
|
||||||
|
if entries["node.kubernetes.io/unreachable"]["effect"] != "NoExecute" {
|
||||||
|
t.Fatalf("system taint should be preserved: %#v", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDesiredNodeMetadataHelpers(t *testing.T) {
|
||||||
|
app := newTestApp(t)
|
||||||
|
if _, ok := app.desiredMetadataForNode("missing"); ok {
|
||||||
|
t.Fatal("expected no desired metadata for missing node")
|
||||||
|
}
|
||||||
|
if err := app.syncDesiredNodeMetadata(SnapshotRecord{Node: "missing"}); err != nil {
|
||||||
|
t.Fatalf("syncDesiredNodeMetadata missing should noop: %v", err)
|
||||||
|
}
|
||||||
|
if _, ok := liveClusterNode(""); ok {
|
||||||
|
t.Fatal("empty liveClusterNode lookup should fail")
|
||||||
|
}
|
||||||
|
if !isRestorableLabel("maintenance.bstein.dev/role") || isRestorableLabel("kubernetes.io/arch") {
|
||||||
|
t.Fatal("unexpected label restoration filter")
|
||||||
|
}
|
||||||
|
if !isRestorableAnnotation("maintenance.bstein.dev/state") || isRestorableAnnotation("volumes.kubernetes.io/foo") {
|
||||||
|
t.Fatal("unexpected annotation restoration filter")
|
||||||
|
}
|
||||||
|
if !isRestorableTaint("dedicated=recovery:NoSchedule") || isRestorableTaint("node.kubernetes.io/not-ready:NoExecute") {
|
||||||
|
t.Fatal("unexpected taint restoration filter")
|
||||||
|
}
|
||||||
|
key, value, effect := splitTaint("dedicated=recovery:NoSchedule")
|
||||||
|
if key != "dedicated" || value != "recovery" || effect != "NoSchedule" {
|
||||||
|
t.Fatalf("splitTaint mismatch: %q %q %q", key, value, effect)
|
||||||
|
}
|
||||||
|
if key, value, effect := splitTaint("just-a-key"); key != "just-a-key" || value != "" || effect != "" {
|
||||||
|
t.Fatalf("splitTaint key-only mismatch: %q %q %q", key, value, effect)
|
||||||
|
}
|
||||||
|
labels := filteredRestorableLabels(map[string]string{"hardware": "rpi5", "kubernetes.io/arch": "arm64"})
|
||||||
|
if !reflect.DeepEqual(labels, map[string]string{"hardware": "rpi5"}) {
|
||||||
|
t.Fatalf("filteredRestorableLabels = %#v", labels)
|
||||||
|
}
|
||||||
|
annotations := filteredRestorableAnnotations(map[string]string{"maintenance.bstein.dev/state": "ok", "volumes.kubernetes.io/foo": "bar"})
|
||||||
|
if !reflect.DeepEqual(annotations, map[string]string{"maintenance.bstein.dev/state": "ok"}) {
|
||||||
|
t.Fatalf("filteredRestorableAnnotations = %#v", annotations)
|
||||||
|
}
|
||||||
|
patch := metadataStringPatch(
|
||||||
|
map[string]string{"hardware": "rpi4", "maintenance.bstein.dev/old": "1"},
|
||||||
|
map[string]string{"hardware": "rpi5"},
|
||||||
|
isRestorableLabel,
|
||||||
|
)
|
||||||
|
if patch["hardware"] != "rpi5" || patch["maintenance.bstein.dev/old"] != nil {
|
||||||
|
t.Fatalf("metadataStringPatch = %#v", patch)
|
||||||
|
}
|
||||||
|
mergedTaints := mergeLiveAndDesiredTaints(
|
||||||
|
[]string{"node.kubernetes.io/unreachable:NoExecute", "dedicated=old:NoSchedule"},
|
||||||
|
[]string{"dedicated=new:NoSchedule", "dedicated=new:NoSchedule"},
|
||||||
|
)
|
||||||
|
if !reflect.DeepEqual(mergedTaints, []string{"dedicated=new:NoSchedule", "node.kubernetes.io/unreachable:NoExecute"}) {
|
||||||
|
t.Fatalf("mergeLiveAndDesiredTaints = %#v", mergedTaints)
|
||||||
|
}
|
||||||
|
payload := taintPatchPayload([]string{"dedicated=new:NoSchedule"})
|
||||||
|
if len(payload) != 1 || payload[0]["key"] != "dedicated" || payload[0]["value"] != "new" || payload[0]["effect"] != "NoSchedule" {
|
||||||
|
t.Fatalf("taintPatchPayload = %#v", payload)
|
||||||
|
}
|
||||||
|
original := DesiredNodeMetadata{Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}}
|
||||||
|
cloned := cloneDesiredNodeMetadata(original)
|
||||||
|
cloned.Labels["hardware"] = "mutated"
|
||||||
|
cloned.Taints[0] = "changed"
|
||||||
|
if original.Labels["hardware"] != "rpi5" || original.Taints[0] != "dedicated=new:NoSchedule" {
|
||||||
|
t.Fatalf("cloneDesiredNodeMetadata should deep-copy slices/maps: %#v %#v", original, cloned)
|
||||||
|
}
|
||||||
|
if err := patchDesiredNodeMetadata(
|
||||||
|
clusterNode{Name: "titan-15", Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}},
|
||||||
|
DesiredNodeMetadata{Node: "titan-15", Labels: map[string]string{"hardware": "rpi5"}, Taints: []string{"dedicated=new:NoSchedule"}},
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("patchDesiredNodeMetadata should noop when already in sync: %v", err)
|
||||||
|
}
|
||||||
|
if event := desiredNodeMetadataSyncEvent("titan-15", os.ErrPermission); event.Kind != "sentinel.node-metadata" || event.Details["node"] != "titan-15" {
|
||||||
|
t.Fatalf("desiredNodeMetadataSyncEvent = %#v", event)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -74,12 +74,17 @@ func (a *App) RefreshDevices(host string) ([]Device, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) runBuild(job *Job, flash bool) {
|
func (a *App) runBuild(job *Job, flash bool) {
|
||||||
_, class, err := a.inventory.FindNode(job.Node)
|
nodeSpec, class, err := a.inventory.FindNode(job.Node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.failJob(job.ID, err)
|
a.failJob(job.ID, err)
|
||||||
a.metrics.RecordBuild(job.Node, "error")
|
a.metrics.RecordBuild(job.Node, "error")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if _, err := a.stageDesiredNodeMetadata(job.Node); err != nil {
|
||||||
|
a.failJob(job.ID, err)
|
||||||
|
a.metrics.RecordBuild(job.Node, "error")
|
||||||
|
return
|
||||||
|
}
|
||||||
if err := a.ensureHarborProject(); err != nil {
|
if err := a.ensureHarborProject(); err != nil {
|
||||||
a.failJob(job.ID, err)
|
a.failJob(job.ID, err)
|
||||||
a.metrics.RecordBuild(job.Node, "error")
|
a.metrics.RecordBuild(job.Node, "error")
|
||||||
@ -112,7 +117,8 @@ func (a *App) runBuild(job *Job, flash bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
buildPod := fmt.Sprintf("metis-build-%d", time.Now().UTC().UnixNano())
|
buildPod := fmt.Sprintf("metis-build-%d", time.Now().UTC().UnixNano())
|
||||||
logs, err := a.runRemotePod(job.ID, buildPod, a.remoteBuildPodSpec(buildPod, builder.Name, buildImage, job.Node, artifactRef, buildTag))
|
job.Builder = builder.Name
|
||||||
|
logs, err := a.runRemotePod(job.ID, buildPod, a.remoteBuildPodSpec(buildPod, builder.Name, buildImage, job.Node, strings.TrimSpace(nodeSpec.Hostname), artifactRef, buildTag))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.failJob(job.ID, err)
|
a.failJob(job.ID, err)
|
||||||
a.metrics.RecordBuild(job.Node, "error")
|
a.metrics.RecordBuild(job.Node, "error")
|
||||||
@ -183,6 +189,9 @@ func (a *App) runFlash(job *Job) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) runFlashSequence(job *Job, artifactRef string) (RemoteFlashResult, error) {
|
func (a *App) runFlashSequence(job *Job, artifactRef string) (RemoteFlashResult, error) {
|
||||||
|
if _, err := a.stageDesiredNodeMetadata(job.Node); err != nil {
|
||||||
|
return RemoteFlashResult{}, err
|
||||||
|
}
|
||||||
a.setJob(job.ID, func(j *Job) {
|
a.setJob(job.ID, func(j *Job) {
|
||||||
j.Status = JobRunning
|
j.Status = JobRunning
|
||||||
j.Stage = "preflight"
|
j.Stage = "preflight"
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
package service
|
package service
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -246,8 +247,9 @@ func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any {
|
func (a *App) remoteBuildPodSpec(name, host, image, node, nodeHostname, artifactRef, buildTag string) map[string]any {
|
||||||
workspaceHostPath := remoteWorkspaceHostPath(a.settings.RemoteWorkspaceDir, name)
|
workspaceHostPath := remoteWorkspaceHostPath(a.settings.RemoteWorkspaceDir, name)
|
||||||
|
desiredEnv := remoteDesiredMetadataEnv(a, node)
|
||||||
return map[string]any{
|
return map[string]any{
|
||||||
"apiVersion": "v1",
|
"apiVersion": "v1",
|
||||||
"kind": "Pod",
|
"kind": "Pod",
|
||||||
@ -255,7 +257,7 @@ func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag
|
|||||||
"name": name,
|
"name": name,
|
||||||
"namespace": a.settings.Namespace,
|
"namespace": a.settings.Namespace,
|
||||||
"labels": map[string]string{"app": "metis-remote", "metis-run": "build"},
|
"labels": map[string]string{"app": "metis-remote", "metis-run": "build"},
|
||||||
"annotations": vaultRuntimeAnnotations(true),
|
"annotations": vaultRuntimeAnnotations(true, nodeHostname),
|
||||||
},
|
},
|
||||||
"spec": map[string]any{
|
"spec": map[string]any{
|
||||||
"restartPolicy": "Never",
|
"restartPolicy": "Never",
|
||||||
@ -283,6 +285,7 @@ func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
"securityContext": map[string]any{"runAsUser": 0, "runAsGroup": 0},
|
"securityContext": map[string]any{"runAsUser": 0, "runAsGroup": 0},
|
||||||
|
"env": desiredEnv,
|
||||||
"envFrom": []map[string]any{
|
"envFrom": []map[string]any{
|
||||||
{"configMapRef": map[string]any{"name": "metis"}},
|
{"configMapRef": map[string]any{"name": "metis"}},
|
||||||
},
|
},
|
||||||
@ -309,7 +312,7 @@ func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef st
|
|||||||
"name": name,
|
"name": name,
|
||||||
"namespace": a.settings.Namespace,
|
"namespace": a.settings.Namespace,
|
||||||
"labels": map[string]string{"app": "metis-remote", "metis-run": "flash"},
|
"labels": map[string]string{"app": "metis-remote", "metis-run": "flash"},
|
||||||
"annotations": vaultRuntimeAnnotations(false),
|
"annotations": vaultRuntimeAnnotations(false, ""),
|
||||||
},
|
},
|
||||||
"spec": map[string]any{
|
"spec": map[string]any{
|
||||||
"restartPolicy": "Never",
|
"restartPolicy": "Never",
|
||||||
@ -378,7 +381,46 @@ func mountedHostTmpDir(path string) string {
|
|||||||
return "/host-tmp"
|
return "/host-tmp"
|
||||||
}
|
}
|
||||||
|
|
||||||
func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string {
|
func remoteDesiredMetadataEnv(a *App, node string) []map[string]any {
|
||||||
|
desired, ok := a.desiredMetadataForNode(node)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
labelsJSON, _ := jsonMarshalStringMap(desired.Labels)
|
||||||
|
taintsJSON, _ := jsonMarshalStringSlice(desired.Taints)
|
||||||
|
env := []map[string]any{}
|
||||||
|
if labelsJSON != "" {
|
||||||
|
env = append(env, map[string]any{"name": "METIS_NODE_LABELS_JSON", "value": labelsJSON})
|
||||||
|
}
|
||||||
|
if taintsJSON != "" {
|
||||||
|
env = append(env, map[string]any{"name": "METIS_NODE_TAINTS_JSON", "value": taintsJSON})
|
||||||
|
}
|
||||||
|
return env
|
||||||
|
}
|
||||||
|
|
||||||
|
// jsonMarshalStringMap encodes values as a JSON object string.
//
// A nil or empty map yields "" with a nil error rather than "null" or "{}",
// so callers can treat the empty string as "nothing to emit". Any error from
// json.Marshal is returned together with "".
func jsonMarshalStringMap(values map[string]string) (string, error) {
	var encoded string
	if len(values) == 0 {
		return encoded, nil
	}
	raw, err := json.Marshal(values)
	if err == nil {
		encoded = string(raw)
	}
	return encoded, err
}
|
||||||
|
|
||||||
|
// jsonMarshalStringSlice encodes values as a JSON array string.
//
// A nil or empty slice yields "" with a nil error rather than "null" or "[]",
// so callers can treat the empty string as "nothing to emit". Any error from
// json.Marshal is returned together with "".
func jsonMarshalStringSlice(values []string) (string, error) {
	var encoded string
	if len(values) == 0 {
		return encoded, nil
	}
	raw, err := json.Marshal(values)
	if err == nil {
		encoded = string(raw)
	}
	return encoded, err
}
|
||||||
|
|
||||||
|
func vaultRuntimeAnnotations(includeSSHKeys bool, nodeHostname string) map[string]string {
|
||||||
annotations := map[string]string{
|
annotations := map[string]string{
|
||||||
"vault.hashicorp.com/agent-inject": "true",
|
"vault.hashicorp.com/agent-inject": "true",
|
||||||
"vault.hashicorp.com/agent-pre-populate-only": "true",
|
"vault.hashicorp.com/agent-pre-populate-only": "true",
|
||||||
@ -399,6 +441,19 @@ export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}"
|
|||||||
export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
|
export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
|
||||||
export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}"
|
export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}"
|
||||||
export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}"
|
export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}"
|
||||||
|
{{ end }}`
|
||||||
|
}
|
||||||
|
nodeHostname = strings.TrimSpace(nodeHostname)
|
||||||
|
if nodeHostname != "" {
|
||||||
|
secretPath := fmt.Sprintf("secret/data/nodes/%s", nodeHostname)
|
||||||
|
annotations["vault.hashicorp.com/agent-inject-secret-metis-node-secrets-env.sh"] = secretPath
|
||||||
|
annotations["vault.hashicorp.com/agent-inject-template-metis-node-secrets-env.sh"] = `{{ with secret "` + secretPath + `" }}
|
||||||
|
export METIS_NODE_SSH_PASSWORD="{{ .Data.data.ssh_password }}"
|
||||||
|
export METIS_NODE_SSH_PASSWORD_HASH="{{ .Data.data.ssh_password_hash }}"
|
||||||
|
export METIS_NODE_ATLAS_PASSWORD="{{ .Data.data.atlas_password }}"
|
||||||
|
export METIS_NODE_ATLAS_PASSWORD_HASH="{{ .Data.data.atlas_password_hash }}"
|
||||||
|
export METIS_NODE_ROOT_PASSWORD="{{ .Data.data.root_password }}"
|
||||||
|
export METIS_NODE_ROOT_PASSWORD_HASH="{{ .Data.data.root_password_hash }}"
|
||||||
{{ end }}`
|
{{ end }}`
|
||||||
}
|
}
|
||||||
return annotations
|
return annotations
|
||||||
@ -413,6 +468,7 @@ func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string {
|
|||||||
if includeSSHKeys {
|
if includeSSHKeys {
|
||||||
lines = append(lines, ". /vault/secrets/metis-ssh-env.sh")
|
lines = append(lines, ". /vault/secrets/metis-ssh-env.sh")
|
||||||
}
|
}
|
||||||
|
lines = append(lines, "if [ -f /vault/secrets/metis-node-secrets-env.sh ]; then . /vault/secrets/metis-node-secrets-env.sh; fi")
|
||||||
lines = append(lines, "exec "+shellJoin(append([]string{"metis"}, args...)...))
|
lines = append(lines, "exec "+shellJoin(append([]string{"metis"}, args...)...))
|
||||||
return strings.Join(lines, "\n")
|
return strings.Join(lines, "\n")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -251,8 +251,13 @@ func TestRemoteWorkspaceAndHostTmpPathsPreferUsbScratchRoots(t *testing.T) {
|
|||||||
app := newTestApp(t)
|
app := newTestApp(t)
|
||||||
app.settings.RemoteWorkspaceDir = "/var/tmp/metis-workspace"
|
app.settings.RemoteWorkspaceDir = "/var/tmp/metis-workspace"
|
||||||
app.settings.HostTmpDir = "/var/tmp/metis-flash-test"
|
app.settings.HostTmpDir = "/var/tmp/metis-flash-test"
|
||||||
|
app.desiredMetadata["titan-10"] = DesiredNodeMetadata{
|
||||||
|
Node: "titan-10",
|
||||||
|
Labels: map[string]string{"hardware": "rpi5"},
|
||||||
|
Taints: []string{"dedicated=recovery:NoSchedule"},
|
||||||
|
}
|
||||||
|
|
||||||
buildSpec := app.remoteBuildPodSpec("metis-build-123", "titan-04", "runner:arm64", "titan-10", "registry.example/metis/titan-10", "build-1")
|
buildSpec := app.remoteBuildPodSpec("metis-build-123", "titan-04", "runner:arm64", "titan-10", "titan-10", "registry.example/metis/titan-10", "build-1")
|
||||||
buildBody := buildSpec["spec"].(map[string]any)
|
buildBody := buildSpec["spec"].(map[string]any)
|
||||||
buildVolumes := buildBody["volumes"].([]map[string]any)
|
buildVolumes := buildBody["volumes"].([]map[string]any)
|
||||||
workspaceVolume := buildVolumes[0]["hostPath"].(map[string]any)
|
workspaceVolume := buildVolumes[0]["hostPath"].(map[string]any)
|
||||||
@ -260,6 +265,17 @@ func TestRemoteWorkspaceAndHostTmpPathsPreferUsbScratchRoots(t *testing.T) {
|
|||||||
t.Fatalf("build workspace hostPath = %v", got)
|
t.Fatalf("build workspace hostPath = %v", got)
|
||||||
}
|
}
|
||||||
buildContainer := buildBody["containers"].([]map[string]any)[0]
|
buildContainer := buildBody["containers"].([]map[string]any)[0]
|
||||||
|
buildEnv := buildContainer["env"].([]map[string]any)
|
||||||
|
if len(buildEnv) != 2 {
|
||||||
|
t.Fatalf("expected desired metadata env, got %#v", buildEnv)
|
||||||
|
}
|
||||||
|
metadataAnnotations := buildSpec["metadata"].(map[string]any)["annotations"].(map[string]string)
|
||||||
|
if metadataAnnotations["vault.hashicorp.com/agent-inject-secret-metis-node-secrets-env.sh"] != "secret/data/nodes/titan-10" {
|
||||||
|
t.Fatalf("unexpected node secret annotation: %#v", metadataAnnotations)
|
||||||
|
}
|
||||||
|
if !strings.Contains(metadataAnnotations["vault.hashicorp.com/agent-inject-template-metis-node-secrets-env.sh"], "METIS_NODE_ROOT_PASSWORD") {
|
||||||
|
t.Fatalf("expected node password exports in vault template: %#v", metadataAnnotations)
|
||||||
|
}
|
||||||
buildSecurity := buildContainer["securityContext"].(map[string]any)
|
buildSecurity := buildContainer["securityContext"].(map[string]any)
|
||||||
if got := buildSecurity["runAsUser"]; got != 0 {
|
if got := buildSecurity["runAsUser"]; got != 0 {
|
||||||
t.Fatalf("build runAsUser = %v", got)
|
t.Fatalf("build runAsUser = %v", got)
|
||||||
|
|||||||
@ -15,7 +15,7 @@ func TestMountedHostTmpDirMapsConfiguredTmpPathIntoMount(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestVaultRuntimeAnnotationsIncludeReciprocalHecateKeys(t *testing.T) {
|
func TestVaultRuntimeAnnotationsIncludeReciprocalHecateKeys(t *testing.T) {
|
||||||
withKeys := vaultRuntimeAnnotations(true)
|
withKeys := vaultRuntimeAnnotations(true, "titan-15")
|
||||||
template := withKeys["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"]
|
template := withKeys["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"]
|
||||||
if !strings.Contains(template, "METIS_SSH_KEY_HECATE_TETHYS") {
|
if !strings.Contains(template, "METIS_SSH_KEY_HECATE_TETHYS") {
|
||||||
t.Fatalf("expected tethys hecate key export in vault template: %q", template)
|
t.Fatalf("expected tethys hecate key export in vault template: %q", template)
|
||||||
@ -24,7 +24,7 @@ func TestVaultRuntimeAnnotationsIncludeReciprocalHecateKeys(t *testing.T) {
|
|||||||
t.Fatalf("expected db hecate key export in vault template: %q", template)
|
t.Fatalf("expected db hecate key export in vault template: %q", template)
|
||||||
}
|
}
|
||||||
|
|
||||||
withoutKeys := vaultRuntimeAnnotations(false)
|
withoutKeys := vaultRuntimeAnnotations(false, "")
|
||||||
if _, ok := withoutKeys["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"]; ok {
|
if _, ok := withoutKeys["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"]; ok {
|
||||||
t.Fatalf("did not expect ssh key template when includeSSHKeys=false")
|
t.Fatalf("did not expect ssh key template when includeSSHKeys=false")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -11,30 +11,31 @@ var hostNameLookup = os.Hostname
|
|||||||
|
|
||||||
// Settings configures the Metis service runtime.
|
// Settings configures the Metis service runtime.
|
||||||
type Settings struct {
|
type Settings struct {
|
||||||
BindAddr string
|
BindAddr string
|
||||||
InventoryPath string
|
InventoryPath string
|
||||||
CacheDir string
|
CacheDir string
|
||||||
ArtifactDir string
|
ArtifactDir string
|
||||||
ArtifactStatePath string
|
ArtifactStatePath string
|
||||||
HistoryPath string
|
HistoryPath string
|
||||||
SnapshotsPath string
|
SnapshotsPath string
|
||||||
TargetsPath string
|
TargetsPath string
|
||||||
DefaultFlashHost string
|
DesiredMetadataPath string
|
||||||
FlashHosts []string
|
DefaultFlashHost string
|
||||||
LocalHost string
|
FlashHosts []string
|
||||||
AllowedGroups []string
|
LocalHost string
|
||||||
MaxDeviceBytes int64
|
AllowedGroups []string
|
||||||
Namespace string
|
MaxDeviceBytes int64
|
||||||
RunnerImageAMD64 string
|
Namespace string
|
||||||
RunnerImageARM64 string
|
RunnerImageAMD64 string
|
||||||
HarborRegistry string
|
RunnerImageARM64 string
|
||||||
HarborProject string
|
HarborRegistry string
|
||||||
HarborAPIBase string
|
HarborProject string
|
||||||
HarborUsername string
|
HarborAPIBase string
|
||||||
HarborPassword string
|
HarborUsername string
|
||||||
HostTmpDir string
|
HarborPassword string
|
||||||
RemoteWorkspaceDir string
|
HostTmpDir string
|
||||||
RemotePodTimeout int64
|
RemoteWorkspaceDir string
|
||||||
|
RemotePodTimeout int64
|
||||||
}
|
}
|
||||||
|
|
||||||
// FromEnv builds service settings with sensible defaults for local dev and in-cluster use.
|
// FromEnv builds service settings with sensible defaults for local dev and in-cluster use.
|
||||||
@ -44,30 +45,31 @@ func FromEnv() Settings {
|
|||||||
defaultFlashHost := getenvDefault("METIS_DEFAULT_FLASH_HOST", localHost)
|
defaultFlashHost := getenvDefault("METIS_DEFAULT_FLASH_HOST", localHost)
|
||||||
flashHosts := splitList(getenvDefault("METIS_FLASH_HOSTS", defaultFlashHost))
|
flashHosts := splitList(getenvDefault("METIS_FLASH_HOSTS", defaultFlashHost))
|
||||||
return Settings{
|
return Settings{
|
||||||
BindAddr: getenvDefault("METIS_BIND_ADDR", ":8080"),
|
BindAddr: getenvDefault("METIS_BIND_ADDR", ":8080"),
|
||||||
InventoryPath: getenvDefault("METIS_INVENTORY_PATH", "inventory.titan-rpi4.yaml"),
|
InventoryPath: getenvDefault("METIS_INVENTORY_PATH", "inventory.titan-rpi4.yaml"),
|
||||||
CacheDir: getenvDefault("METIS_CACHE_DIR", filepath.Join(dataDir, "cache")),
|
CacheDir: getenvDefault("METIS_CACHE_DIR", filepath.Join(dataDir, "cache")),
|
||||||
ArtifactDir: getenvDefault("METIS_ARTIFACT_DIR", filepath.Join(dataDir, "artifacts")),
|
ArtifactDir: getenvDefault("METIS_ARTIFACT_DIR", filepath.Join(dataDir, "artifacts")),
|
||||||
ArtifactStatePath: getenvDefault("METIS_ARTIFACT_STATE_PATH", filepath.Join(dataDir, "artifacts.json")),
|
ArtifactStatePath: getenvDefault("METIS_ARTIFACT_STATE_PATH", filepath.Join(dataDir, "artifacts.json")),
|
||||||
HistoryPath: getenvDefault("METIS_HISTORY_PATH", filepath.Join(dataDir, "history.jsonl")),
|
HistoryPath: getenvDefault("METIS_HISTORY_PATH", filepath.Join(dataDir, "history.jsonl")),
|
||||||
SnapshotsPath: getenvDefault("METIS_SNAPSHOTS_PATH", filepath.Join(dataDir, "snapshots.json")),
|
SnapshotsPath: getenvDefault("METIS_SNAPSHOTS_PATH", filepath.Join(dataDir, "snapshots.json")),
|
||||||
TargetsPath: getenvDefault("METIS_TARGETS_PATH", filepath.Join(dataDir, "targets.json")),
|
TargetsPath: getenvDefault("METIS_TARGETS_PATH", filepath.Join(dataDir, "targets.json")),
|
||||||
DefaultFlashHost: defaultFlashHost,
|
DesiredMetadataPath: getenvDefault("METIS_DESIRED_METADATA_PATH", filepath.Join(dataDir, "desired-node-metadata.json")),
|
||||||
FlashHosts: flashHosts,
|
DefaultFlashHost: defaultFlashHost,
|
||||||
LocalHost: localHost,
|
FlashHosts: flashHosts,
|
||||||
AllowedGroups: splitList(getenvDefault("METIS_ALLOWED_GROUPS", "admin,maintenance")),
|
LocalHost: localHost,
|
||||||
MaxDeviceBytes: getenvInt64("METIS_MAX_DEVICE_BYTES", 300000000000),
|
AllowedGroups: splitList(getenvDefault("METIS_ALLOWED_GROUPS", "admin,maintenance")),
|
||||||
Namespace: getenvDefault("METIS_NAMESPACE", "maintenance"),
|
MaxDeviceBytes: getenvInt64("METIS_MAX_DEVICE_BYTES", 300000000000),
|
||||||
RunnerImageAMD64: getenvDefault("METIS_RUNNER_IMAGE_AMD64", ""),
|
Namespace: getenvDefault("METIS_NAMESPACE", "maintenance"),
|
||||||
RunnerImageARM64: getenvDefault("METIS_RUNNER_IMAGE_ARM64", ""),
|
RunnerImageAMD64: getenvDefault("METIS_RUNNER_IMAGE_AMD64", ""),
|
||||||
HarborRegistry: getenvDefault("METIS_HARBOR_REGISTRY", "registry.bstein.dev"),
|
RunnerImageARM64: getenvDefault("METIS_RUNNER_IMAGE_ARM64", ""),
|
||||||
HarborProject: getenvDefault("METIS_HARBOR_PROJECT", "metis"),
|
HarborRegistry: getenvDefault("METIS_HARBOR_REGISTRY", "registry.bstein.dev"),
|
||||||
HarborAPIBase: getenvDefault("METIS_HARBOR_API_BASE", "https://registry.bstein.dev/api/v2.0"),
|
HarborProject: getenvDefault("METIS_HARBOR_PROJECT", "metis"),
|
||||||
HarborUsername: getenvDefault("METIS_HARBOR_USERNAME", ""),
|
HarborAPIBase: getenvDefault("METIS_HARBOR_API_BASE", "https://registry.bstein.dev/api/v2.0"),
|
||||||
HarborPassword: getenvDefault("METIS_HARBOR_PASSWORD", ""),
|
HarborUsername: getenvDefault("METIS_HARBOR_USERNAME", ""),
|
||||||
HostTmpDir: getenvDefault("METIS_HOST_TMP_DIR", "/var/tmp/metis-flash-test"),
|
HarborPassword: getenvDefault("METIS_HARBOR_PASSWORD", ""),
|
||||||
RemoteWorkspaceDir: getenvDefault("METIS_REMOTE_WORKSPACE_DIR", "/var/tmp/metis-workspace"),
|
HostTmpDir: getenvDefault("METIS_HOST_TMP_DIR", "/var/tmp/metis-flash-test"),
|
||||||
RemotePodTimeout: getenvInt64("METIS_REMOTE_POD_TIMEOUT_SEC", 1800),
|
RemoteWorkspaceDir: getenvDefault("METIS_REMOTE_WORKSPACE_DIR", "/var/tmp/metis-workspace"),
|
||||||
|
RemotePodTimeout: getenvInt64("METIS_REMOTE_POD_TIMEOUT_SEC", 1800),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user