package service

import (
	"fmt"
	"math"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"metis/pkg/inventory"
)

// buildStageHeartbeat maps the time elapsed since a remote build started to
// a progress value and a human-readable status line. The result depends only
// on elapsed; it does not observe the build itself. Progress moves from 8 to
// at most 76 across five time windows (0-20s, 20-120s, 120-360s, 360-540s,
// and 540s onward).
func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) {
	t := elapsed.Seconds()

	if t < 20 {
		status := fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node)
		return ramp(t, 0, 20, 8, 14), status
	}
	if t < 120 {
		status := fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder)
		return ramp(t, 20, 120, 14, 30), status
	}
	if t < 360 {
		status := fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder)
		return ramp(t, 120, 360, 30, 58), status
	}
	if t < 540 {
		status := fmt.Sprintf("Compressing the replacement image for %s before upload", node)
		return ramp(t, 360, 540, 58, 70), status
	}

	// Final window: progress is capped at 76 however long publishing takes.
	progress := math.Min(76, ramp(t, 540, 900, 70, 76))
	return progress, fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node)
}

// flashStageHeartbeat maps the time elapsed since a flash run started to a
// progress value and a status line. Like buildStageHeartbeat it is driven
// purely by elapsed time. Progress moves from 84 to at most 98 across three
// windows (0-10s, 10-45s, and 45s onward).
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
	t := elapsed.Seconds()

	if t < 10 {
		status := fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
		return ramp(t, 0, 10, 84, 88), status
	}
	if t < 45 {
		status := fmt.Sprintf("Writing the latest image to the selected target on %s", host)
		return ramp(t, 10, 45, 88, 96), status
	}

	progress := math.Min(98, ramp(t, 45, 120, 96, 98))
	return progress, fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
}

// prettyDeviceTarget renders a device path for display. A "hosttmp://" prefix
// is stripped, a blank path becomes the phrase "the selected target", and
// anything else is returned unchanged.
func prettyDeviceTarget(path string) string {
	const scheme = "hosttmp://"

	if strings.HasPrefix(path, scheme) {
		return path[len(scheme):]
	}
	if strings.TrimSpace(path) == "" {
		return "the selected target"
	}
	return path
}

// hostTmpHostPath normalizes the configured host tmp directory. The input is
// trimmed and cleaned; when the result is empty, ".", or "/", the default
// "/var/tmp/metis-flash-test" is returned instead.
func hostTmpHostPath(path string) string {
	const fallback = "/var/tmp/metis-flash-test"

	switch cleaned := filepath.Clean(strings.TrimSpace(path)); cleaned {
	case "", ".", "/":
		return fallback
	default:
		return cleaned
	}
}

// remoteWorkspaceHostPath returns the host directory used as a remote pod's
// workspace. The root is trimmed and cleaned, falling back to
// "/var/tmp/metis-workspace" when it is empty, ".", or "/". A non-blank
// podName is appended as a sub-directory; a blank one yields the root itself.
func remoteWorkspaceHostPath(root, podName string) string {
	base := filepath.Clean(strings.TrimSpace(root))
	switch base {
	case "", ".", "/":
		base = "/var/tmp/metis-workspace"
	}

	if strings.TrimSpace(podName) != "" {
		return filepath.Join(base, podName)
	}
	return base
}

// managedPathsContain reports whether want (after trimming) appears as one of
// the entries in raw. Entries are compared after trimming surrounding
// whitespace. A blank want never matches.
//
// NOTE(review): entries in raw are separated by "_"; confirm that this is the
// delimiter the producer of this value actually uses.
func managedPathsContain(raw, want string) bool {
	target := strings.TrimSpace(want)
	if target == "" {
		return false
	}

	entries := strings.Split(raw, "_")
	for i := range entries {
		if strings.TrimSpace(entries[i]) == target {
			return true
		}
	}
	return false
}

// usbScratchReadyForWorkspace reports whether the node's USB scratch status
// is "ok" and its managed paths include "/var/tmp".
func usbScratchReadyForWorkspace(node clusterNode) bool {
	if node.USBScratchStatus != "ok" {
		return false
	}
	return managedPathsContain(node.USBScratchManagedPaths, "/var/tmp")
}

// ramp linearly interpolates from min to max as value moves from start to
// end, clamping to min at or below start and to max at or above end. A
// degenerate range (end <= start) always yields max.
func ramp(value, start, end, min, max float64) float64 {
	switch {
	case end <= start:
		return max
	case value <= start:
		return min
	case value >= end:
		return max
	}

	fraction := (value - start) / (end - start)
	return min + fraction*(max-min)
}

// ensureDevice refreshes the device list for host and returns the entry whose
// Path equals path. It returns an error when path is blank, when the refresh
// fails (that error is returned as-is), or when no refreshed device matches.
func (a *App) ensureDevice(host, path string) (*Device, error) {
	if strings.TrimSpace(path) == "" {
		return nil, fmt.Errorf("select removable media before starting a flash run")
	}

	devices, err := a.RefreshDevices(host)
	if err != nil {
		return nil, err
	}

	for _, candidate := range devices {
		if candidate.Path != path {
			continue
		}
		// Hand back a pointer to a private copy of the matching entry.
		found := candidate
		return &found, nil
	}
	return nil, fmt.Errorf("device %s is not a current flash candidate on %s", path, host)
}

// selectBuilderHost picks the cluster node that should run a remote build for
// the given architecture.
//
// Nodes with a different arch, unschedulable nodes, and control-plane nodes
// are excluded. Every remaining node is scored:
//
//   - +40 for worker nodes
//   - arm64: +30 for "rpi5" hardware; +120 when USB scratch is ready for the
//     workspace, -200 when the scratch status is "error", -80 otherwise;
//     -50 when the inventory lists Longhorn disks for the node
//   - amd64: +30 for the configured default flash host; -10 for "titan-24"
//   - +5 when the node is the (non-empty) flashHost
//   - -100 per active build pod and -15 per active remote pod on the node
//
// The highest score wins and ties go to the lexicographically smallest name.
// An error is returned when no node qualifies.
func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
	clusterView := clusterNodes()
	buildLoad := clusterActiveRemotePodLoads(a.settings.Namespace, "build")
	remoteLoad := clusterActiveRemotePodLoads(a.settings.Namespace, "")

	// Names of inventory nodes that declare at least one Longhorn disk.
	hasLonghorn := map[string]struct{}{}
	for _, inv := range a.inventory.Nodes {
		if len(inv.LonghornDisks) == 0 {
			continue
		}
		hasLonghorn[inv.Name] = struct{}{}
	}

	type ranked struct {
		node  clusterNode
		score int
	}

	ranking := make([]ranked, 0, len(clusterView))
	for _, candidate := range clusterView {
		eligible := candidate.Arch == arch && !candidate.Unschedulable && !candidate.ControlPlane
		if !eligible {
			continue
		}

		var points int
		if candidate.Worker {
			points += 40
		}

		if arch == "arm64" {
			if candidate.Hardware == "rpi5" {
				points += 30
			}
			switch {
			case usbScratchReadyForWorkspace(candidate):
				points += 120
			case candidate.USBScratchStatus == "error":
				points -= 200
			default:
				points -= 80
			}
			if _, onStorage := hasLonghorn[candidate.Name]; onStorage {
				points -= 50
			}
		} else if arch == "amd64" {
			if candidate.Name == a.settings.DefaultFlashHost {
				points += 30
			}
			if candidate.Name == "titan-24" {
				points -= 10
			}
		}

		if candidate.Name == flashHost && flashHost != "" {
			points += 5
		}
		if running := buildLoad[candidate.Name]; running > 0 {
			points -= running * 100
		}
		if running := remoteLoad[candidate.Name]; running > 0 {
			points -= running * 15
		}

		ranking = append(ranking, ranked{node: candidate, score: points})
	}

	if len(ranking) == 0 {
		return clusterNode{}, fmt.Errorf("no build host available for arch %s", arch)
	}

	// Best score first; equal scores are ordered by ascending node name.
	sort.Slice(ranking, func(i, j int) bool {
		left, right := ranking[i], ranking[j]
		if left.score == right.score {
			return left.node.Name < right.node.Name
		}
		return left.score > right.score
	})
	return ranking[0].node, nil
}

// remoteDevicePodSpec builds the Pod manifest for the device-discovery
// worker. The pod is pinned to host through a hostname node selector, runs a
// single privileged "remote-devices" container as user 0, and mounts the
// host's /dev (read-write) plus /sys and /run/udev (read-only).
func (a *App) remoteDevicePodSpec(name, host, image string) map[string]any {
	command := []string{
		"metis",
		"remote-devices",
		"--max-device-bytes", fmt.Sprintf("%d", a.settings.MaxDeviceBytes),
		"--host-tmp-dir", hostTmpHostPath(a.settings.HostTmpDir),
	}

	container := map[string]any{
		"name":            "remote-devices",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         command,
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"volumeMounts": []map[string]any{
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
		},
	}

	metadata := map[string]any{
		"name":      name,
		"namespace": a.settings.Namespace,
		"labels":    map[string]string{"app": "metis-remote", "metis-run": "devices"},
	}

	podSpec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector": map[string]string{
			"kubernetes.io/hostname": host,
		},
		"containers":       []map[string]any{container},
		"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
		"volumes": []map[string]any{
			{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
			{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
			{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
		},
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       podSpec,
	}
}

// remoteBuildPodSpec builds the Pod manifest for a remote image build. The
// pod is pinned to host, carries the Vault annotations including the SSH key
// template, and runs a single "remote-build" container as user/group 0 whose
// shell entrypoint sources the injected secrets and then execs
// "metis remote-build". A per-pod host directory is mounted at /workspace.
func (a *App) remoteBuildPodSpec(name, host, image, node, artifactRef, buildTag string) map[string]any {
	workspaceDir := remoteWorkspaceHostPath(a.settings.RemoteWorkspaceDir, name)

	script := remoteWorkerEntrypoint(
		true,
		"remote-build",
		"--inventory", a.settings.InventoryPath,
		"--node", node,
		"--cache", "/workspace/cache",
		"--work-dir", "/workspace/build",
		"--artifact-ref", artifactRef,
		"--build-tag", buildTag,
		"--harbor-registry", a.settings.HarborRegistry,
	)

	container := map[string]any{
		"name":            "remote-build",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{script},
		"securityContext": map[string]any{"runAsUser": 0, "runAsGroup": 0},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
		},
	}

	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "build"},
		"annotations": vaultRuntimeAnnotations(true),
	}

	podSpec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector": map[string]string{
			"kubernetes.io/hostname": host,
		},
		"containers":       []map[string]any{container},
		"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
		"volumes": []map[string]any{
			{"name": "workspace", "hostPath": map[string]any{"path": workspaceDir, "type": "DirectoryOrCreate"}},
		},
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       podSpec,
	}
}

// remoteFlashPodSpec builds the Pod manifest for a remote flash run. The pod
// is pinned to host, carries the Vault annotations without the SSH key
// template, and runs a single privileged "remote-flash" container whose shell
// entrypoint sources the injected secrets and then execs "metis remote-flash".
// It mounts a per-pod workspace, the host's /dev, /sys and /run/udev, and the
// normalized host tmp directory at /host-tmp.
func (a *App) remoteFlashPodSpec(name, host, image, node, device, artifactRef string) map[string]any {
	workspaceDir := remoteWorkspaceHostPath(a.settings.RemoteWorkspaceDir, name)
	hostTmpDir := hostTmpHostPath(a.settings.HostTmpDir)

	script := remoteWorkerEntrypoint(
		false,
		"remote-flash",
		"--node", node,
		"--device", device,
		"--artifact-ref", artifactRef,
		"--work-dir", "/workspace/flash",
		"--harbor-registry", a.settings.HarborRegistry,
		"--host-tmp-dir", mountedHostTmpDir(a.settings.HostTmpDir),
	)

	container := map[string]any{
		"name":            "remote-flash",
		"image":           image,
		"imagePullPolicy": "Always",
		"command":         []string{"/bin/sh", "-c"},
		"args":            []string{script},
		"securityContext": map[string]any{"privileged": true, "runAsUser": 0},
		"envFrom": []map[string]any{
			{"configMapRef": map[string]any{"name": "metis"}},
		},
		"volumeMounts": []map[string]any{
			{"name": "workspace", "mountPath": "/workspace"},
			{"name": "host-dev", "mountPath": "/dev"},
			{"name": "host-sys", "mountPath": "/sys", "readOnly": true},
			{"name": "host-udev", "mountPath": "/run/udev", "readOnly": true},
			{"name": "host-tmp", "mountPath": "/host-tmp"},
		},
	}

	metadata := map[string]any{
		"name":        name,
		"namespace":   a.settings.Namespace,
		"labels":      map[string]string{"app": "metis-remote", "metis-run": "flash"},
		"annotations": vaultRuntimeAnnotations(false),
	}

	podSpec := map[string]any{
		"restartPolicy":      "Never",
		"serviceAccountName": "metis",
		"nodeSelector": map[string]string{
			"kubernetes.io/hostname": host,
		},
		"containers":       []map[string]any{container},
		"imagePullSecrets": []map[string]string{{"name": "harbor-regcred"}},
		"volumes": []map[string]any{
			{"name": "workspace", "hostPath": map[string]any{"path": workspaceDir, "type": "DirectoryOrCreate"}},
			{"name": "host-dev", "hostPath": map[string]any{"path": "/dev"}},
			{"name": "host-sys", "hostPath": map[string]any{"path": "/sys"}},
			{"name": "host-udev", "hostPath": map[string]any{"path": "/run/udev"}},
			{"name": "host-tmp", "hostPath": map[string]any{"path": hostTmpDir, "type": "DirectoryOrCreate"}},
		},
	}

	return map[string]any{
		"apiVersion": "v1",
		"kind":       "Pod",
		"metadata":   metadata,
		"spec":       podSpec,
	}
}

// remoteArtifactNote returns the artifact reference to show for node: the
// recorded artifact's Ref when one exists and is non-blank, otherwise the
// node's artifact repository with a ":latest" tag.
func (a *App) remoteArtifactNote(node string) string {
	summary, found := a.artifacts()[node]
	if !found || strings.TrimSpace(summary.Ref) == "" {
		return a.artifactRepo(node) + ":latest"
	}
	return summary.Ref
}

// inventoryNodeArch returns the trimmed architecture declared on class, or
// "arm64" when class is nil or declares none. The spec argument is not
// consulted.
func inventoryNodeArch(spec *inventory.NodeSpec, class *inventory.NodeClass) string {
	const fallback = "arm64"

	if class == nil {
		return fallback
	}
	if arch := strings.TrimSpace(class.Arch); arch != "" {
		return arch
	}
	return fallback
}

// mountedHostTmpDir returns the in-container location of the host tmp
// directory. It is always "/host-tmp"; the path argument is ignored.
func mountedHostTmpDir(path string) string {
	const mountPoint = "/host-tmp"
	return mountPoint
}

// vaultRuntimeAnnotations returns the Vault agent-injector annotations for
// remote worker pods. The runtime and Harbor secret templates are always
// present; the SSH key template is added only when includeSSHKeys is true.
//
// NOTE(review): the template bodies below are single-line, exactly as they
// appear in this view of the file; confirm against the upstream source
// whether line breaks inside the templates were intended.
func vaultRuntimeAnnotations(includeSSHKeys bool) map[string]string {
	const (
		runtimeTemplate = `{{ with secret "kv/data/atlas/maintenance/metis-runtime" }} export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}" {{ end }}`
		harborTemplate  = `{{ with secret "kv/data/atlas/harbor/harbor-core" }} export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}" {{ end }}`
		sshTemplate     = `{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }} export METIS_SSH_KEY_BASTION="{{ .Data.data.bastion_pub }}" export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}" export METIS_SSH_KEY_HECATE_TETHYS="{{ .Data.data.hecate_tethys_pub }}" export METIS_SSH_KEY_HECATE_DB="{{ .Data.data.hecate_db_pub }}" {{ end }}`
	)

	result := map[string]string{
		"vault.hashicorp.com/agent-inject":                               "true",
		"vault.hashicorp.com/agent-pre-populate-only":                    "true",
		"vault.hashicorp.com/role":                                       vaultRoleMaintenance,
		"vault.hashicorp.com/agent-inject-secret-metis-runtime-env.sh":   vaultRuntimeSecretPath,
		"vault.hashicorp.com/agent-inject-template-metis-runtime-env.sh": runtimeTemplate,
		"vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh":    vaultHarborSecretPath,
		"vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh":  harborTemplate,
	}
	if !includeSSHKeys {
		return result
	}

	result["vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh"] = vaultSSHKeysSecretPath
	result["vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh"] = sshTemplate
	return result
}

// remoteWorkerEntrypoint assembles the newline-separated shell script that a
// remote worker container runs: "set -e", sourcing of the injected runtime
// and Harbor env files (plus the SSH env file when includeSSHKeys is true),
// and finally an exec of "metis" with args, each argument shell-quoted.
func remoteWorkerEntrypoint(includeSSHKeys bool, args ...string) string {
	command := make([]string, 0, len(args)+1)
	command = append(command, "metis")
	command = append(command, args...)

	script := []string{
		"set -e",
		". /vault/secrets/metis-runtime-env.sh",
		". /vault/secrets/metis-harbor-env.sh",
	}
	if includeSSHKeys {
		script = append(script, ". /vault/secrets/metis-ssh-env.sh")
	}
	script = append(script, "exec "+shellJoin(command...))

	return strings.Join(script, "\n")
}

// shellJoin shell-quotes every argument and joins them with single spaces.
func shellJoin(args ...string) string {
	quoted := make([]string, len(args))
	for i, arg := range args {
		quoted[i] = shellQuote(arg)
	}
	return strings.Join(quoted, " ")
}

// shellQuote wraps value in single quotes for use in a POSIX shell command.
// Each embedded single quote is replaced by the sequence '"'"' (close quote,
// double-quoted quote, reopen quote). The empty string becomes ''.
func shellQuote(value string) string {
	const escapedQuote = `'"'"'`

	if len(value) == 0 {
		return "''"
	}
	escaped := strings.ReplaceAll(value, "'", escapedQuote)
	return "'" + escaped + "'"
}