feat: add metis service and autonomous recovery path
This commit is contained in:
parent
26eb9af430
commit
b8f26ecf41
10
.dockerignore
Normal file
10
.dockerignore
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
AGENTS.md
|
||||||
|
artifacts/
|
||||||
|
build/
|
||||||
|
tmp/
|
||||||
|
*.img
|
||||||
|
*.img.xz
|
||||||
|
*.qcow2
|
||||||
|
*.iso
|
||||||
45
Dockerfile
Normal file
45
Dockerfile
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# syntax=docker/dockerfile:1.7

# Build stage. Pinned to $BUILDPLATFORM so multi-platform buildx builds run
# the Go toolchain natively and cross-compile, instead of emulating the whole
# compiler under QEMU per target platform. CGO_ENABLED=0 below makes the
# binaries static, so cross-compiling is safe.
FROM --platform=$BUILDPLATFORM golang:1.22-bookworm AS build

# buildx supplies TARGETOS/TARGETARCH automatically per platform; the defaults
# keep a plain `docker build` producing the arm64 binaries this lab targets.
ARG TARGETOS=linux
ARG TARGETARCH=arm64

WORKDIR /src

# Copy lockfiles first so the module-download layer stays cached until
# go.mod/go.sum actually change.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# Cache mounts keep the Go build and module caches on the build host rather
# than baking them into an image layer.
RUN --mount=type=cache,target=/root/.cache/go-build \
    --mount=type=cache,target=/go/pkg/mod \
    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/metis ./cmd/metis && \
    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /out/metis-sentinel ./cmd/metis-sentinel

# Runtime base: carries the flash/build toolchain (filesystem tools, mount,
# ssh client, xz) that the metis service needs. NOTE(review): this image runs
# as root by design — it writes raw block devices and loop-mounts partitions.
FROM debian:bookworm-slim AS runtime-base

RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates e2fsprogs util-linux openssh-client xz-utils \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY --from=build /out/metis /usr/local/bin/metis
COPY --from=build /out/metis-sentinel /usr/local/bin/metis-sentinel
COPY inventory.example.yaml /app/inventory.example.yaml
COPY inventory.titan-rpi4.yaml /app/inventory.titan-rpi4.yaml
COPY overlays /app/overlays

# Operator-facing service image: runs `metis serve`.
FROM runtime-base AS runtime

# Documentation only; the service binds its configured address at runtime.
EXPOSE 8080

ENTRYPOINT ["metis"]
CMD ["serve"]

# Slim sentinel image for the node-facts DaemonSet; no flash toolchain.
FROM debian:bookworm-slim AS sentinel

RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates util-linux \
    && rm -rf /var/lib/apt/lists/*

COPY --from=build /out/metis-sentinel /usr/local/bin/metis-sentinel

ENTRYPOINT ["metis-sentinel"]
||||||
236
Jenkinsfile
vendored
Normal file
236
Jenkinsfile
vendored
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
// CI pipeline for Metis: test the Go code, then build and push multi-arch
// images (metis + metis-sentinel) to Harbor from an in-cluster dind builder.
pipeline {
    agent {
        kubernetes {
            label 'metis'
            defaultContainer 'builder'
            // Pod layout: a privileged docker-in-docker daemon backed by a
            // shared cache PVC, a docker-CLI "builder", a Go "tester", and a
            // Python "publisher". Scheduled on arm64 workers; images pulled
            // via the Harbor robot pull secret.
            yaml """
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: metis
spec:
  nodeSelector:
    kubernetes.io/arch: arm64
    node-role.kubernetes.io/worker: "true"
  imagePullSecrets:
    - name: harbor-robot-pipeline
  containers:
    - name: dind
      image: docker:27-dind
      securityContext:
        privileged: true
      env:
        - name: DOCKER_TLS_CERTDIR
          value: ""
      args:
        - --mtu=1400
        - --host=unix:///var/run/docker.sock
        - --host=tcp://0.0.0.0:2375
      volumeMounts:
        - name: dind-storage
          mountPath: /var/lib/docker
    - name: builder
      image: docker:27
      command: ["cat"]
      tty: true
      env:
        - name: DOCKER_HOST
          value: tcp://localhost:2375
        - name: DOCKER_TLS_CERTDIR
          value: ""
        - name: DOCKER_CONFIG
          value: /root/.docker
      volumeMounts:
        - name: workspace-volume
          mountPath: /home/jenkins/agent
        - name: docker-config-writable
          mountPath: /root/.docker
        - name: harbor-config
          mountPath: /docker-config
    - name: tester
      image: golang:1.22-bookworm
      command: ["cat"]
      tty: true
      volumeMounts:
        - name: workspace-volume
          mountPath: /home/jenkins/agent
    - name: publisher
      image: python:3.12-slim
      command: ["cat"]
      tty: true
      volumeMounts:
        - name: workspace-volume
          mountPath: /home/jenkins/agent
  volumes:
    - name: workspace-volume
      emptyDir: {}
    - name: docker-config-writable
      emptyDir: {}
    - name: dind-storage
      persistentVolumeClaim:
        claimName: jenkins-dind-cache
    - name: harbor-config
      secret:
        secretName: harbor-robot-pipeline
        items:
          - key: .dockerconfigjson
            path: config.json
"""
        }
    }
    environment {
        REGISTRY = 'registry.bstein.dev/bstein'
        IMAGE = "${REGISTRY}/metis"
        SENTINEL_IMAGE = "${REGISTRY}/metis-sentinel"
        // VERSION_TAG/SEMVER default to 'dev' and are overwritten by the
        // "Compute version" stage before any image is pushed.
        VERSION_TAG = 'dev'
        SEMVER = 'dev'
        COVERAGE_JSON = 'build/coverage.json'
        JUNIT_XML = 'build/junit.xml'
        // CI metrics share the ariadne_ci_* series; distinguished per repo.
        METRICS_PREFIX = 'ariadne_ci'
        VM_IMPORT_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus'
        REPO_NAME = 'metis'
    }
    options {
        disableConcurrentBuilds()
    }
    triggers {
        pollSCM('H/5 * * * *')
    }
    stages {
        stage('Checkout') {
            steps {
                checkout scm
            }
        }

        // Runs `go test` with coverage, converts output to JUnit XML, and
        // writes a coverage summary JSON. The `set +e` / PIPESTATUS dance
        // preserves the real test exit code across the tee pipe so the
        // reports are produced even when tests fail, then the stage exits
        // with the original test result.
        stage('Unit tests') {
            steps {
                container('tester') {
                    sh '''
bash -lc '
set -euo pipefail
apt-get update >/dev/null
apt-get install -y --no-install-recommends xz-utils >/dev/null
mkdir -p build
go install github.com/jstemmer/go-junit-report/v2@latest
set +e
go test -coverprofile=build/coverage.out ./... 2>&1 | tee build/test.out
test_rc=${PIPESTATUS[0]}
set -e
/root/go/bin/go-junit-report < build/test.out > "${JUNIT_XML}"
coverage="0"
if [ -f build/coverage.out ]; then
coverage="$(go tool cover -func=build/coverage.out | awk '/^total:/ {gsub("%","",$3); print $3}')"
fi
export GO_COVERAGE="${coverage}"
python3 - <<'"'"'PY'"'"'
import json, os
coverage = float(os.environ.get("GO_COVERAGE", "0") or "0")
with open("build/coverage.json", "w", encoding="utf-8") as handle:
    json.dump({"summary": {"percent_covered": coverage}}, handle)
PY
exit ${test_rc}
'
'''
                }
            }
        }

        // Pushes the JUnit/coverage results into VictoriaMetrics
        // (see VM_IMPORT_URL / METRICS_PREFIX above).
        stage('Publish test metrics') {
            steps {
                container('publisher') {
                    sh '''
set -euo pipefail
python scripts/publish_test_metrics.py
'''
                }
            }
        }

        // Copies the Harbor robot credentials from the read-only secret
        // mount into the writable DOCKER_CONFIG dir so docker can push.
        stage('Prep toolchain') {
            steps {
                container('builder') {
                    sh '''
set -euo pipefail
mkdir -p /root/.docker
cp /docker-config/config.json /root/.docker/config.json
'''
                }
            }
        }

        // Derives the image tag from the build number and exports it both as
        // a file (build.env, read again in the build stage) and as env vars.
        stage('Compute version') {
            steps {
                container('builder') {
                    script {
                        sh '''
set -euo pipefail
SEMVER="0.1.0-${BUILD_NUMBER}"
echo "SEMVER=${SEMVER}" > build.env
'''
                        def props = readProperties file: 'build.env'
                        env.SEMVER = props['SEMVER'] ?: "0.1.0-${env.BUILD_NUMBER}"
                        env.VERSION_TAG = env.SEMVER
                    }
                }
            }
        }

        // Waits (up to ~20s) for the dind daemon to come up, then selects or
        // creates a buildx builder.
        stage('Buildx setup') {
            steps {
                container('builder') {
                    sh '''
set -euo pipefail
for i in $(seq 1 10); do
if docker info >/dev/null 2>&1; then
break
fi
sleep 2
done
docker buildx use default || docker buildx create --name default --driver docker --use
'''
                }
            }
        }

        // Builds and pushes both multi-arch images from the same Dockerfile:
        // target "runtime" for the service, target "sentinel" for the
        // DaemonSet. Each gets the computed version tag plus :latest.
        stage('Build & push image') {
            steps {
                container('builder') {
                    sh '''
set -euo pipefail
VERSION_TAG="$(cut -d= -f2 build.env)"
docker buildx build \
--platform linux/amd64,linux/arm64 \
--tag "${IMAGE}:${VERSION_TAG}" \
--tag "${IMAGE}:latest" \
--target runtime \
--push \
.
docker buildx build \
--platform linux/amd64,linux/arm64 \
--tag "${SENTINEL_IMAGE}:${VERSION_TAG}" \
--tag "${SENTINEL_IMAGE}:latest" \
--target sentinel \
--push \
.
'''
                }
            }
        }
    }
    post {
        always {
            script {
                // The junit step may be missing on minimal controllers;
                // degrade to a log line rather than failing the build.
                if (fileExists('build/junit.xml')) {
                    try {
                        junit allowEmptyResults: true, testResults: 'build/junit.xml'
                    } catch (Throwable err) {
                        echo "junit step unavailable: ${err.class.simpleName}"
                    }
                }
            }
            archiveArtifacts artifacts: 'build/junit.xml,build/coverage.json,build/coverage.out', allowEmptyArchive: true, fingerprint: true
        }
    }
}
|
||||||
32
README.md
32
README.md
@ -26,3 +26,35 @@ Metis produces fully configured recovery SD cards for any node in the lab (RPi 4
|
|||||||
- `pkg/` – shared lib (inventory, imaging, injectors, platform abstraction)
|
- `pkg/` – shared lib (inventory, imaging, injectors, platform abstraction)
|
||||||
- `docs/` – user/operator docs (this will stay light; working notes live in AGENTS.md untracked)
|
- `docs/` – user/operator docs (this will stay light; working notes live in AGENTS.md untracked)
|
||||||
- `AGENTS.md` – local, untracked working notes (do not add to git)
|
- `AGENTS.md` – local, untracked working notes (do not add to git)
|
||||||
|
|
||||||
|
## Current modes
|
||||||
|
- `metis plan --inventory inv.yaml --node titan-13 --device /dev/sdz --cache /tmp/metis-cache` prints the burn plan (respects `--boot/--root` or `METIS_*` envs for injection steps).
|
||||||
|
- `metis burn ... --yes` downloads/verifies the golden image, writes it (dd for `/dev/*`, file copy otherwise), and injects node config when mounts are provided.
|
||||||
|
- Pass `--boot /mnt/boot --root /mnt/root` (or set `METIS_BOOT_PATH`/`METIS_ROOT_PATH`) to drop hostname, k3s config, ssh keys, NoCloud user-data, and a debug `etc/metis/node.json` into the mounted card. If unset, injection is skipped (write-only).
|
||||||
|
- `--auto-mount` attempts to mount `/dev/*` partitions (or loop images) automatically for injection on Linux (requires privileges).
|
||||||
|
- `metis image --inventory inv.yaml --node titan-13 --output artifacts/titan-13.img` produces a fully injected raw image artifact without writing to removable media.
|
||||||
|
- `metis serve` runs the operator-facing Metis service:
|
||||||
|
- web UI for build/flash workflows
|
||||||
|
- Prometheus metrics on `/metrics`
|
||||||
|
- internal sentinel snapshot + watch endpoints
|
||||||
|
- Container images are split for gentler cluster operation:
|
||||||
|
- `metis` carries the flash/build toolchain and is intended to run on `titan-22`
|
||||||
|
- `metis-sentinel` stays slim for the DaemonSet that samples node facts
|
||||||
|
- Class overlays: define `boot_overlay`/`root_overlay` on a class to merge static files into boot/root at burn time (e.g., cloud-init/netplan drop-ins, GPU driver configs). Per-node config still injects hostname/IP/k3s/SSH/Longhorn.
|
||||||
|
- Linux loop-mount helper (losetup/mount) exists for automation; wiring into CLI burn is next. Windows writer/GUI stub forthcoming.
|
||||||
|
- Vault: Metis can read per-node secrets from `secret/data/nodes/<hostname>` using VAULT_ADDR plus either VAULT_TOKEN or AppRole (VAULT_ROLE_ID/VAULT_SECRET_ID). Expected fields: ssh_password, k3s_token, cloud_init, extra map.
|
||||||
|
- Sentinel: `metis-sentinel` collects host facts and can either print them, write local history, or push them into the Metis service. The intended deployment shape is a DaemonSet on cluster nodes plus an Ariadne-triggered Metis watch that recomputes recommended class targets and drift history.
|
||||||
|
- Facts aggregation: `metis facts --inventory inv.yaml --snapshots ./snapshots` reads sentinel snapshot JSON files and prints per-class drift summary (kernels, containerd, k3s, package samples). Use exported ConfigMaps or `METIS_SENTINEL_OUT` history as input.
|
||||||
|
- `metis config --inventory inv.yaml --node titan-13` prints the merged node config (hostname/IP/k3s labels/taints/Longhorn UUIDs).
|
||||||
|
|
||||||
|
## Service direction
|
||||||
|
- Deployed UI protected by Atlas SSO headers (`admin` / `maintainer`)
|
||||||
|
- Default flash host support for `titan-22`
|
||||||
|
- Recent build / flash / sentinel change history
|
||||||
|
- Ariadne-driven sentinel watch cadence
|
||||||
|
- Prometheus/Grafana visibility for Metis runs and tests
|
||||||
|
- CI test metrics share the `ariadne_ci_*` series and are distinguished by `repo="metis"`
|
||||||
|
|
||||||
|
Current deployment note: the service can fetch and verify the rpi4 base image from an official URL via `METIS_IMAGE_RPI4_ARMBIAN_LONGHORN` and `METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256`, then cache it locally on the flash host. A mirrored Harbor-backed base image is still preferable long term, but it is no longer a prerequisite for Texas-side builds.
|
||||||
|
|
||||||
|
Next steps: publish the service images, add the SCM remote/repo for Metis, and broaden inventory coverage beyond the current Titan recovery classes.
|
||||||
|
|||||||
89
cmd/metis-sentinel/main.go
Normal file
89
cmd/metis-sentinel/main.go
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/sentinel"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
interval := time.Duration(getenvInt("METIS_SENTINEL_INTERVAL_SEC", 300)) * time.Second
|
||||||
|
pushURL := os.Getenv("METIS_SENTINEL_PUSH_URL")
|
||||||
|
runOnce := os.Getenv("METIS_SENTINEL_RUN_ONCE") == "1"
|
||||||
|
|
||||||
|
for {
|
||||||
|
snap := sentinel.Collect()
|
||||||
|
enc := json.NewEncoder(os.Stdout)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
if err := enc.Encode(snap); err != nil {
|
||||||
|
log.Fatalf("encode: %v", err)
|
||||||
|
}
|
||||||
|
if out := os.Getenv("METIS_SENTINEL_OUT"); out != "" {
|
||||||
|
writeHistory(out, snap)
|
||||||
|
}
|
||||||
|
if pushURL != "" {
|
||||||
|
if err := pushSnapshot(pushURL, snap); err != nil {
|
||||||
|
log.Printf("push snapshot failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if runOnce || pushURL == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(interval)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeHistory(path string, snap *sentinel.Snapshot) {
|
||||||
|
if path == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ts := time.Now().UTC().Format("20060102T150405Z")
|
||||||
|
b, _ := json.MarshalIndent(snap, "", " ")
|
||||||
|
_ = os.WriteFile(filepath.Join(path, "snapshot-"+ts+".json"), b, 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func pushSnapshot(url string, snap *sentinel.Snapshot) error {
|
||||||
|
payload := map[string]any{
|
||||||
|
"node": snap.Hostname,
|
||||||
|
"collected_at": time.Now().UTC(),
|
||||||
|
"snapshot": snap,
|
||||||
|
}
|
||||||
|
body, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode >= 300 {
|
||||||
|
return fmt.Errorf("push snapshot: %s", resp.Status)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getenvInt(key string, fallback int) int {
|
||||||
|
if raw := os.Getenv(key); raw != "" {
|
||||||
|
if value, err := strconv.Atoi(raw); err == nil {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
33
cmd/metis/facts_cmd.go
Normal file
33
cmd/metis/facts_cmd.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"metis/pkg/facts"
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
)
|
||||||
|
|
||||||
|
func factsCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("facts", flag.ExitOnError)
|
||||||
|
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
|
||||||
|
dir := fs.String("snapshots", "snapshots", "directory of sentinel snapshot json files")
|
||||||
|
fs.Parse(args)
|
||||||
|
inv, err := inventory.Load(*invPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("load inventory: %v", err)
|
||||||
|
}
|
||||||
|
snaps, err := facts.LoadDir(*dir)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("load snapshots: %v", err)
|
||||||
|
}
|
||||||
|
sum := facts.Aggregate(inv, snaps)
|
||||||
|
enc := json.NewEncoder(os.Stdout)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
if err := enc.Encode(sum); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "encode: %v\n", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
38
cmd/metis/image_cmd.go
Normal file
38
cmd/metis/image_cmd.go
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"metis/pkg/plan"
|
||||||
|
)
|
||||||
|
|
||||||
|
func imageCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("image", flag.ExitOnError)
|
||||||
|
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
|
||||||
|
node := fs.String("node", "", "target node")
|
||||||
|
output := fs.String("output", "", "output raw image path")
|
||||||
|
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
|
||||||
|
fs.Parse(args)
|
||||||
|
if *node == "" {
|
||||||
|
log.Fatalf("--node is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
inv := loadInventory(*invPath)
|
||||||
|
targetOutput := *output
|
||||||
|
if targetOutput == "" {
|
||||||
|
targetOutput = filepath.Join("artifacts", fmt.Sprintf("%s.img", *node))
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := plan.BuildImageFile(context.Background(), inv, *node, *cache, targetOutput); err != nil {
|
||||||
|
log.Fatalf("build image: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Wrote %s\n", targetOutput)
|
||||||
|
fmt.Println("Injected rootfs recovery config and overlays.")
|
||||||
|
fmt.Println("Boot-partition NoCloud files are intentionally skipped for this Armbian rpi4 recovery flow.")
|
||||||
|
}
|
||||||
27
cmd/metis/inject_cmd.go
Normal file
27
cmd/metis/inject_cmd.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"metis/pkg/plan"
|
||||||
|
)
|
||||||
|
|
||||||
|
func injectCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("inject", flag.ExitOnError)
|
||||||
|
invPath := fs.String("inventory", "inventory.yaml", "inventory file")
|
||||||
|
node := fs.String("node", "", "target node")
|
||||||
|
boot := fs.String("boot", "", "mounted boot path")
|
||||||
|
root := fs.String("root", "", "mounted root path")
|
||||||
|
fs.Parse(args)
|
||||||
|
if *node == "" {
|
||||||
|
log.Fatalf("--node is required")
|
||||||
|
}
|
||||||
|
if *boot == "" && *root == "" {
|
||||||
|
log.Fatalf("--boot or --root is required")
|
||||||
|
}
|
||||||
|
inv := loadInventory(*invPath)
|
||||||
|
if err := plan.Inject(inv, *node, *boot, *root); err != nil {
|
||||||
|
log.Fatalf("inject: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -22,8 +22,16 @@ func main() {
|
|||||||
planCmd(os.Args[2:])
|
planCmd(os.Args[2:])
|
||||||
case "burn":
|
case "burn":
|
||||||
burnCmd(os.Args[2:])
|
burnCmd(os.Args[2:])
|
||||||
|
case "image":
|
||||||
|
imageCmd(os.Args[2:])
|
||||||
|
case "serve":
|
||||||
|
serveCmd(os.Args[2:])
|
||||||
|
case "inject":
|
||||||
|
injectCmd(os.Args[2:])
|
||||||
case "config":
|
case "config":
|
||||||
configCmd(os.Args[2:])
|
configCmd(os.Args[2:])
|
||||||
|
case "facts":
|
||||||
|
factsCmd(os.Args[2:])
|
||||||
default:
|
default:
|
||||||
usage()
|
usage()
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
@ -31,7 +39,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
fmt.Fprintf(os.Stderr, "Usage: metis <plan|burn> [options]\n")
|
fmt.Fprintf(os.Stderr, "Usage: metis <plan|burn|image|serve|inject|config|facts> [options]\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadInventory(path string) *inventory.Inventory {
|
func loadInventory(path string) *inventory.Inventory {
|
||||||
@ -48,11 +56,19 @@ func planCmd(args []string) {
|
|||||||
node := fs.String("node", "", "target node")
|
node := fs.String("node", "", "target node")
|
||||||
device := fs.String("device", "/dev/sdX", "target block device")
|
device := fs.String("device", "/dev/sdX", "target block device")
|
||||||
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
|
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
|
||||||
|
boot := fs.String("boot", "", "mounted boot path for injection (optional)")
|
||||||
|
root := fs.String("root", "", "mounted root path for injection (optional)")
|
||||||
fs.Parse(args)
|
fs.Parse(args)
|
||||||
if *node == "" {
|
if *node == "" {
|
||||||
log.Fatalf("--node is required")
|
log.Fatalf("--node is required")
|
||||||
}
|
}
|
||||||
inv := loadInventory(*invPath)
|
inv := loadInventory(*invPath)
|
||||||
|
if *boot != "" {
|
||||||
|
os.Setenv("METIS_BOOT_PATH", *boot)
|
||||||
|
}
|
||||||
|
if *root != "" {
|
||||||
|
os.Setenv("METIS_ROOT_PATH", *root)
|
||||||
|
}
|
||||||
p, err := plan.Build(inv, *node, *device, *cache)
|
p, err := plan.Build(inv, *node, *device, *cache)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("build plan: %v", err)
|
log.Fatalf("build plan: %v", err)
|
||||||
@ -68,12 +84,24 @@ func burnCmd(args []string) {
|
|||||||
node := fs.String("node", "", "target node")
|
node := fs.String("node", "", "target node")
|
||||||
device := fs.String("device", "", "target block device (e.g. /dev/sdX)")
|
device := fs.String("device", "", "target block device (e.g. /dev/sdX)")
|
||||||
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
|
cache := fs.String("cache", filepath.Join(os.TempDir(), "metis-cache"), "image cache dir")
|
||||||
|
boot := fs.String("boot", "", "mounted boot path for injection (optional)")
|
||||||
|
root := fs.String("root", "", "mounted root path for injection (optional)")
|
||||||
|
autoMount := fs.Bool("auto-mount", false, "auto-mount boot/root for injection (linux, requires privileges)")
|
||||||
confirm := fs.Bool("yes", false, "actually write to device")
|
confirm := fs.Bool("yes", false, "actually write to device")
|
||||||
fs.Parse(args)
|
fs.Parse(args)
|
||||||
if *node == "" || *device == "" {
|
if *node == "" || *device == "" {
|
||||||
log.Fatalf("--node and --device are required")
|
log.Fatalf("--node and --device are required")
|
||||||
}
|
}
|
||||||
inv := loadInventory(*invPath)
|
inv := loadInventory(*invPath)
|
||||||
|
if *boot != "" {
|
||||||
|
os.Setenv("METIS_BOOT_PATH", *boot)
|
||||||
|
}
|
||||||
|
if *root != "" {
|
||||||
|
os.Setenv("METIS_ROOT_PATH", *root)
|
||||||
|
}
|
||||||
|
if *autoMount {
|
||||||
|
os.Setenv("METIS_AUTO_MOUNT", "1")
|
||||||
|
}
|
||||||
p, err := plan.Execute(inv, *node, *device, *cache, *confirm)
|
p, err := plan.Execute(inv, *node, *device, *cache, *confirm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("burn: %v", err)
|
log.Fatalf("burn: %v", err)
|
||||||
|
|||||||
28
cmd/metis/serve_cmd.go
Normal file
28
cmd/metis/serve_cmd.go
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"metis/pkg/service"
|
||||||
|
)
|
||||||
|
|
||||||
|
func serveCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("serve", flag.ExitOnError)
|
||||||
|
bindAddr := fs.String("bind", "", "override bind address")
|
||||||
|
fs.Parse(args)
|
||||||
|
|
||||||
|
settings := service.FromEnv()
|
||||||
|
if *bindAddr != "" {
|
||||||
|
settings.BindAddr = *bindAddr
|
||||||
|
}
|
||||||
|
app, err := service.NewApp(settings)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("init service: %v", err)
|
||||||
|
}
|
||||||
|
log.Printf("metis listening on %s", settings.BindAddr)
|
||||||
|
if err := http.ListenAndServe(settings.BindAddr, app.Handler()); err != nil {
|
||||||
|
log.Fatalf("serve: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
89
docs/titan-rpi4-recovery.md
Normal file
89
docs/titan-rpi4-recovery.md
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# Titan rpi4 Longhorn Recovery
|
||||||
|
|
||||||
|
This flow is for `titan-13`, `titan-15`, `titan-17`, and `titan-19`.
|
||||||
|
|
||||||
|
## Why this works
|
||||||
|
|
||||||
|
- The replacement card is burned from a plain Armbian rpi4 image.
|
||||||
|
- Metis injects the original node identity, k3s config, SSH key, and Longhorn disk UUIDs.
|
||||||
|
- The image also carries a static NetworkManager profile for the node IP plus local `k3s` and `open-iscsi` payloads sourced from a healthy rpi4 Longhorn node.
|
||||||
|
- An Armbian first-boot hook finishes the host bootstrap automatically:
|
||||||
|
- enables SSH on port `2277`
|
||||||
|
- mounts `/mnt/astreae` and `/mnt/asteria`
|
||||||
|
- ensures the iSCSI initiator identity exists
|
||||||
|
- starts `open-iscsi`
|
||||||
|
- starts `k3s-agent`
|
||||||
|
- For this Armbian flow, the important recovery files live on the root partition; boot NoCloud files are optional and not required for node recovery.
|
||||||
|
|
||||||
|
## Before burning
|
||||||
|
|
||||||
|
For a same-name replacement, remove the old node object first so k3s can re-register the node cleanly.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl delete node titan-13
|
||||||
|
kubectl delete node titan-19
|
||||||
|
```
|
||||||
|
|
||||||
|
Then export the live cluster join token:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export METIS_K3S_TOKEN="$(ssh titan-0a 'sudo cat /var/lib/rancher/k3s/server/node-token')"
|
||||||
|
export METIS_IMAGE_RPI4_ARMBIAN_LONGHORN="file://${HOME}/Downloads/Armbian_25.8.1_Rpi4b_noble_current_6.12.41.img"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Burn commands
|
||||||
|
|
||||||
|
Inspect the merged config first:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go run ./cmd/metis config --inventory inventory.titan-rpi4.yaml --node titan-13
|
||||||
|
go run ./cmd/metis config --inventory inventory.titan-rpi4.yaml --node titan-19
|
||||||
|
```
|
||||||
|
|
||||||
|
If you want ready-to-flash artifacts before inserting SD cards, build them first:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go run ./cmd/metis image \
|
||||||
|
--inventory inventory.titan-rpi4.yaml \
|
||||||
|
--node titan-13 \
|
||||||
|
--cache "${HOME}/.cache/metis" \
|
||||||
|
--output artifacts/titan-13.img
|
||||||
|
|
||||||
|
go run ./cmd/metis image \
|
||||||
|
--inventory inventory.titan-rpi4.yaml \
|
||||||
|
--node titan-19 \
|
||||||
|
--cache "${HOME}/.cache/metis" \
|
||||||
|
--output artifacts/titan-19.img
|
||||||
|
```
|
||||||
|
|
||||||
|
Burn the cards:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo -E go run ./cmd/metis burn \
|
||||||
|
--inventory inventory.titan-rpi4.yaml \
|
||||||
|
--node titan-13 \
|
||||||
|
--device /dev/sdX \
|
||||||
|
--cache "${HOME}/.cache/metis" \
|
||||||
|
--auto-mount \
|
||||||
|
--yes
|
||||||
|
|
||||||
|
sudo -E go run ./cmd/metis burn \
|
||||||
|
--inventory inventory.titan-rpi4.yaml \
|
||||||
|
--node titan-19 \
|
||||||
|
--device /dev/sdY \
|
||||||
|
--cache "${HOME}/.cache/metis" \
|
||||||
|
--auto-mount \
|
||||||
|
--yes
|
||||||
|
```
|
||||||
|
|
||||||
|
## After boot
|
||||||
|
|
||||||
|
Because the hardware stays the same, the Pi should keep the same MAC address and reclaim the same DHCP reservation.
|
||||||
|
|
||||||
|
Validate:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get nodes | grep 'titan-13\|titan-19'
|
||||||
|
kubectl -n longhorn-system get nodes.longhorn.io
|
||||||
|
kubectl -n longhorn-system get replicas.longhorn.io -o wide | grep 'titan-13\|titan-19'
|
||||||
|
```
|
||||||
113
docs/titan-rpi4-remote-replacement.md
Normal file
113
docs/titan-rpi4-remote-replacement.md
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
# Titan rpi4 Remote Replacement
|
||||||
|
|
||||||
|
This is the low-touch replacement flow for `titan-13` and `titan-19` when the
|
||||||
|
person onsite can only:
|
||||||
|
|
||||||
|
1. insert an SD card into the flashing machine
|
||||||
|
2. swap the card into the Pi
|
||||||
|
3. power-cycle the Pi
|
||||||
|
|
||||||
|
The remote operator does everything else.
|
||||||
|
|
||||||
|
## What the image does by itself
|
||||||
|
|
||||||
|
After the stale Kubernetes node object is deleted and the replacement image is
|
||||||
|
flashed, the booted Pi is expected to do the rest automatically:
|
||||||
|
|
||||||
|
- bring up SSH on port `2277`
|
||||||
|
- set the node hostname
|
||||||
|
- bring up the node's static `192.168.22.x` address on `end0`
|
||||||
|
- mount `/mnt/astreae` and `/mnt/asteria`
|
||||||
|
- start `open-iscsi`
|
||||||
|
- start `k3s-agent`
|
||||||
|
- rejoin the cluster with the baked-in node token and server URL
|
||||||
|
|
||||||
|
## Version clarification
|
||||||
|
|
||||||
|
As of **March 31, 2026**, the live cluster reports:
|
||||||
|
|
||||||
|
- control plane: `k3s v1.33.3+k3s1`
|
||||||
|
- healthy rpi4 Longhorn workers (`titan-15`, `titan-17`): `k3s v1.31.5+k3s1`
|
||||||
|
|
||||||
|
The `6.6.63` and `6.12.41` numbers are Linux kernel versions, not Kubernetes
|
||||||
|
versions.
|
||||||
|
|
||||||
|
Kubernetes' official version skew policy says a `kubelet` may be up to three
|
||||||
|
minor versions older than the `kube-apiserver`, so `1.31` workers against a
|
||||||
|
`1.33` control plane are supported today:
|
||||||
|
|
||||||
|
- https://kubernetes.io/releases/version-skew-policy/
|
||||||
|
|
||||||
|
The replacement images intentionally keep the rpi4 worker `k3s` version aligned
|
||||||
|
with the healthy HDD-backed rpi4 workers to avoid introducing a Kubernetes minor
|
||||||
|
change during node recovery.
|
||||||
|
|
||||||
|
## Remote flashing flow
|
||||||
|
|
||||||
|
Run these commands from the machine that has the `metis` repo and your SSH
|
||||||
|
access.
|
||||||
|
|
||||||
|
### 1. Build the image and delete the stale node object
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Development/metis
|
||||||
|
./scripts/prepare_titan_rpi4_replacement.sh titan-13 titan-22
|
||||||
|
./scripts/prepare_titan_rpi4_replacement.sh titan-19 titan-22
|
||||||
|
```
|
||||||
|
|
||||||
|
This does all of the following:
|
||||||
|
|
||||||
|
- fetches the current cluster node token from `titan-0a`
|
||||||
|
- deletes the stale Kubernetes `Node` object
|
||||||
|
- builds the replacement image under `artifacts/`
|
||||||
|
- copies it to `titan-22:/tmp/metis-images/`
|
||||||
|
|
||||||
|
### 2. Ask the onsite helper to insert the SD card into `titan-22`
|
||||||
|
|
||||||
|
When the card is inserted, identify the target device:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/remote_sd_candidates.sh titan-22
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Flash the card remotely
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/remote_flash_titan_image.sh titan-22 titan-13 /dev/sdX
|
||||||
|
./scripts/remote_flash_titan_image.sh titan-22 titan-19 /dev/sdY
|
||||||
|
```
|
||||||
|
|
||||||
|
The remote machine will ask for its `sudo` password during the flash.
|
||||||
|
|
||||||
|
### 4. Ask the onsite helper to swap the card and power-cycle the Pi
|
||||||
|
|
||||||
|
That should be the end of the onsite work.
|
||||||
|
|
||||||
|
### 5. Validate remotely
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get nodes -w
|
||||||
|
kubectl -n longhorn-system get nodes.longhorn.io
|
||||||
|
kubectl -n longhorn-system get replicas.longhorn.io -o wide | grep 'titan-13\|titan-19'
|
||||||
|
ssh titan-13
|
||||||
|
ssh titan-19
|
||||||
|
```
|
||||||
|
|
||||||
|
## USB boot
|
||||||
|
|
||||||
|
Raspberry Pi 4 supports USB mass storage boot via its EEPROM bootloader:
|
||||||
|
|
||||||
|
- https://www.raspberrypi.com/documentation/computers/raspberry-pi.html#usb-mass-storage-boot
|
||||||
|
|
||||||
|
That means the same general recovery image approach can be used on a USB device
|
||||||
|
instead of an SD card.
|
||||||
|
|
||||||
|
For this cluster, the safer rollout is:
|
||||||
|
|
||||||
|
1. first recover `titan-13` and `titan-19` to known-good SD cards
|
||||||
|
2. pilot USB boot on one non-critical rpi4
|
||||||
|
3. only then migrate the Longhorn HDD-backed rpi4s
|
||||||
|
|
||||||
|
USB boot is attractive for wear reduction, but it adds EEPROM boot-order,
|
||||||
|
adapter, and power-delivery variables. The emergency replacement process above
|
||||||
|
should stay SD-based until the USB path has been tested on your actual hardware.
|
||||||
@ -5,6 +5,8 @@ classes:
|
|||||||
os: ubuntu-24.04
|
os: ubuntu-24.04
|
||||||
image: https://harbor.bstein.dev/library/rpi5-ubuntu-worker.img
|
image: https://harbor.bstein.dev/library/rpi5-ubuntu-worker.img
|
||||||
checksum: sha256:REPLACE_ME
|
checksum: sha256:REPLACE_ME
|
||||||
|
boot_overlay: overlays/rpi5-boot
|
||||||
|
root_overlay: overlays/rpi5-root
|
||||||
default_labels:
|
default_labels:
|
||||||
hardware: rpi5
|
hardware: rpi5
|
||||||
node-role.kubernetes.io/worker: "true"
|
node-role.kubernetes.io/worker: "true"
|
||||||
@ -14,6 +16,8 @@ classes:
|
|||||||
os: armbian-6.6
|
os: armbian-6.6
|
||||||
image: https://harbor.bstein.dev/library/rpi4-armbian-longhorn.img
|
image: https://harbor.bstein.dev/library/rpi4-armbian-longhorn.img
|
||||||
checksum: sha256:REPLACE_ME
|
checksum: sha256:REPLACE_ME
|
||||||
|
boot_overlay: overlays/rpi4-boot
|
||||||
|
root_overlay: overlays/rpi4-root
|
||||||
default_labels:
|
default_labels:
|
||||||
hardware: rpi4
|
hardware: rpi4
|
||||||
longhorn: "true"
|
longhorn: "true"
|
||||||
@ -24,10 +28,24 @@ classes:
|
|||||||
os: ubuntu-24.04
|
os: ubuntu-24.04
|
||||||
image: https://harbor.bstein.dev/library/rpi5-ubuntu-control.img
|
image: https://harbor.bstein.dev/library/rpi5-ubuntu-control.img
|
||||||
checksum: sha256:REPLACE_ME
|
checksum: sha256:REPLACE_ME
|
||||||
|
boot_overlay: overlays/cp-boot
|
||||||
|
root_overlay: overlays/cp-root
|
||||||
default_labels:
|
default_labels:
|
||||||
node-role.kubernetes.io/control-plane: "true"
|
node-role.kubernetes.io/control-plane: "true"
|
||||||
default_taints:
|
default_taints:
|
||||||
- node-role.kubernetes.io/control-plane:NoSchedule
|
- node-role.kubernetes.io/control-plane:NoSchedule
|
||||||
|
- name: jetson-accelerator
|
||||||
|
arch: arm64
|
||||||
|
os: ubuntu-20.04-tegra
|
||||||
|
image: https://harbor.bstein.dev/library/jetson-accelerator.img
|
||||||
|
checksum: sha256:REPLACE_ME
|
||||||
|
boot_overlay: overlays/jetson-boot
|
||||||
|
root_overlay: overlays/jetson-root
|
||||||
|
default_labels:
|
||||||
|
accelerator: nvidia
|
||||||
|
jetson: "true"
|
||||||
|
node-role.kubernetes.io/accelerator: ""
|
||||||
|
default_taints: []
|
||||||
|
|
||||||
nodes:
|
nodes:
|
||||||
- name: titan-04
|
- name: titan-04
|
||||||
@ -54,3 +72,21 @@ nodes:
|
|||||||
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
|
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
|
||||||
fs: ext4
|
fs: ext4
|
||||||
ssh_user: root
|
ssh_user: root
|
||||||
|
- name: titan-20
|
||||||
|
class: jetson-accelerator
|
||||||
|
hostname: titan-20
|
||||||
|
ip: 192.168.22.20
|
||||||
|
k3s_role: agent
|
||||||
|
labels:
|
||||||
|
accelerator: nvidia
|
||||||
|
jetson: "true"
|
||||||
|
ssh_user: ubuntu
|
||||||
|
- name: titan-21
|
||||||
|
class: jetson-accelerator
|
||||||
|
hostname: titan-21
|
||||||
|
ip: 192.168.22.21
|
||||||
|
k3s_role: agent
|
||||||
|
labels:
|
||||||
|
accelerator: nvidia
|
||||||
|
jetson: "true"
|
||||||
|
ssh_user: ubuntu
|
||||||
|
|||||||
81
inventory.titan-rpi4.yaml
Normal file
81
inventory.titan-rpi4.yaml
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
classes:
|
||||||
|
- name: rpi4-armbian-longhorn
|
||||||
|
arch: arm64
|
||||||
|
os: armbian-noble
|
||||||
|
image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
|
||||||
|
checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
|
||||||
|
k3s_version: v1.31.5+k3s1
|
||||||
|
default_labels:
|
||||||
|
hardware: rpi4
|
||||||
|
node-role.kubernetes.io/worker: "true"
|
||||||
|
root_overlay: overlays/rpi4-armbian-longhorn-root
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
- name: titan-13
|
||||||
|
class: rpi4-armbian-longhorn
|
||||||
|
hostname: titan-13
|
||||||
|
ip: 192.168.22.41
|
||||||
|
k3s_role: agent
|
||||||
|
k3s_url: https://192.168.22.7:6443
|
||||||
|
k3s_token: ${METIS_K3S_TOKEN}
|
||||||
|
ssh_user: atlas
|
||||||
|
ssh_authorized_keys:
|
||||||
|
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOb8oMX6u0z3sH/p/WBGlvPXXdbGETCKzWYwR/dd6fZb titan-bastion
|
||||||
|
longhorn_disks:
|
||||||
|
- mountpoint: /mnt/astreae
|
||||||
|
uuid: 6031fa8b-f28c-45c3-b7bc-6133300e07c6
|
||||||
|
fs: ext4
|
||||||
|
- mountpoint: /mnt/asteria
|
||||||
|
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
|
||||||
|
fs: ext4
|
||||||
|
- name: titan-15
|
||||||
|
class: rpi4-armbian-longhorn
|
||||||
|
hostname: titan-15
|
||||||
|
ip: 192.168.22.43
|
||||||
|
k3s_role: agent
|
||||||
|
k3s_url: https://192.168.22.7:6443
|
||||||
|
k3s_token: ${METIS_K3S_TOKEN}
|
||||||
|
ssh_user: atlas
|
||||||
|
ssh_authorized_keys:
|
||||||
|
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOb8oMX6u0z3sH/p/WBGlvPXXdbGETCKzWYwR/dd6fZb titan-bastion
|
||||||
|
longhorn_disks:
|
||||||
|
- mountpoint: /mnt/astreae
|
||||||
|
uuid: f3362f14-5822-449f-944b-ac570b5cd615
|
||||||
|
fs: ext4
|
||||||
|
- mountpoint: /mnt/asteria
|
||||||
|
uuid: 9c5316e6-f847-4884-b502-11f2d0d15d6f
|
||||||
|
fs: ext4
|
||||||
|
- name: titan-17
|
||||||
|
class: rpi4-armbian-longhorn
|
||||||
|
hostname: titan-17
|
||||||
|
ip: 192.168.22.45
|
||||||
|
k3s_role: agent
|
||||||
|
k3s_url: https://192.168.22.7:6443
|
||||||
|
k3s_token: ${METIS_K3S_TOKEN}
|
||||||
|
ssh_user: atlas
|
||||||
|
ssh_authorized_keys:
|
||||||
|
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOb8oMX6u0z3sH/p/WBGlvPXXdbGETCKzWYwR/dd6fZb titan-bastion
|
||||||
|
longhorn_disks:
|
||||||
|
- mountpoint: /mnt/astreae
|
||||||
|
uuid: 1fecdade-08b0-49cb-9ae3-be6c188b0a96
|
||||||
|
fs: ext4
|
||||||
|
- mountpoint: /mnt/asteria
|
||||||
|
uuid: 2fe9f613-d372-47ca-b84f-82084e4edda0
|
||||||
|
fs: ext4
|
||||||
|
- name: titan-19
|
||||||
|
class: rpi4-armbian-longhorn
|
||||||
|
hostname: titan-19
|
||||||
|
ip: 192.168.22.47
|
||||||
|
k3s_role: agent
|
||||||
|
k3s_url: https://192.168.22.7:6443
|
||||||
|
k3s_token: ${METIS_K3S_TOKEN}
|
||||||
|
ssh_user: atlas
|
||||||
|
ssh_authorized_keys:
|
||||||
|
- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOb8oMX6u0z3sH/p/WBGlvPXXdbGETCKzWYwR/dd6fZb titan-bastion
|
||||||
|
longhorn_disks:
|
||||||
|
- mountpoint: /mnt/astreae
|
||||||
|
uuid: 4890abb9-dda2-4f4f-9c0f-081ee82849cf
|
||||||
|
fs: ext4
|
||||||
|
- mountpoint: /mnt/asteria
|
||||||
|
uuid: 2b4ea28d-b0e6-4fa3-841b-cd7067ae9153
|
||||||
|
fs: ext4
|
||||||
67
overlays/rpi4-armbian-longhorn-root/etc/default/open-iscsi
Normal file
67
overlays/rpi4-armbian-longhorn-root/etc/default/open-iscsi
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# List of LVMed iSCSI Volume Groups.
|
||||||
|
# Multiple Volume Groups can be specified with spaces
|
||||||
|
#
|
||||||
|
# This list defines the Volume Groups that should be activated at boot
|
||||||
|
# after iSCSI has been activated. If you use dynamic activation of LVM
|
||||||
|
# volumes (lvmetad), you can (and should) leave this empty.
|
||||||
|
#
|
||||||
|
# On shutdown, this setting typically has no effect, since open-iscsi
|
||||||
|
# tries to determine all active VGs on iSCSI and deactivate them.
|
||||||
|
# However, if you have a really complicated stacking setup that isn't
|
||||||
|
# automatically detected, volume groups defined here will also be
|
||||||
|
# deactivated.
|
||||||
|
#
|
||||||
|
# To see whether open-iscsi is able to properly detect your setup for
|
||||||
|
# shutdown, execute the following on a running system:
|
||||||
|
# /lib/open-iscsi/umountiscsi.sh --dry-run
|
||||||
|
# This will tell you what steps will be taken at shutdown before logging
|
||||||
|
# out of the iSCSI session.
|
||||||
|
LVMGROUPS=""
|
||||||
|
|
||||||
|
|
||||||
|
# Handle _netdev devices
|
||||||
|
# You can specify your iSCSI (LVMed or Multipathed or DM Encrypted)
|
||||||
|
# devices with the _netdev mount option and open-iscsi will treat them
|
||||||
|
# accordingly.
|
||||||
|
#
|
||||||
|
# Note: however, handling _netdev devices comes with the caveat that
|
||||||
|
# other _netdev mounts, like an NFS share, also get pulled in with it.
|
||||||
|
#
|
||||||
|
# If this option is set to 0, no iSCSI mounts in /etc/fstab will be
|
||||||
|
# automatically mounted on systems running sysvinit. This setting is
|
||||||
|
# not necessary when using systemd as init system (Debian's default).
|
||||||
|
HANDLE_NETDEV=1
|
||||||
|
|
||||||
|
|
||||||
|
# Additional mounts to exclude at shutdown.
|
||||||
|
#
|
||||||
|
# If you have additional mounts on iSCSI that shouldn't be umounted at
|
||||||
|
# shutdown by open-iscsi (by default, open-iscsi excludes / and on
|
||||||
|
# systemd systems also /usr), place them here. iSCSI sessions that carry
|
||||||
|
# these mounts will also be kept open.
|
||||||
|
#
|
||||||
|
# If any of these mountpoints contain spaces, please use the same
|
||||||
|
# escaping as in /etc/fstab, i.e. replace the spaces with \040.
|
||||||
|
EXCLUDE_MOUNTS_AT_SHUTDOWN=""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Don't logout from ANY iSCSI session on shutdown
|
||||||
|
#
|
||||||
|
# When shutting down, if the root filesystem is on iSCSI, open-iscsi
|
||||||
|
# tries to determine which sessions are still required for the root
|
||||||
|
# filesystem. By default, the host will still logout from all other
|
||||||
|
# sessions.
|
||||||
|
#
|
||||||
|
# If you are running a very complicated setup of your root filesystem
|
||||||
|
# (multiple mapping levels stacked on top of each other), it may be the
|
||||||
|
# case that the autodetection logic doesn't work properly. You may then
|
||||||
|
# enable this setting to keep around all iSCSI sessions.
|
||||||
|
#
|
||||||
|
# Note that /etc/iscsi/iscsi.initramfs must exist for this option to
|
||||||
|
# have any effect at all.
|
||||||
|
#
|
||||||
|
# This was the default behavior in previous versions of this package
|
||||||
|
# up to the version that shipped with Debian 8 (Jessie).
|
||||||
|
#
|
||||||
|
ISCSI_ROOT_KEEP_ALL_SESSIONS_AT_SHUTDOWN=0
|
||||||
361
overlays/rpi4-armbian-longhorn-root/etc/iscsi/iscsid.conf
Normal file
361
overlays/rpi4-armbian-longhorn-root/etc/iscsi/iscsid.conf
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
#
|
||||||
|
# Open-iSCSI default configuration.
|
||||||
|
#
|
||||||
|
# Note: To set any of these values for a specific node/session run
|
||||||
|
# the iscsiadm --mode node --op command for the value. See the README
|
||||||
|
# and man page for iscsiadm for details on the --op command.
|
||||||
|
#
|
||||||
|
|
||||||
|
######################
|
||||||
|
# iscsid daemon config
|
||||||
|
######################
|
||||||
|
#
|
||||||
|
# If you want iscsid to start the first time an iscsi tool
|
||||||
|
# needs to access it, instead of starting it when the init
|
||||||
|
# scripts run, set the iscsid startup command here. This
|
||||||
|
# should normally only need to be done by distro package
|
||||||
|
# maintainers. If you leave the iscsid daemon running all
|
||||||
|
# the time then leave this attribute commented out.
|
||||||
|
#
|
||||||
|
# Default for Fedora and RHEL. Uncomment to activate.
|
||||||
|
# iscsid.startup = /bin/systemctl start iscsid.socket iscsiuio.socket
|
||||||
|
#
|
||||||
|
# Default for Debian and Ubuntu. Uncomment to activate.
|
||||||
|
iscsid.startup = /bin/systemctl start iscsid.socket
|
||||||
|
#
|
||||||
|
# Default if you are not using systemd. Uncomment to activate.
|
||||||
|
# iscsid.startup = /usr/bin/service start iscsid
|
||||||
|
|
||||||
|
# Check for active mounts on devices reachable through a session
|
||||||
|
# and refuse to logout if there are any. Defaults to "No".
|
||||||
|
# iscsid.safe_logout = Yes
|
||||||
|
|
||||||
|
# Only require UID auth for MGMT IPCs, and not username.
|
||||||
|
# Checking username is a legacy security practice, and is on the path
|
||||||
|
# to deprecation.
|
||||||
|
# Set to "No" for legacy compatibility.
|
||||||
|
# Defaults to "Yes".
|
||||||
|
# iscsid.ipc_auth_uid = No
|
||||||
|
|
||||||
|
#############################
|
||||||
|
# NIC/HBA and driver settings
|
||||||
|
#############################
|
||||||
|
# open-iscsi can create a session and bind it to a NIC/HBA.
|
||||||
|
# To set this up see the example iface config file.
|
||||||
|
|
||||||
|
#*****************
|
||||||
|
# Startup settings
|
||||||
|
#*****************
|
||||||
|
|
||||||
|
# To request that the iscsi service scripts startup a session, use "automatic":
|
||||||
|
# node.startup = automatic
|
||||||
|
#
|
||||||
|
# To manually startup the session, use "manual". The default is manual.
|
||||||
|
node.startup = manual
|
||||||
|
|
||||||
|
# For "automatic" startup nodes, setting this to "Yes" will try logins on each
|
||||||
|
# available iface until one succeeds, and then stop. The default "No" will try
|
||||||
|
# logins on all available ifaces simultaneously.
|
||||||
|
node.leading_login = No
|
||||||
|
|
||||||
|
# *************
|
||||||
|
# CHAP Settings
|
||||||
|
# *************
|
||||||
|
|
||||||
|
# To enable CHAP authentication set node.session.auth.authmethod
|
||||||
|
# to CHAP. The default is None.
|
||||||
|
#node.session.auth.authmethod = CHAP
|
||||||
|
|
||||||
|
# To configure which CHAP algorithms to enable, set
|
||||||
|
# node.session.auth.chap_algs to a comma separated list.
|
||||||
|
# The algorithms should be listed in order of decreasing
|
||||||
|
# preference — in particular, with the most preferred algorithm first.
|
||||||
|
# Valid values are MD5, SHA1, SHA256, and SHA3-256.
|
||||||
|
# The default is MD5.
|
||||||
|
#node.session.auth.chap_algs = SHA3-256,SHA256,SHA1,MD5
|
||||||
|
|
||||||
|
# To set a CHAP username and password for initiator
|
||||||
|
# authentication by the target(s), uncomment the following lines:
|
||||||
|
#node.session.auth.username = username
|
||||||
|
#node.session.auth.password = password
|
||||||
|
|
||||||
|
# To set a CHAP username and password for target(s)
|
||||||
|
# authentication by the initiator, uncomment the following lines:
|
||||||
|
#node.session.auth.username_in = username_in
|
||||||
|
#node.session.auth.password_in = password_in
|
||||||
|
|
||||||
|
# To enable CHAP authentication for a discovery session to the target,
|
||||||
|
# set discovery.sendtargets.auth.authmethod to CHAP. The default is None.
|
||||||
|
#discovery.sendtargets.auth.authmethod = CHAP
|
||||||
|
|
||||||
|
# To set a discovery session CHAP username and password for the initiator
|
||||||
|
# authentication by the target(s), uncomment the following lines:
|
||||||
|
#discovery.sendtargets.auth.username = username
|
||||||
|
#discovery.sendtargets.auth.password = password
|
||||||
|
|
||||||
|
# To set a discovery session CHAP username and password for target(s)
|
||||||
|
# authentication by the initiator, uncomment the following lines:
|
||||||
|
#discovery.sendtargets.auth.username_in = username_in
|
||||||
|
#discovery.sendtargets.auth.password_in = password_in
|
||||||
|
|
||||||
|
# ********
|
||||||
|
# Timeouts
|
||||||
|
# ********
|
||||||
|
#
|
||||||
|
# See the iSCSI README's Advanced Configuration section for tips
|
||||||
|
# on setting timeouts when using multipath or doing root over iSCSI.
|
||||||
|
#
|
||||||
|
# To specify the length of time to wait for session re-establishment
|
||||||
|
# before failing SCSI commands back to the application when running
|
||||||
|
# the Linux SCSI Layer error handler, edit the line.
|
||||||
|
# The value is in seconds and the default is 120 seconds.
|
||||||
|
# Special values:
|
||||||
|
# - If the value is 0, IO will be failed immediately.
|
||||||
|
# - If the value is less than 0, IO will remain queued until the session
|
||||||
|
# is logged back in, or until the user runs the logout command.
|
||||||
|
node.session.timeo.replacement_timeout = 120
|
||||||
|
|
||||||
|
# To specify the time to wait for login to complete, edit the line.
|
||||||
|
# The value is in seconds and the default is 15 seconds.
|
||||||
|
node.conn[0].timeo.login_timeout = 15
|
||||||
|
|
||||||
|
# To specify the time to wait for logout to complete, edit the line.
|
||||||
|
# The value is in seconds and the default is 15 seconds.
|
||||||
|
node.conn[0].timeo.logout_timeout = 15
|
||||||
|
|
||||||
|
# Time interval to wait for on connection before sending a ping.
|
||||||
|
# The value is in seconds and the default is 5 seconds.
|
||||||
|
node.conn[0].timeo.noop_out_interval = 5
|
||||||
|
|
||||||
|
# To specify the time to wait for a Nop-out response before failing
|
||||||
|
# the connection, edit this line. Failing the connection will
|
||||||
|
# cause IO to be failed back to the SCSI layer. If using dm-multipath
|
||||||
|
# this will cause the IO to be failed to the multipath layer.
|
||||||
|
# The value is in seconds and the default is 5 seconds.
|
||||||
|
node.conn[0].timeo.noop_out_timeout = 5
|
||||||
|
|
||||||
|
# To specify the time to wait for an abort response before
|
||||||
|
# failing the operation and trying a logical unit reset, edit the line.
|
||||||
|
# The value is in seconds and the default is 15 seconds.
|
||||||
|
node.session.err_timeo.abort_timeout = 15
|
||||||
|
|
||||||
|
# To specify the time to wait for a logical unit response
|
||||||
|
# before failing the operation and trying session re-establishment,
|
||||||
|
# edit the line.
|
||||||
|
# The value is in seconds and the default is 30 seconds.
|
||||||
|
node.session.err_timeo.lu_reset_timeout = 30
|
||||||
|
|
||||||
|
# To specify the time to wait for a target response
|
||||||
|
# before failing the operation and trying session re-establishment,
|
||||||
|
# edit the line.
|
||||||
|
# The value is in seconds and the default is 30 seconds.
|
||||||
|
node.session.err_timeo.tgt_reset_timeout = 30
|
||||||
|
|
||||||
|
# The value is in seconds and the default is 60 seconds.
|
||||||
|
node.session.err_timeo.host_reset_timeout = 60
|
||||||
|
|
||||||
|
|
||||||
|
#******
|
||||||
|
# Retry
|
||||||
|
#******
|
||||||
|
|
||||||
|
# To specify the number of times iscsid should retry a login
|
||||||
|
# if the login attempt fails due to the node.conn[0].timeo.login_timeout
|
||||||
|
# expiring, modify the following line. Note that if the login fails
|
||||||
|
# quickly (before node.conn[0].timeo.login_timeout fires) because the network
|
||||||
|
# layer or the target returns an error, iscsid may retry the login more than
|
||||||
|
# node.session.initial_login_retry_max times.
|
||||||
|
#
|
||||||
|
# This retry count along with node.conn[0].timeo.login_timeout
|
||||||
|
# determines the maximum amount of time iscsid will try to
|
||||||
|
# establish the initial login. node.session.initial_login_retry_max is
|
||||||
|
# multiplied by the node.conn[0].timeo.login_timeout to determine the
|
||||||
|
# maximum amount.
|
||||||
|
#
|
||||||
|
# The default node.session.initial_login_retry_max is 8 and
|
||||||
|
# node.conn[0].timeo.login_timeout is 15 so we have:
|
||||||
|
#
|
||||||
|
# node.conn[0].timeo.login_timeout * node.session.initial_login_retry_max = 120s
|
||||||
|
#
|
||||||
|
# Valid values are any integer value. This only
|
||||||
|
# affects the initial login. Setting it to a high value can slow
|
||||||
|
# down the iscsi service startup. Setting it to a low value can
|
||||||
|
# cause a session to not get logged into, if there are disruptions
|
||||||
|
# during startup or if the network is not ready at that time.
|
||||||
|
node.session.initial_login_retry_max = 8
|
||||||
|
|
||||||
|
################################
|
||||||
|
# session and device queue depth
|
||||||
|
################################
|
||||||
|
|
||||||
|
# To control how many commands the session will queue, set
|
||||||
|
# node.session.cmds_max to an integer between 2 and 2048 that is also
|
||||||
|
# a power of 2. The default is 128.
|
||||||
|
node.session.cmds_max = 128
|
||||||
|
|
||||||
|
# To control the device's queue depth, set node.session.queue_depth
|
||||||
|
# to a value between 1 and 1024. The default is 32.
|
||||||
|
node.session.queue_depth = 32
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# MISC SYSTEM PERFORMANCE SETTINGS
|
||||||
|
##################################
|
||||||
|
|
||||||
|
# For software iscsi (iscsi_tcp) and iser (ib_iser), each session
|
||||||
|
# has a thread used to transmit or queue data to the hardware. For
|
||||||
|
# cxgb3i, you will get a thread per host.
|
||||||
|
#
|
||||||
|
# Setting the thread's priority to a lower value can lead to higher throughput
|
||||||
|
# and lower latencies. The lowest value is -20. Setting the priority to
|
||||||
|
# a higher value, can lead to reduced IO performance, but if you are seeing
|
||||||
|
# the iscsi or scsi threads dominate the use of the CPU then you may want
|
||||||
|
# to set this value higher.
|
||||||
|
#
|
||||||
|
# Note: For cxgb3i, you must set all sessions to the same value.
|
||||||
|
# Otherwise the behavior is not defined.
|
||||||
|
#
|
||||||
|
# This is done by scanning /proc/PID/stat, and this doesn't work in
|
||||||
|
# newer kernels (6.* on), as the workqueue transmit thread can be
|
||||||
|
# passive, and not show up in the process table when not actively
|
||||||
|
# doing work. If the proper workqueue process is found, and the
|
||||||
|
# priority value is non-zero, then the priority of that process will
|
||||||
|
# be modified when a session is created.
|
||||||
|
#
|
||||||
|
# Note: as mentioned above, the default value is now zero, which means
|
||||||
|
# that we don't do anything to the transmit workqueue process priority,
|
||||||
|
# by default. If you wish to get the previous behavior, set this value
|
||||||
|
# to -20. In the future, this functionality will be removed, once this
|
||||||
|
# functionality is no longer needed or works.
|
||||||
|
#
|
||||||
|
# The default value is 0. The setting must be between -20 and 20.
|
||||||
|
# node.session.xmit_thread_priority = 0
|
||||||
|
|
||||||
|
|
||||||
|
#***************
|
||||||
|
# iSCSI settings
|
||||||
|
#***************
|
||||||
|
|
||||||
|
# To enable R2T flow control (i.e., the initiator must wait for an R2T
|
||||||
|
# command before sending any data), uncomment the following line:
|
||||||
|
#
|
||||||
|
#node.session.iscsi.InitialR2T = Yes
|
||||||
|
#
|
||||||
|
# To disable R2T flow control (i.e., the initiator has an implied
|
||||||
|
# initial R2T of "FirstBurstLength" at offset 0), uncomment the following line:
|
||||||
|
#
|
||||||
|
# The default is No.
|
||||||
|
node.session.iscsi.InitialR2T = No
|
||||||
|
|
||||||
|
#
|
||||||
|
# To disable immediate data (i.e., the initiator does not send
|
||||||
|
# unsolicited data with the iSCSI command PDU), uncomment the following line:
|
||||||
|
#
|
||||||
|
#node.session.iscsi.ImmediateData = No
|
||||||
|
#
|
||||||
|
# To enable immediate data (i.e., the initiator sends unsolicited data
|
||||||
|
# with the iSCSI command packet), uncomment the following line:
|
||||||
|
#
|
||||||
|
# The default is Yes.
|
||||||
|
node.session.iscsi.ImmediateData = Yes
|
||||||
|
|
||||||
|
# To specify the maximum number of unsolicited data bytes the initiator
|
||||||
|
# can send in an iSCSI PDU to a target, edit the following line.
|
||||||
|
#
|
||||||
|
# The value is the number of bytes in the range of 512 to (2^24-1) and
|
||||||
|
# the default is 262144.
|
||||||
|
node.session.iscsi.FirstBurstLength = 262144
|
||||||
|
|
||||||
|
# To specify the maximum SCSI payload that the initiator will negotiate
|
||||||
|
# with the target for, edit the following line.
|
||||||
|
#
|
||||||
|
# The value is the number of bytes in the range of 512 to (2^24-1) and
|
||||||
|
# the default is 16776192.
|
||||||
|
node.session.iscsi.MaxBurstLength = 16776192
|
||||||
|
|
||||||
|
# To specify the maximum number of data bytes the initiator can receive
|
||||||
|
# in an iSCSI PDU from a target, edit the following line.
|
||||||
|
#
|
||||||
|
# The value is the number of bytes in the range of 512 to (2^24-1) and
|
||||||
|
# the default is 262144.
|
||||||
|
node.conn[0].iscsi.MaxRecvDataSegmentLength = 262144
|
||||||
|
|
||||||
|
# To specify the maximum number of data bytes the initiator will send
|
||||||
|
# in an iSCSI PDU to the target, edit the following line.
|
||||||
|
#
|
||||||
|
# The value is the number of bytes in the range of 512 to (2^24-1).
|
||||||
|
# Zero is a special case. If set to zero, the initiator will use
|
||||||
|
# the target's MaxRecvDataSegmentLength for the MaxXmitDataSegmentLength.
|
||||||
|
# The default is 0.
|
||||||
|
node.conn[0].iscsi.MaxXmitDataSegmentLength = 0
|
||||||
|
|
||||||
|
# To specify the maximum number of data bytes the initiator can receive
|
||||||
|
# in an iSCSI PDU from a target during a discovery session, edit the
|
||||||
|
# following line.
|
||||||
|
#
|
||||||
|
# The value is the number of bytes in the range of 512 to (2^24-1) and
|
||||||
|
# the default is 32768.
|
||||||
|
discovery.sendtargets.iscsi.MaxRecvDataSegmentLength = 32768
|
||||||
|
|
||||||
|
# To allow the targets to control the setting of the digest checking,
|
||||||
|
# with the initiator requesting a preference of enabling the checking,
|
||||||
|
# uncomment one or both of the following lines:
|
||||||
|
#node.conn[0].iscsi.HeaderDigest = CRC32C,None
|
||||||
|
#node.conn[0].iscsi.DataDigest = CRC32C,None
|
||||||
|
#
|
||||||
|
# To allow the targets to control the setting of the digest checking,
|
||||||
|
# with the initiator requesting a preference of disabling the checking,
|
||||||
|
# uncomment one or both of the following lines:
|
||||||
|
#node.conn[0].iscsi.HeaderDigest = None,CRC32C
|
||||||
|
#node.conn[0].iscsi.DataDigest = None,CRC32C
|
||||||
|
#
|
||||||
|
# To enable CRC32C digest checking for the header and/or data part of
|
||||||
|
# iSCSI PDUs, uncomment one or both of the following lines:
|
||||||
|
#node.conn[0].iscsi.HeaderDigest = CRC32C
|
||||||
|
#node.conn[0].iscsi.DataDigest = CRC32C
|
||||||
|
#
|
||||||
|
# To disable digest checking for the header and/or data part of
|
||||||
|
# iSCSI PDUs, uncomment one or both of the following lines:
|
||||||
|
#node.conn[0].iscsi.HeaderDigest = None
|
||||||
|
#node.conn[0].iscsi.DataDigest = None
|
||||||
|
#
|
||||||
|
# The default is to never use DataDigests or HeaderDigests.
|
||||||
|
#
|
||||||
|
|
||||||
|
# For multipath configurations, you may want more than one session to be
|
||||||
|
# created on each iface record. If node.session.nr_sessions is greater
|
||||||
|
# than 1, performing a 'login' for that node will ensure that the
|
||||||
|
# appropriate number of sessions is created.
|
||||||
|
node.session.nr_sessions = 1
|
||||||
|
|
||||||
|
# When iscsid starts up, it recovers existing sessions (if possible).
|
||||||
|
# If the target for a session has gone away when this occurs, the
|
||||||
|
# iscsid daemon normally tries to reestablish each session,
|
||||||
|
# in succession, in the background, by trying again every two
|
||||||
|
# seconds until all sessions are restored. This configuration
|
||||||
|
# variable can limit the number of retries for each session.
|
||||||
|
# For example, setting reopen_max=150 would mean that each session
|
||||||
|
# recovery was limited to about five minutes.
|
||||||
|
node.session.reopen_max = 0
|
||||||
|
|
||||||
|
#************
|
||||||
|
# Workarounds
|
||||||
|
#************
|
||||||
|
|
||||||
|
# Some targets like IET prefer that an initiator does not respond to PDUs like
|
||||||
|
# R2Ts after it has sent a task management function like an ABORT TASK or a
|
||||||
|
# LOGICAL UNIT RESET. To adopt this behavior, uncomment the following line.
|
||||||
|
# The default is Yes.
|
||||||
|
node.session.iscsi.FastAbort = Yes
|
||||||
|
|
||||||
|
# Some targets like Equalogic prefer that an initiator continue to respond to
|
||||||
|
# R2Ts after it has sent a task management function like an ABORT TASK or a
|
||||||
|
# LOGICAL UNIT RESET. To adopt this behavior, uncomment the following line.
|
||||||
|
# node.session.iscsi.FastAbort = No
|
||||||
|
|
||||||
|
# To prevent doing automatic scans that would add unwanted luns to the system,
|
||||||
|
# we can disable them and have sessions only do manually requested scans.
|
||||||
|
# Automatic scans are performed on startup, on login, and on AEN/AER reception
|
||||||
|
# on devices supporting it. For HW drivers, all sessions will use the value
|
||||||
|
# defined in the configuration file. This configuration option is independent
|
||||||
|
# of the scsi_mod.scan parameter. The default is auto.
|
||||||
|
node.session.scan = auto
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
Port 2277
|
||||||
|
PasswordAuthentication no
|
||||||
|
PermitRootLogin prohibit-password
|
||||||
@ -0,0 +1,2 @@
|
|||||||
|
[Service]
|
||||||
|
ExecStartPost=/usr/local/sbin/metis-rpi4-longhorn-firstboot.sh
|
||||||
@ -0,0 +1,27 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Lightweight Kubernetes
|
||||||
|
Documentation=https://k3s.io
|
||||||
|
Wants=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=notify
|
||||||
|
EnvironmentFile=-/etc/default/%N
|
||||||
|
EnvironmentFile=-/etc/sysconfig/%N
|
||||||
|
EnvironmentFile=-/etc/systemd/system/k3s-agent.service.env
|
||||||
|
KillMode=process
|
||||||
|
Delegate=yes
|
||||||
|
LimitNOFILE=1048576
|
||||||
|
LimitNPROC=infinity
|
||||||
|
LimitCORE=infinity
|
||||||
|
TasksMax=infinity
|
||||||
|
TimeoutStartSec=0
|
||||||
|
Restart=always
|
||||||
|
RestartSec=5s
|
||||||
|
ExecStartPre=/bin/sh -xc '! /usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service 2>/dev/null'
|
||||||
|
ExecStartPre=-/sbin/modprobe br_netfilter
|
||||||
|
ExecStartPre=-/sbin/modprobe overlay
|
||||||
|
ExecStart=/usr/local/bin/k3s agent
|
||||||
@ -0,0 +1,4 @@
|
|||||||
|
[Service]
|
||||||
|
Environment="K3S_KUBELET_ARG=image-gc-high-threshold=65"
|
||||||
|
Environment="K3S_KUBELET_ARG=image-gc-low-threshold=50"
|
||||||
|
Environment="K3S_KUBELET_ARG=image-gc-minimum-available=8Gi"
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
[Service]
|
||||||
|
Environment="K3S_KUBELET_ARG=container-log-max-size=10Mi"
|
||||||
|
Environment="K3S_KUBELET_ARG=container-log-max-files=2"
|
||||||
@ -0,0 +1,2 @@
|
|||||||
|
[Service]
|
||||||
|
LimitNOFILE=1048576
|
||||||
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
libopeniscsiusr.so.0.2.0
|
||||||
Binary file not shown.
80
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/activate-storage.sh
Executable file
80
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/activate-storage.sh
Executable file
@ -0,0 +1,80 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# This script activates storage at boot after the iSCSI login. It can
|
||||||
|
# be called from both the init script as well as the native systemd
|
||||||
|
# service.
|
||||||
|
#
|
||||||
|
|
||||||
|
PATH=/usr/sbin:/sbin:/usr/bin:/bin
|
||||||
|
|
||||||
|
MULTIPATH=/sbin/multipath
|
||||||
|
VGCHANGE=/sbin/vgchange
|
||||||
|
|
||||||
|
if [ -f /etc/default/open-iscsi ]; then
|
||||||
|
. /etc/default/open-iscsi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# See if we need to handle LVM
|
||||||
|
if [ ! -x $VGCHANGE ] && [ -n "$LVMGROUPS" ]; then
|
||||||
|
echo "Warning: LVM2 tools are not installed, not honouring LVMGROUPS." >&2
|
||||||
|
LVMGROUPS=""
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If we don't have to activate any VGs and are running systemd, we
|
||||||
|
# don't have to activate anything, so doing udevadm settle here and
|
||||||
|
# potentially sleeping (if multipath is used) will not be productive,
|
||||||
|
# because after waiting for both of these things, we will do nothing.
|
||||||
|
# Therefore just drop out early if that is the case.
|
||||||
|
if [ -d /run/systemd/system ] && [ -z "$LVMGROUPS" ] ; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure we pick up all devices
|
||||||
|
udevadm settle || true
|
||||||
|
|
||||||
|
# Work around race condition here: after udevadm settle it is
|
||||||
|
# guaranteed that all iSCSI disks have now properly appeared, but
|
||||||
|
# other dependent devices may not have. This can include multipath
|
||||||
|
# mappings of iSCSI devices (multipathd will race against udev for
|
||||||
|
# locking the underlying source block devices when it comes to
|
||||||
|
# creating the mappings, and it will retry the lock only once per
|
||||||
|
# second, and typically succeed only on second try), but also
|
||||||
|
# partitions on the given disks (which the kernel scans
|
||||||
|
# asyncronously).
|
||||||
|
#
|
||||||
|
# The proper way of handling this is to have LVM activation and/or
|
||||||
|
# mounting of file systems be handled in a completely event-driven
|
||||||
|
# manner, but that requires configuration by the sysadmin in the
|
||||||
|
# case of LVM, and for mounting it only works with systemd at the
|
||||||
|
# moment. For compatibility with how the package handled this
|
||||||
|
# previously, we will work around this race for a while longer.
|
||||||
|
|
||||||
|
if [ -x $MULTIPATH ] ; then
|
||||||
|
# 1 second is too short for multipath devices to appear,
|
||||||
|
# because multipathd takes more than 1s to activate them
|
||||||
|
# after udevadm settle is done.
|
||||||
|
sleep 3
|
||||||
|
else
|
||||||
|
sleep 1
|
||||||
|
fi
|
||||||
|
udevadm settle || true
|
||||||
|
|
||||||
|
# Handle LVM
|
||||||
|
if [ -n "$LVMGROUPS" ] ; then
|
||||||
|
if ! $VGCHANGE -ay $LVMGROUPS ; then
|
||||||
|
echo "Warning: could not activate all LVM groups." >&2
|
||||||
|
fi
|
||||||
|
# Make sure we pick up all LVM devices
|
||||||
|
udevadm settle || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mount all network filesystems
|
||||||
|
# (systemd takes care of it directly, so don't do it there)
|
||||||
|
if ! [ -d /run/systemd/system ] ; then
|
||||||
|
if [ $HANDLE_NETDEV -eq 1 ] ; then
|
||||||
|
mount -a -O _netdev >/dev/null 2>&1 || true
|
||||||
|
# FIXME: should we really support swap on iSCSI?
|
||||||
|
# If so, we should update umountiscsi.sh!
|
||||||
|
swapon -a -e >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
68
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/logout-all.sh
Executable file
68
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/logout-all.sh
Executable file
@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# This script logs out from all active iSCSI sessions, excluding those
|
||||||
|
# listed in /run/open-iscsi/shutdown-keep-sessions. That file is
|
||||||
|
# generated by umountiscsi.sh and determines which sessions should not
|
||||||
|
# be terminated.
|
||||||
|
#
|
||||||
|
|
||||||
|
ISCSIADM=/usr/sbin/iscsiadm
|
||||||
|
PIDFILE=/run/iscsid.pid
|
||||||
|
|
||||||
|
ISCSI_ROOT_KEEP_ALL_SESSIONS_AT_SHUTDOWN=0
|
||||||
|
if [ -f /etc/default/open-iscsi ]; then
|
||||||
|
. /etc/default/open-iscsi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -f /etc/iscsi/iscsi.initramfs ] && [ $ISCSI_ROOT_KEEP_ALL_SESSIONS_AT_SHUTDOWN -eq 1 ]; then
|
||||||
|
# Don't logout from any sessions if root is on initramfs and the
|
||||||
|
# administrator wanted it that way.
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -s $PIDFILE ] || ! kill -0 `sed -n 1p $PIDFILE` >/dev/null 2>/dev/null ; then
|
||||||
|
# Don't logout from iSCSI sessions if daemon isn't running
|
||||||
|
echo "iSCSI initiator daemon not running, not logging out from targets." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
EXCLUDED_SESSIONS=""
|
||||||
|
if [ -f /run/open-iscsi/shutdown-keep-sessions ] ; then
|
||||||
|
_EXCLUDED_SESSIONS=$(cat /run/open-iscsi/shutdown-keep-sessions)
|
||||||
|
for s in ${_EXCLUDED_SESSIONS} ; do
|
||||||
|
EXCLUDED_SESSIONS="${EXCLUDED_SESSIONS:+$EXCLUDED_SESSIONS }${s}"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# trivial case
|
||||||
|
if [ -z "$EXCLUDED_SESSIONS" ] ; then
|
||||||
|
$ISCSIADM -m node --logoutall=all
|
||||||
|
exit $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
in_set() {
|
||||||
|
eval _set=\$$1
|
||||||
|
case "${_set}" in
|
||||||
|
("$2"|*" $2"|"$2 "*|*" $2 "*) return 0 ;;
|
||||||
|
(*) return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
# go through all iSCSI sessions, but exclude those where we don't want
|
||||||
|
# to logout from
|
||||||
|
RC=0
|
||||||
|
for host_dir in /sys/devices/platform/host* ; do
|
||||||
|
[ -d "$host_dir"/iscsi_host* ] || continue
|
||||||
|
for session_dir in "$host_dir"/session* ; do
|
||||||
|
if in_set EXCLUDED_SESSIONS "$session_dir" ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
$ISCSIADM -m session -r "$session_dir" --logout
|
||||||
|
rc=$?
|
||||||
|
if [ $rc -ne 0 ] ; then
|
||||||
|
RC=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
exit $RC
|
||||||
80
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/net-interface-handler
Executable file
80
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/net-interface-handler
Executable file
@ -0,0 +1,80 @@
|
|||||||
|
#!/bin/sh -e
|
||||||
|
# suppress configuration of network interface used
|
||||||
|
# by iSCSI root device
|
||||||
|
#
|
||||||
|
# If the root filesystem is on iSCSI, then we must take care to avoid
|
||||||
|
# changing the state of its network interface. To this end, the initramfs
|
||||||
|
# leaves a note for us which interface was used, and we mangle
|
||||||
|
# /run/network/ifstate manually to stop it being brought up or down
|
||||||
|
# automatically. This is a slight layering violation, but, unfortunately,
|
||||||
|
# ifupdown appears to have no way to do this without also running
|
||||||
|
# /etc/network/*.d/ scripts.
|
||||||
|
|
||||||
|
assert_interface() {
|
||||||
|
# udev sets INTERFACE to the name of the currently-processed nic.
|
||||||
|
[ -n "$INTERFACE" ] && return 0
|
||||||
|
echo "environment variable INTERFACE not set." 1>&2;
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
start() {
|
||||||
|
CR="
|
||||||
|
"
|
||||||
|
assert_interface || return
|
||||||
|
ifile=/run/initramfs/open-iscsi.interface
|
||||||
|
|
||||||
|
[ -f "$ifile" ] && read iface < "$ifile" || return 0
|
||||||
|
[ "$INTERFACE" = "$iface" ] || return
|
||||||
|
|
||||||
|
if ! grep -qs "^$iface=" /run/network/ifstate; then
|
||||||
|
mkdir -p /run/network
|
||||||
|
echo "$iface=$iface" >>/run/network/ifstate
|
||||||
|
|
||||||
|
if [ -f /run/net-$iface.conf ]; then
|
||||||
|
conf=/run/net-$iface.conf
|
||||||
|
elif [ -f /run/net6-$iface.conf ]; then
|
||||||
|
conf=/run/net6-$iface.conf
|
||||||
|
else
|
||||||
|
conf=""
|
||||||
|
fi
|
||||||
|
if command -v resolvconf >/dev/null &&
|
||||||
|
[ -n "$conf" ]; then
|
||||||
|
. "$conf"
|
||||||
|
R=""
|
||||||
|
[ -n "$DOMAINSEARCH" ] && R="$R${CR}search $DOMAINSEARCH"
|
||||||
|
[ -n "$IPV6DOMAINSEARCH" ] && R="$R${CR}search $IPV6DOMAINSEARCH"
|
||||||
|
for ns in "$IPV4DNS0" "$IPV4DNS1" "$IPV6DNS0" "$IPV6DNS1"; do
|
||||||
|
[ -n "$ns" -a "$ns" != "0.0.0.0" ] && R="$R${CR}nameserver $ns"
|
||||||
|
done
|
||||||
|
if [ -n "$R" ]; then
|
||||||
|
# create the dir in case resolvconf did not start yet
|
||||||
|
mkdir -p /run/resolvconf/interface
|
||||||
|
resolvconf -a $iface.iscsi-network <<EOF
|
||||||
|
${R#${CR}}
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
stop() {
|
||||||
|
assert_interface || return
|
||||||
|
ifile=/run/initramfs/open-iscsi.interface
|
||||||
|
[ -f "$ifile" ] && read iface < "$ifile" || return 0
|
||||||
|
[ "$INTERFACE" = "$iface" ] || return
|
||||||
|
|
||||||
|
if grep -qs "^$iface=" /run/network/ifstate; then
|
||||||
|
grep -v "^$iface=" /run/network/ifstate >/run/network/.ifstate.tmp || true
|
||||||
|
mv /run/network/.ifstate.tmp /run/network/ifstate
|
||||||
|
|
||||||
|
if command -v resolvconf >/dev/null; then
|
||||||
|
resolvconf -d $iface.iscsi-network
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
case "$1" in
|
||||||
|
start) start ;;
|
||||||
|
stop) stop ;;
|
||||||
|
*) echo "ERROR: must be called with 'start' or 'stop'" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
59
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/startup-checks.sh
Executable file
59
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/startup-checks.sh
Executable file
@ -0,0 +1,59 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# This script does the required startup checks before the iSCSI
|
||||||
|
# daemon should be started. It also generates a name if that
|
||||||
|
# hadn't been done before.
|
||||||
|
#
|
||||||
|
|
||||||
|
PATH=/usr/sbin:/sbin:/usr/bin:/bin
|
||||||
|
|
||||||
|
NAMEFILE=/etc/iscsi/initiatorname.iscsi
|
||||||
|
CONFIGFILE=/etc/iscsi/iscsid.conf
|
||||||
|
|
||||||
|
if [ ! -e "$CONFIGFILE" ]; then
|
||||||
|
echo >&2
|
||||||
|
echo "Error: configuration file $CONFIGFILE is missing!" >&2
|
||||||
|
echo "The iSCSI driver has not been correctly installed and cannot start." >&2
|
||||||
|
echo >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $NAMEFILE ]; then
|
||||||
|
echo >&2
|
||||||
|
echo "Error: InitiatorName file $NAMEFILE is missing!" >&2
|
||||||
|
echo "The iSCSI driver has not been correctly installed and cannot start." >&2
|
||||||
|
echo >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# see if we need to generate a unique iSCSI InitiatorName
|
||||||
|
if grep -q "^GenerateName=yes" $NAMEFILE ; then
|
||||||
|
if [ ! -x /usr/sbin/iscsi-iname ] ; then
|
||||||
|
echo "Error: /usr/sbin/iscsi-iname does not exist, driver was not successfully installed" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Generate a unique InitiatorName and save it
|
||||||
|
INAME=`/sbin/iscsi-iname -p iqn.2004-10.com.ubuntu:01`
|
||||||
|
if [ "$INAME" != "" ] ; then
|
||||||
|
echo "## DO NOT EDIT OR REMOVE THIS FILE!" > $NAMEFILE
|
||||||
|
echo "## If you remove this file, the iSCSI daemon will not start." >> $NAMEFILE
|
||||||
|
echo "## If you change the InitiatorName, existing access control lists" >> $NAMEFILE
|
||||||
|
echo "## may reject this initiator. The InitiatorName must be unique">> $NAMEFILE
|
||||||
|
echo "## for each iSCSI initiator. Do NOT duplicate iSCSI InitiatorNames." >> $NAMEFILE
|
||||||
|
printf "InitiatorName=$INAME\n" >> $NAMEFILE
|
||||||
|
chmod 600 $NAMEFILE
|
||||||
|
else
|
||||||
|
echo "Error: failed to generate an iSCSI InitiatorName, driver cannot start." >&2
|
||||||
|
echo >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# make sure there is a valid InitiatorName for the driver
|
||||||
|
if ! grep -q "^InitiatorName=[^ \t\n]" $NAMEFILE ; then
|
||||||
|
echo >&2
|
||||||
|
echo "Error: $NAMEFILE does not contain a valid InitiatorName." >&2
|
||||||
|
echo "The iSCSI driver has not been correctly installed and cannot start." >&2
|
||||||
|
echo >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
673
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/umountiscsi.sh
Executable file
673
overlays/rpi4-armbian-longhorn-root/usr/lib/open-iscsi/umountiscsi.sh
Executable file
@ -0,0 +1,673 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# This script umounts mounted iSCSI devices on shutdown, if possible.
|
||||||
|
# It is supposed to catch most use cases but is not designed to work
|
||||||
|
# for every corner-case. It handles LVM and multipath, but only if
|
||||||
|
# one of the following stackings is used:
|
||||||
|
# LVM -> multipath -> iSCSI
|
||||||
|
# multipath -> iSCSI
|
||||||
|
# LVM -> iSCSI
|
||||||
|
# LVM -> LUKS -> multipath -> iSCSI
|
||||||
|
# LVM -> LUKS -> iSCSI
|
||||||
|
# LUKS -> LVM -> multipath -> iSCSI
|
||||||
|
# LUKS -> multipath -> iSCSI
|
||||||
|
# LUKS -> LVM -> iSCSI
|
||||||
|
# LUKS -> iSCSI
|
||||||
|
# It does not try to umount anything belonging to any device that is
|
||||||
|
# also used as a backing store for the root filesystem. Any iSCSI
|
||||||
|
# device part of the backing store of the root filesystem will be noted
|
||||||
|
# in /run/open-iscsi/shutdown-keep-sessions, so that the session not be
|
||||||
|
# closed on shutdown.
|
||||||
|
#
|
||||||
|
# KNOWN ISSUES:
|
||||||
|
# - It doesn't handle submounts properly in all corner cases.
|
||||||
|
# Specifically, it doesn't handle a non-iSCSI mount below an
|
||||||
|
# iSCSI mount if it isn't also marked _netdev in /etc/fstab.
|
||||||
|
# - It does not handle other things device mapper can do, such as
|
||||||
|
# RAID, crypto, manual mappings of parts of disks, etc.
|
||||||
|
# - It doesn't try to kill programs still accessing those mounts,
|
||||||
|
# umount will just fail then.
|
||||||
|
# - It doesn't handle more complicated stackings such as overlayfs,
|
||||||
|
# FUSE filesystems, loop devices, etc.
|
||||||
|
# - It doesn't handle swap.
|
||||||
|
#
|
||||||
|
# LONG TERM GOAL:
|
||||||
|
# - In the long term, there should be a solution where for each part
|
||||||
|
# of the stacking (device mapper, LVM, overlayfs, etc.) explicit
|
||||||
|
# depdendencies are declared with the init system such that it can
|
||||||
|
# be automatically dismantled. That would make this script
|
||||||
|
# superfluous and also not be a layering violation, as it
|
||||||
|
# currently is.
|
||||||
|
#
|
||||||
|
# Author: Christian Seiler <christian@iwakd.de>
|
||||||
|
#
|
||||||
|
|
||||||
|
PATH=/usr/sbin:/sbin:/usr/bin:/bin
|
||||||
|
|
||||||
|
EXCLUDE_MOUNTS_AT_SHUTDOWN=""
|
||||||
|
if [ -f /etc/default/open-iscsi ]; then
|
||||||
|
. /etc/default/open-iscsi
|
||||||
|
fi
|
||||||
|
|
||||||
|
MULTIPATH=/sbin/multipath
|
||||||
|
PVS=/sbin/pvs
|
||||||
|
LVS=/sbin/lvs
|
||||||
|
VGS=/sbin/vgs
|
||||||
|
VGCHANGE=/sbin/vgchange
|
||||||
|
CRYPTSETUP=/sbin/cryptsetup
|
||||||
|
DMSETUP=/sbin/dmsetup
|
||||||
|
|
||||||
|
if [ -x $PVS ] && [ -x $LVS ] && [ -x $VGCHANGE ] ; then
|
||||||
|
HAVE_LVM=1
|
||||||
|
else
|
||||||
|
HAVE_LVM=0
|
||||||
|
fi
|
||||||
|
if [ -x $CRYPTSETUP ] && [ -x $DMSETUP ] ; then
|
||||||
|
HAVE_LUKS=1
|
||||||
|
else
|
||||||
|
HAVE_LUKS=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
DRY_RUN=0
|
||||||
|
|
||||||
|
# We need to make sure that we don't try to umount the root device
|
||||||
|
# and for systemd systems, also /usr (which is pre-mounted in initrd
|
||||||
|
# there).
|
||||||
|
EXCLUDE_MOUNTS="/"
|
||||||
|
if [ -d /run/systemd/system ] ; then
|
||||||
|
EXCLUDE_MOUNTS="$EXCLUDE_MOUNTS /usr"
|
||||||
|
fi
|
||||||
|
EXCLUDE_MOUNTS="${EXCLUDE_MOUNTS}${EXCLUDE_MOUNTS_AT_SHUTDOWN+ $EXCLUDE_MOUNTS_AT_SHUTDOWN}"
|
||||||
|
unset _EXCLUDE_MOUNTS
|
||||||
|
|
||||||
|
error_usage() {
|
||||||
|
echo "Usage: $0 [--dry-run | --timeout secs]" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout=0
|
||||||
|
|
||||||
|
if [ $# -gt 2 ] ; then
|
||||||
|
error_usage
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $# -eq 2 ] ; then
|
||||||
|
if [ x"$1"x != x"--timeout"x ] ; then
|
||||||
|
error_usage
|
||||||
|
fi
|
||||||
|
case "$2" in
|
||||||
|
(-1) timeout="$2" ;;
|
||||||
|
(*[!0-9]*|"") error_usage ;;
|
||||||
|
(*) timeout="$2" ;;
|
||||||
|
esac
|
||||||
|
elif [ $# -eq 1 ] ; then
|
||||||
|
if [ x"$1"x != x"--dry-run"x ] ; then
|
||||||
|
error_usage
|
||||||
|
fi
|
||||||
|
DRY_RUN=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# poor man's hash implementation using shell variables
|
||||||
|
hash_keys() {
|
||||||
|
_hash_keys_hash_key_prefix="${1}_"
|
||||||
|
(
|
||||||
|
IFS='='
|
||||||
|
set | while read var value ; do
|
||||||
|
if [ x"${var#$_hash_keys_hash_key_prefix}"x != x"${var}"x ] ; then
|
||||||
|
printf '%s\n' "${var#$_hash_keys_hash_key_prefix}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
hash_clear() {
|
||||||
|
for k in $(hash_keys "$1") ; do
|
||||||
|
unset "${1}_${k}"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_get() {
|
||||||
|
_hash_get_var="$2_$(printf '%s' "$3" | sed 's%[^A-Za-z0-9_]%_%g')"
|
||||||
|
eval _hash_get_value=\$${_hash_get_var}
|
||||||
|
eval $1=\${_hash_get_value}
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_set() {
|
||||||
|
_hash_set_var="$1_$(printf '%s' "$2" | sed 's%[^A-Za-z0-9_]%_%g')"
|
||||||
|
eval ${_hash_set_var}=\${3}
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_unset() {
|
||||||
|
_hash_set_var="$1_$(printf '%s' "$2" | sed 's%[^A-Za-z0-9_]%_%g')"
|
||||||
|
unset ${_hash_set_var}
|
||||||
|
}
|
||||||
|
|
||||||
|
in_set() {
|
||||||
|
eval _set=\$$1
|
||||||
|
case "${_set}" in
|
||||||
|
("$2"|*" $2"|"$2 "*|*" $2 "*) return 0 ;;
|
||||||
|
(*) return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
_add_to_set() {
|
||||||
|
eval _set=\$$1
|
||||||
|
case "${_set}" in
|
||||||
|
("$2"|*" $2"|"$2 "*|*" $2 "*) ;;
|
||||||
|
("") _set="$2" ;;
|
||||||
|
(*) _set="${_set} $2" ;;
|
||||||
|
esac
|
||||||
|
eval $1=\${_set}
|
||||||
|
}
|
||||||
|
|
||||||
|
add_to_set() {
|
||||||
|
_add_to_set_set="$1"
|
||||||
|
shift
|
||||||
|
for _add_to_set_val in "$@" ; do
|
||||||
|
_add_to_set "${_add_to_set_set}" "${_add_to_set_val}"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_add_to_set() {
|
||||||
|
_hash_add_to_set_var="$1_$(printf '%s' "$2" | sed 's%[^A-Za-z0-9_]%_%g')"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
add_to_set "${_hash_add_to_set_var}" "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
device_majmin() {
|
||||||
|
eval $1=\"\"
|
||||||
|
_majmin_dec=$(LC_ALL=C ls -lnd /dev/"$2" | while read _perms _links _uid _gid _majcomma _min _rest ; do
|
||||||
|
if [ x"${_majcomma%,}"x != x"${_majcomma}"x ] ; then
|
||||||
|
printf '%s' ${_majcomma%,}:${_min}
|
||||||
|
fi
|
||||||
|
break
|
||||||
|
done)
|
||||||
|
[ -n "${_majmin_dec}" ] || return
|
||||||
|
eval $1=\${_majmin_dec}
|
||||||
|
}
|
||||||
|
|
||||||
|
get_lvm_vgs() {
|
||||||
|
# handle the case where we didn't get passed any PVs
|
||||||
|
# at all
|
||||||
|
[ $# -gt 0 ] || return 0
|
||||||
|
# subshell for pwd change
|
||||||
|
(
|
||||||
|
cd /dev
|
||||||
|
$PVS --noheadings -o vg_name "$@" 2>/dev/null
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
enumerate_luks() {
|
||||||
|
hash_clear LUKS_DEVICES_REVERSE_MAP
|
||||||
|
|
||||||
|
_all_crypt_devices=$($DMSETUP info --noheadings -o name -c -S subsystem=CRYPT 2>/dev/null || :)
|
||||||
|
for _crypt_device in ${_all_crypt_devices} ; do
|
||||||
|
[ -b "/dev/mapper/${_crypt_device}" ] || continue
|
||||||
|
_crypt_device="$(readlink -fe "/dev/mapper/${_crypt_device}" 2>/dev/null || :)"
|
||||||
|
_crypt_device="${_crypt_device#/dev/}"
|
||||||
|
[ -b "/dev/${_crypt_device}" ] || continue
|
||||||
|
# dmsetup deps is weird, it outputs the following:
|
||||||
|
# 1 dependencies : (XYZ)
|
||||||
|
_dep=$($DMSETUP deps -o blkdevname "/dev/${_crypt_device}" | sed -n '1s%.*: (\(.*\)).*%\1%p')
|
||||||
|
if [ -n "$_dep" ] && [ -b "/dev/${_dep}" ] ; then
|
||||||
|
_dep="$(readlink -fe "/dev/$_dep" 2>/dev/null || :)"
|
||||||
|
_dep="${_dep#/dev/}"
|
||||||
|
fi
|
||||||
|
if [ -n "$_dep" ] && [ -b "/dev/${_dep}" ] ; then
|
||||||
|
hash_set LUKS_DEVICES_REVERSE_MAP "${_dep}" "${_crypt_device}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
enumerate_iscsi_devices() {
|
||||||
|
# Empty arrays
|
||||||
|
iscsi_disks=""
|
||||||
|
iscsi_partitions=""
|
||||||
|
iscsi_multipath_disks=""
|
||||||
|
iscsi_multipath_disk_aliases=""
|
||||||
|
iscsi_multipath_partitions=""
|
||||||
|
iscsi_lvm_vgs=""
|
||||||
|
iscsi_lvm_lvs=""
|
||||||
|
iscsi_potential_mount_sources=""
|
||||||
|
iscsi_luks_pass1=""
|
||||||
|
iscsi_luks_pass2=""
|
||||||
|
|
||||||
|
hash_clear ISCSI_DEVICE_SESSIONS
|
||||||
|
hash_clear ISCSI_MPALIAS_SESSIONS
|
||||||
|
hash_clear ISCSI_LVMVG_SESSIONS
|
||||||
|
hash_clear ISCSI_NUMDEVICE_SESSIONS
|
||||||
|
ISCSI_EXCLUDED_SESSIONS=""
|
||||||
|
|
||||||
|
# We first need to generate a global reverse mapping of all
|
||||||
|
# cryptsetup (e.g. LUKS) devices, because there's no easy way
|
||||||
|
# to query "is this the encrypted backing of an active crypto
|
||||||
|
# mapping?
|
||||||
|
enumerate_luks
|
||||||
|
|
||||||
|
# Look for all iscsi disks
|
||||||
|
for _host_dir in /sys/devices/platform/host* /sys/devices/pci*/*/*/host* ; do
|
||||||
|
if ! [ -d "$_host_dir"/iscsi_host* ] || ! [ -d "$_host_dir"/iscsi_host/host* ] ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
for _session_dir in "$_host_dir"/session* ; do
|
||||||
|
[ -d "$_session_dir"/target* ] || continue
|
||||||
|
for _block_dev_dir in "$_session_dir"/target*/*\:*/block/* ; do
|
||||||
|
_block_dev=${_block_dev_dir##*/}
|
||||||
|
[ x"${_block_dev}"x != x"*"x ] || continue
|
||||||
|
add_to_set iscsi_disks "${_block_dev}"
|
||||||
|
hash_add_to_set ISCSI_DEVICE_SESSIONS "${_block_dev}" ${_session_dir}
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# Look for all partitions on those disks
|
||||||
|
for _disk in $iscsi_disks ; do
|
||||||
|
hash_get _disk_sessions ISCSI_DEVICE_SESSIONS "${_disk}"
|
||||||
|
for _part_dir in /sys/class/block/"${_disk}"/"${_disk}"?* ; do
|
||||||
|
_part="${_part_dir##*/}"
|
||||||
|
[ x"${_part}"x != x"${_disk}?*"x ] || continue
|
||||||
|
add_to_set iscsi_partitions "${_part}"
|
||||||
|
hash_set ISCSI_DEVICE_SESSIONS "${_part}" "${_disk_sessions}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -x $MULTIPATH ] ; then
|
||||||
|
# Look for all multipath disks
|
||||||
|
for _disk in $iscsi_disks ; do
|
||||||
|
hash_get _disk_sessions ISCSI_DEVICE_SESSIONS "${_disk}"
|
||||||
|
for _alias in $($MULTIPATH -v1 -l /dev/"$_disk") ; do
|
||||||
|
_mp_dev="$(readlink -fe "/dev/mapper/${_alias}" || :)"
|
||||||
|
[ -n "${_mp_dev}" ] || continue
|
||||||
|
add_to_set iscsi_multipath_disks "${_mp_dev#/dev/}"
|
||||||
|
add_to_set iscsi_multipath_disk_aliases "${_alias}"
|
||||||
|
hash_add_to_set ISCSI_DEVICE_SESSIONS "${_mp_dev#/dev/}" ${_disk_sessions}
|
||||||
|
hash_add_to_set ISCSI_MPALIAS_SESSIONS "${_alias}" ${_disk_sessions}
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# Look for partitions on these multipath disks
|
||||||
|
for _alias in $iscsi_multipath_disk_aliases ; do
|
||||||
|
hash_get _mp_sessions ISCSI_MPALIAS_SESSIONS "${_alias}"
|
||||||
|
for _part_name in /dev/mapper/"${_alias}"-part* ; do
|
||||||
|
_part="$(readlink -fe "$_part_name" 2>/dev/null || :)"
|
||||||
|
[ -n "${_part}" ] || continue
|
||||||
|
add_to_set iscsi_multipath_partitions "${_part#/dev/}"
|
||||||
|
hash_set ISCSI_DEVICE_SESSIONS "${_part#/dev/}" "${_mp_sessions}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $HAVE_LUKS -eq 1 ] ; then
|
||||||
|
# Look for all LUKS devices.
|
||||||
|
for _dev in $iscsi_disks $iscsi_partitions $iscsi_multipath_disks $iscsi_multipath_partitions ; do
|
||||||
|
hash_get _luksDev LUKS_DEVICES_REVERSE_MAP "${_dev}"
|
||||||
|
[ -n "${_luksDev}" ] || continue
|
||||||
|
add_to_set iscsi_luks_pass1 "${_luksDev}"
|
||||||
|
hash_get _currentSession ISCSI_DEVICE_SESSIONS "${_dev}"
|
||||||
|
if [ -n "${_currentSession}" ] ; then
|
||||||
|
hash_set ISCSI_DEVICE_SESSIONS "${_luksDev}" "${_currentSession}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $HAVE_LVM -eq 1 ] ; then
|
||||||
|
# Look for all LVM volume groups that have a backing store
|
||||||
|
# on any iSCSI device we found. Also, add $LVMGROUPS set in
|
||||||
|
# /etc/default/open-iscsi (for more complicated stacking
|
||||||
|
# configurations we don't automatically detect).
|
||||||
|
for _vg in $(get_lvm_vgs $iscsi_disks $iscsi_partitions $iscsi_multipath_disks $iscsi_multipath_partitions $iscsi_luks_pass1) $LVMGROUPS ; do
|
||||||
|
add_to_set iscsi_lvm_vgs "$_vg"
|
||||||
|
done
|
||||||
|
|
||||||
|
# $iscsi_lvm_vgs is now unique list
|
||||||
|
for _vg in $iscsi_lvm_vgs ; do
|
||||||
|
# get PVs to track iSCSI sessions
|
||||||
|
for _pv in $($VGS --noheadings -o pv_name "$_vg" 2>/dev/null) ; do
|
||||||
|
_pv_dev="$(readlink -fe "$_pv" 2>/dev/null || :)"
|
||||||
|
[ -n "${_pv_dev}" ] || continue
|
||||||
|
hash_get _pv_sessions ISCSI_DEVICE_SESSIONS "${_pv_dev#/dev/}"
|
||||||
|
hash_add_to_set ISCSI_LVMVG_SESSIONS "${_vg}" ${_pv_sessions}
|
||||||
|
done
|
||||||
|
|
||||||
|
# now we collected all sessions belonging to this VG
|
||||||
|
hash_get _vg_sessions ISCSI_LVMVG_SESSIONS "${_vg}"
|
||||||
|
|
||||||
|
# find all LVs
|
||||||
|
for _lv in $($VGS --noheadings -o lv_name "$_vg" 2>/dev/null) ; do
|
||||||
|
_dev="$(readlink -fe "/dev/${_vg}/${_lv}" 2>/dev/null || :)"
|
||||||
|
[ -n "${_dev}" ] || continue
|
||||||
|
iscsi_lvm_lvs="$iscsi_lvm_lvs ${_dev#/dev/}"
|
||||||
|
hash_set ISCSI_DEVICE_SESSIONS "${_dev#/dev/}" "${_vg_sessions}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $HAVE_LUKS -eq 1 ] ; then
|
||||||
|
# Look for all LUKS devices.
|
||||||
|
for _dev in $iscsi_lvm_lvs ; do
|
||||||
|
hash_get _luksDev LUKS_DEVICES_REVERSE_MAP "${_dev}"
|
||||||
|
[ -n "${_luksDev}" ] || continue
|
||||||
|
add_to_set iscsi_luks_pass2 "${_luksDev}"
|
||||||
|
hash_get _currentSession ISCSI_DEVICE_SESSIONS "${_dev}"
|
||||||
|
if [ -n "${_currentSession}" ] ; then
|
||||||
|
hash_set ISCSI_DEVICE_SESSIONS "${_luksDev}" "${_currentSession}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gather together all mount sources
|
||||||
|
iscsi_potential_mount_sources="$iscsi_potential_mount_sources $iscsi_disks $iscsi_partitions"
|
||||||
|
iscsi_potential_mount_sources="$iscsi_potential_mount_sources $iscsi_multipath_disks $iscsi_multipath_partitions"
|
||||||
|
iscsi_potential_mount_sources="$iscsi_potential_mount_sources $iscsi_lvm_lvs"
|
||||||
|
iscsi_potential_mount_sources="$iscsi_potential_mount_sources $iscsi_luks_pass1 $iscsi_luks_pass2"
|
||||||
|
|
||||||
|
# Convert them to numerical representation
|
||||||
|
iscsi_potential_mount_sources_majmin=""
|
||||||
|
for _src in $iscsi_potential_mount_sources ; do
|
||||||
|
device_majmin _src_majmin "$_src"
|
||||||
|
[ -n "$_src_majmin" ] || continue
|
||||||
|
iscsi_potential_mount_sources_majmin="${iscsi_potential_mount_sources_majmin} ${_src_majmin}"
|
||||||
|
hash_get _dev_sessions ISCSI_DEVICE_SESSIONS "${_src}"
|
||||||
|
hash_set ISCSI_NUMDEVICE_SESSIONS "${_src_majmin}" "${_dev_sessions}"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Enumerate mount points
|
||||||
|
iscsi_mount_points=""
|
||||||
|
iscsi_mount_point_ids=""
|
||||||
|
while read _mpid _mppid _mpdev _mpdevpath _mppath _mpopts _other ; do
|
||||||
|
if in_set iscsi_potential_mount_sources_majmin "$_mpdev" ; then
|
||||||
|
if in_set EXCLUDE_MOUNTS "${_mppath}" ; then
|
||||||
|
hash_get _dev_sessions ISCSI_NUMDEVICE_SESSIONS "${_mpdev}"
|
||||||
|
add_to_set ISCSI_EXCLUDED_SESSIONS $_dev_sessions
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
# list mountpoints in reverse order (in case
|
||||||
|
# some are stacked) mount --move may cause the
|
||||||
|
# order of /proc/self/mountinfo to not always
|
||||||
|
# reflect the stacking order, so this is not
|
||||||
|
# fool-proof, but it's better than nothing
|
||||||
|
iscsi_mount_points="$_mppath $iscsi_mount_points"
|
||||||
|
iscsi_mount_point_ids="$_mpid $iscsi_mount_points"
|
||||||
|
fi
|
||||||
|
done < /proc/self/mountinfo
|
||||||
|
}
|
||||||
|
|
||||||
|
try_umount() {
|
||||||
|
# in order to handle stacking try twice; together with the fact
|
||||||
|
# that the list of mount points is in reverse order of the
|
||||||
|
# contents /proc/self/mountinfo this should catch most cases
|
||||||
|
for retry in 1 2 ; do
|
||||||
|
for path in $iscsi_mount_points ; do
|
||||||
|
# first try to see if it really is a mountpoint
|
||||||
|
# still (might be the second round this is done
|
||||||
|
# and the mount is already gone, or something
|
||||||
|
# else umounted it first)
|
||||||
|
if ! fstab-decode mountpoint -q "$path" ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# try to umount it
|
||||||
|
if ! fstab-decode umount "$path" ; then
|
||||||
|
# unfortunately, umount's exit code
|
||||||
|
# may be a false negative, i.e. it
|
||||||
|
# might give a failure exit code, even
|
||||||
|
# though it succeeded, so check again
|
||||||
|
if fstab-decode mountpoint -q "$path" ; then
|
||||||
|
echo "Could not unmount $path" >&2
|
||||||
|
any_umount_failed=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
try_deactivate_lvm() {
|
||||||
|
[ $HAVE_LVM -eq 1 ] || return
|
||||||
|
|
||||||
|
for vg in $iscsi_lvm_vgs ; do
|
||||||
|
vg_excluded=0
|
||||||
|
hash_get vg_sessions ISCSI_LVMVG_SESSIONS "$vg"
|
||||||
|
for vg_session in $vg_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$vg_session" ; then
|
||||||
|
vg_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $vg_excluded -eq 1 ] ; then
|
||||||
|
# volume group on same iSCSI session as excluded
|
||||||
|
# mount, don't disable it
|
||||||
|
# (FIXME: we should only exclude VGs that contain
|
||||||
|
# those mounts, not also those that happen to be
|
||||||
|
# in the same iSCSI session)
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if ! $VGCHANGE --available=n $vg ; then
|
||||||
|
# Make sure the volume group (still) exists. If
|
||||||
|
# it doesn't we count that as deactivated, so
|
||||||
|
# don't fail then.
|
||||||
|
_vg_test=$(vgs -o vg_name --noheadings $vg 2>/dev/null || :)
|
||||||
|
if [ -n "${_vg_test}" ] ; then
|
||||||
|
echo "Cannot deactivate Volume Group $vg" >&2
|
||||||
|
any_umount_failed=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
try_dismantle_multipath() {
|
||||||
|
[ -x $MULTIPATH ] || return
|
||||||
|
|
||||||
|
for mpalias in $iscsi_multipath_disk_aliases ; do
|
||||||
|
mp_excluded=0
|
||||||
|
hash_get mp_sessions ISCSI_MPALIAS_SESSIONS "$mpalias"
|
||||||
|
for mp_session in $mp_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$mp_session" ; then
|
||||||
|
mp_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $mp_excluded -eq 1 ] ; then
|
||||||
|
# multipath device on same iSCSI session as
|
||||||
|
# excluded mount, don't disable it
|
||||||
|
# (FIXME: we should only exclude multipath mounts
|
||||||
|
# that contain those mounts, not also those that
|
||||||
|
# happen to be in the same iSCSI session)
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if ! $MULTIPATH -f $mpalias ; then
|
||||||
|
echo "Cannot dismantle Multipath Device $mpalias" >&2
|
||||||
|
any_umount_failed=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Close iSCSI-backed LUKS mappings for the given pass (argument "1" or
# "2"); devices sharing a session with an excluded mount are kept.
try_dismantle_luks() {
	# LUKS support is optional; bail out early when absent.
	[ $HAVE_LUKS -eq 1 ] || return

	# Select the device list for the requested pass; anything other
	# than "1" falls back to the pass-2 list (mirrors the original
	# "2|*" case arm).
	if [ "$1" = "1" ] ; then
		iscsi_luks_current_pass="$iscsi_luks_pass1"
	else
		iscsi_luks_current_pass="$iscsi_luks_pass2"
	fi

	for _dev in $iscsi_luks_current_pass ; do
		# Skip devices whose backing iSCSI session is excluded.
		hash_get device_sessions ISCSI_DEVICE_SESSIONS "$_dev"
		_skip=0
		for _sess in $device_sessions ; do
			in_set ISCSI_EXCLUDED_SESSIONS "$_sess" && _skip=1
		done
		[ $_skip -eq 0 ] || continue

		# Map the kernel device to its device-mapper name; an
		# empty result means the mapping is already gone, which
		# counts as success.
		_luksName="$($DMSETUP info -c --noheadings -o name /dev/"$_dev" 2>/dev/null || :)"
		[ -n "${_luksName}" ] || continue

		if ! $CRYPTSETUP close "${_luksName}" ; then
			echo "Cannot dismantle cryptsetup device ${_luksName}" >&2
			any_umount_failed=1
		fi
	done
}
|
||||||
|
|
||||||
|
# Don't do this if we are using systemd as init system, since systemd
# takes care of network filesystems (including those marked _netdev) by
# itself.
if ! [ -d /run/systemd/system ] && [ $HANDLE_NETDEV -eq 1 ] && [ $DRY_RUN -eq 0 ]; then
	echo "Unmounting all devices marked _netdev";
	# Best effort: errors are ignored, the per-mount logic below
	# retries anything that stays mounted.
	umount -a -O _netdev >/dev/null 2>&1
fi

# Build the global device/session maps (mounts, LVM VGs, LUKS passes,
# multipath aliases, excluded sessions) used by all try_* helpers.
enumerate_iscsi_devices
|
||||||
|
|
||||||
|
# Dry run? Just print what we want to do (useful for administrator to check).
|
||||||
|
if [ $DRY_RUN -eq 1 ] ; then
|
||||||
|
echo "$0: would umount the following mount points:"
|
||||||
|
had_mount=0
|
||||||
|
if [ -n "$iscsi_mount_points" ] ; then
|
||||||
|
for v in $iscsi_mount_points ; do
|
||||||
|
echo " $v"
|
||||||
|
had_mount=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ $had_mount -eq 1 ] || echo " (none)"
|
||||||
|
|
||||||
|
echo "$0: would disable the following LUKS devices (second pass):"
|
||||||
|
had_luks=0
|
||||||
|
if [ -n "$iscsi_luks_pass2" ] ; then
|
||||||
|
for v in ${iscsi_luks_pass2} ; do
|
||||||
|
luks_excluded=0
|
||||||
|
hash_get device_sessions ISCSI_DEVICE_SESSIONS "$v"
|
||||||
|
for device_session in $device_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$device_session" ; then
|
||||||
|
luks_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $luks_excluded -eq 1 ] ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
_luksName="$($DMSETUP info -c --noheadings -o name /dev/"$v" 2>/dev/null || :)"
|
||||||
|
[ -n "${_luksName}" ] || continue
|
||||||
|
echo " ${_luksName}"
|
||||||
|
had_luks=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ $had_luks -eq 1 ] || echo " (none)"
|
||||||
|
|
||||||
|
echo "$0: would deactivate the following LVM Volume Groups:"
|
||||||
|
had_vg=0
|
||||||
|
if [ -n "$iscsi_lvm_vgs" ] ; then
|
||||||
|
for v in $iscsi_lvm_vgs ; do
|
||||||
|
# sync this exclusion logic with try_deactivate_lvm
|
||||||
|
vg_excluded=0
|
||||||
|
hash_get vg_sessions ISCSI_LVMVG_SESSIONS "$v"
|
||||||
|
for vg_session in $vg_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$vg_session" ; then
|
||||||
|
vg_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $vg_excluded -eq 1 ] ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
echo " $v"
|
||||||
|
had_vg=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ $had_vg -eq 1 ] || echo " (none)"
|
||||||
|
|
||||||
|
echo "$0: would disable the following LUKS devices (first pass):"
|
||||||
|
had_luks=0
|
||||||
|
if [ -n "$iscsi_luks_pass1" ] ; then
|
||||||
|
for v in ${iscsi_luks_pass1} ; do
|
||||||
|
luks_excluded=0
|
||||||
|
hash_get device_sessions ISCSI_DEVICE_SESSIONS "$v"
|
||||||
|
for device_session in $device_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$device_session" ; then
|
||||||
|
luks_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $luks_excluded -eq 1 ] ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
_luksName="$($DMSETUP info -c --noheadings -o name /dev/"$v" 2>/dev/null || :)"
|
||||||
|
[ -n "${_luksName}" ] || continue
|
||||||
|
echo " ${_luksName}"
|
||||||
|
had_luks=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ $had_luks -eq 1 ] || echo " (none)"
|
||||||
|
|
||||||
|
echo "$0: would deactivate the following multipath volumes:"
|
||||||
|
had_mp=0
|
||||||
|
if [ -n "$iscsi_multipath_disk_aliases" ] ; then
|
||||||
|
for v in $iscsi_multipath_disk_aliases ; do
|
||||||
|
# sync this exclusion logic with try_dismantle_multipath
|
||||||
|
mp_excluded=0
|
||||||
|
hash_get mp_sessions ISCSI_MPALIAS_SESSIONS "$v"
|
||||||
|
for mp_session in $mp_sessions ; do
|
||||||
|
if in_set ISCSI_EXCLUDED_SESSIONS "$mp_session" ; then
|
||||||
|
mp_excluded=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ $mp_excluded -eq 1 ] ; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
echo " $v"
|
||||||
|
had_mp=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ $had_mp -eq 1 ] || echo " (none)"
|
||||||
|
|
||||||
|
if [ -n "$ISCSI_EXCLUDED_SESSIONS" ] ; then
|
||||||
|
echo "$0: the following sessions are excluded from disconnection (because / or another excluded mount is on them):"
|
||||||
|
for v in $ISCSI_EXCLUDED_SESSIONS ; do
|
||||||
|
echo " $v"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# After our first enumeration, write out a list of sessions that
# shouldn't be terminated because excluded mounts are on those
# sessions.  Shutdown tooling reads this file to know which sessions
# must be kept alive.
if [ -n "$ISCSI_EXCLUDED_SESSIONS" ] ; then
	mkdir -p -m 0700 /run/open-iscsi
	for session in $ISCSI_EXCLUDED_SESSIONS ; do
		# Quote the expansion: session identifiers must be written
		# verbatim, without pathname expansion of any glob
		# characters they might contain.
		printf '%s\n' "$session"
	done > /run/open-iscsi/shutdown-keep-sessions
else
	# make sure there's no leftover from a previous call
	rm -f /run/open-iscsi/shutdown-keep-sessions
fi
|
||||||
|
|
||||||
|
# First teardown attempt.  Order matters: filesystems first, then the
# stacked block layers from the top of the stack down — LUKS pass 2,
# LVM volume groups, LUKS pass 1, and finally multipath.
# NOTE(review): pass 2 appears to be LUKS devices stacked above LVM and
# pass 1 those below it — confirm against enumerate_iscsi_devices.
any_umount_failed=0
try_umount
try_dismantle_luks 2
try_deactivate_lvm
try_dismantle_luks 1
try_dismantle_multipath

# Retry while anything failed; a timeout of -1 means retry forever,
# otherwise retry roughly once per second until the budget runs out.
while [ $any_umount_failed -ne 0 ] && ( [ $timeout -gt 0 ] || [ $timeout -eq -1 ] ) ; do
	# wait a bit, perhaps there was still a program that
	# was terminating
	sleep 1

	# try again and decrease timeout
	enumerate_iscsi_devices
	any_umount_failed=0
	try_umount
	try_dismantle_luks 2
	try_deactivate_lvm
	try_dismantle_luks 1
	try_dismantle_multipath
	if [ $timeout -gt 0 ] ; then
		timeout=$((timeout - 1))
	fi
done

# Create signaling file (might be useful)
if [ $any_umount_failed -eq 1 ] ; then
	touch /run/open-iscsi/some_umount_failed
else
	rm -f /run/open-iscsi/some_umount_failed
fi
# Exit status tells the caller whether teardown completed cleanly.
exit $any_umount_failed
|
||||||
@ -0,0 +1,19 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=iSCSI initiator daemon (iscsid)
|
||||||
|
Documentation=man:iscsid(8)
|
||||||
|
Wants=network-online.target remote-fs-pre.target
|
||||||
|
Before=remote-fs-pre.target
|
||||||
|
After=network.target network-online.target
|
||||||
|
DefaultDependencies=no
|
||||||
|
Conflicts=shutdown.target
|
||||||
|
Before=shutdown.target
|
||||||
|
ConditionVirtualization=!private-users
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=forking
|
||||||
|
PIDFile=/run/iscsid.pid
|
||||||
|
ExecStartPre=/usr/lib/open-iscsi/startup-checks.sh
|
||||||
|
ExecStart=/usr/sbin/iscsid
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=sysinit.target
|
||||||
@ -0,0 +1,9 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Open-iSCSI iscsid Socket
|
||||||
|
Documentation=man:iscsid(8) man:iscsiadm(8)
|
||||||
|
|
||||||
|
[Socket]
|
||||||
|
ListenStream=@ISCSIADM_ABSTRACT_NAMESPACE
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=sockets.target
|
||||||
@ -0,0 +1,31 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Login to default iSCSI targets
|
||||||
|
Documentation=man:iscsiadm(8) man:iscsid(8)
|
||||||
|
Wants=network-online.target remote-fs-pre.target
|
||||||
|
After=network-online.target iscsid.service
|
||||||
|
Before=remote-fs-pre.target
|
||||||
|
DefaultDependencies=no
|
||||||
|
Conflicts=shutdown.target
|
||||||
|
Before=shutdown.target
|
||||||
|
# Must have some pre-defined targets to login to
|
||||||
|
ConditionDirectoryNotEmpty=|/etc/iscsi/nodes
|
||||||
|
# or have a session to use via iscsid
|
||||||
|
ConditionDirectoryNotEmpty=|/sys/class/iscsi_session
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
RemainAfterExit=true
|
||||||
|
# iscsiadm --login will return 21 if no nodes are configured,
|
||||||
|
# and 15 if a session is already logged in (which we do not
|
||||||
|
# consider an error)
|
||||||
|
SuccessExitStatus=15 21
|
||||||
|
# Note: iscsid will be socket activated by iscsiadm
|
||||||
|
ExecStart=/usr/sbin/iscsiadm -m node --loginall=automatic
|
||||||
|
ExecStart=/usr/lib/open-iscsi/activate-storage.sh
|
||||||
|
ExecStop=/usr/lib/open-iscsi/umountiscsi.sh
|
||||||
|
ExecStop=/bin/sync
|
||||||
|
ExecStop=/usr/lib/open-iscsi/logout-all.sh
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=sysinit.target
|
||||||
|
Alias=iscsi.service
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# run before 80-networking.rules to run before ifupdown
|
||||||
|
SUBSYSTEM=="net", ACTION=="add", RUN+="/usr/lib/open-iscsi/net-interface-handler start"
|
||||||
|
SUBSYSTEM=="net", ACTION=="remove", RUN+="/usr/lib/open-iscsi/net-interface-handler stop"
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# When iscsi disks are present, iscsid.service should be running. LP: #1802354
|
||||||
|
# ID_PATH looks like ip-<ipv4-dotted-quad>:<port>-iscsi-<target>-lun-<lun>
|
||||||
|
SUBSYSTEM=="block", ACTION=="add", ENV{ID_PATH}=="*-iscsi-*", ENV{SYSTEMD_WANTS}+="iscsid.service"
|
||||||
BIN
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s
Executable file
BIN
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s
Executable file
Binary file not shown.
77
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s-agent-uninstall.sh
Executable file
77
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s-agent-uninstall.sh
Executable file
@ -0,0 +1,77 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -x
|
||||||
|
[ $(id -u) -eq 0 ] || exec sudo --preserve-env=K3S_DATA_DIR $0 $@
|
||||||
|
|
||||||
|
K3S_DATA_DIR=${K3S_DATA_DIR:-/var/lib/rancher/k3s}
|
||||||
|
|
||||||
|
/usr/local/bin/k3s-killall.sh
|
||||||
|
|
||||||
|
if command -v systemctl; then
|
||||||
|
systemctl disable k3s-agent
|
||||||
|
systemctl reset-failed k3s-agent
|
||||||
|
systemctl daemon-reload
|
||||||
|
fi
|
||||||
|
if command -v rc-update; then
|
||||||
|
rc-update delete k3s-agent default
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f /etc/systemd/system/k3s-agent.service
|
||||||
|
rm -f /etc/systemd/system/k3s-agent.service.env
|
||||||
|
|
||||||
|
# Delete this uninstall script itself; registered as an EXIT trap so it
# runs even if the cleanup steps below abort early.
remove_uninstall() {
    rm -f /usr/local/bin/k3s-agent-uninstall.sh
}
|
||||||
|
trap remove_uninstall EXIT
|
||||||
|
|
||||||
|
if (ls /etc/systemd/system/k3s*.service || ls /etc/init.d/k3s*) >/dev/null 2>&1; then
|
||||||
|
set +x; echo 'Additional k3s services installed, skipping uninstall of k3s'; set -x
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
for cmd in kubectl crictl ctr; do
|
||||||
|
if [ -L /usr/local/bin/$cmd ]; then
|
||||||
|
rm -f /usr/local/bin/$cmd
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Recursively delete the contents of directory $1 without touching
# anything that is still mounted: an unmounted tree is removed
# wholesale, otherwise each child is handled individually, recursing
# into children that still appear in /proc/mounts.
# NOTE(review): the mount test is a plain substring match on " $1", so
# a mount point whose path merely shares this prefix also matches —
# confirm this is acceptable (appears to match upstream k3s behavior).
clean_mounted_directory() {
    if ! grep -q " $1" /proc/mounts; then
        rm -rf "$1"
        return 0
    fi

    for path in "$1"/*; do
        if [ -d "$path" ]; then
            if grep -q " $path" /proc/mounts; then
                clean_mounted_directory "$path"
            else
                rm -rf "$path"
            fi
        else
            rm "$path"
        fi
    done
}
|
||||||
|
|
||||||
|
rm -rf /etc/rancher/k3s
|
||||||
|
rm -rf /run/k3s
|
||||||
|
rm -rf /run/flannel
|
||||||
|
clean_mounted_directory ${K3S_DATA_DIR}
|
||||||
|
rm -rf /var/lib/kubelet
|
||||||
|
rm -f /usr/local/bin/k3s
|
||||||
|
rm -f /usr/local/bin/k3s-killall.sh
|
||||||
|
|
||||||
|
if type yum >/dev/null 2>&1; then
|
||||||
|
yum remove -y k3s-selinux
|
||||||
|
rm -f /etc/yum.repos.d/rancher-k3s-common*.repo
|
||||||
|
elif type rpm-ostree >/dev/null 2>&1; then
|
||||||
|
rpm-ostree uninstall k3s-selinux
|
||||||
|
rm -f /etc/yum.repos.d/rancher-k3s-common*.repo
|
||||||
|
elif type zypper >/dev/null 2>&1; then
|
||||||
|
uninstall_cmd="zypper remove -y k3s-selinux"
|
||||||
|
if [ "${TRANSACTIONAL_UPDATE=false}" != "true" ] && [ -x /usr/sbin/transactional-update ]; then
|
||||||
|
uninstall_cmd="transactional-update --no-selfupdate -d run $uninstall_cmd"
|
||||||
|
fi
|
||||||
|
sudo $uninstall_cmd
|
||||||
|
rm -f /etc/zypp/repos.d/rancher-k3s-common*.repo
|
||||||
|
fi
|
||||||
91
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s-killall.sh
Executable file
91
overlays/rpi4-armbian-longhorn-root/usr/local/bin/k3s-killall.sh
Executable file
@ -0,0 +1,91 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
[ $(id -u) -eq 0 ] || exec sudo --preserve-env=K3S_DATA_DIR $0 $@
|
||||||
|
|
||||||
|
K3S_DATA_DIR=${K3S_DATA_DIR:-/var/lib/rancher/k3s}
|
||||||
|
|
||||||
|
for bin in ${K3S_DATA_DIR}/data/**/bin/; do
|
||||||
|
[ -d $bin ] && export PATH=$PATH:$bin:$bin/aux
|
||||||
|
done
|
||||||
|
|
||||||
|
set -x
|
||||||
|
|
||||||
|
for service in /etc/systemd/system/k3s*.service; do
|
||||||
|
[ -s $service ] && systemctl stop $(basename $service)
|
||||||
|
done
|
||||||
|
|
||||||
|
for service in /etc/init.d/k3s*; do
|
||||||
|
[ -x $service ] && $service stop
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the PIDs of all direct children of process $1, one per line.
pschildren() {
    ps -e -o ppid= -o pid= | awk -v ppid="$1" '$1 == ppid { print $2 }'
}
|
||||||
|
|
||||||
|
# Print each given PID followed by its entire descendant tree,
# depth-first, one PID per line.
pstree() {
    for root in $@; do
        echo $root
        for kid in $(pschildren $root); do
            pstree $kid
        done
    done
}
|
||||||
|
|
||||||
|
# SIGKILL every PID passed in, together with all of its descendants.
# The { set +x; } 2>/dev/null / set -x dance temporarily silences shell
# tracing while the PID list is computed, so the log stays readable.
# Errors from already-exited PIDs are suppressed.
killtree() {
    kill -9 $(
        { set +x; } 2>/dev/null;
        pstree $@;
        set -x;
    ) 2>/dev/null
}
|
||||||
|
|
||||||
|
# Tear down CNI-related network interfaces left behind by k3s.
remove_interfaces() {
    # Delete network interface(s) that match 'master cni0'
    ip link show 2>/dev/null | grep 'master cni0' | while read ignore iface ignore; do
        # ip output may suffix names with "@parent"; strip it.
        iface=${iface%%@*}
        [ -z "$iface" ] || ip link delete $iface
    done

    # Delete cni related interfaces (best effort; missing ones just
    # produce an error from ip).
    ip link delete cni0
    ip link delete flannel.1
    ip link delete flannel-v6.1
    ip link delete kube-ipvs0
    ip link delete flannel-wg
    ip link delete flannel-wg-v6

    # Clear any routes advertised through tailscale, if installed.
    if [ -n "$(command -v tailscale)" ]; then
        tailscale set --advertise-routes=
    fi
}
|
||||||
|
|
||||||
|
# Print the PIDs of all containerd-shim processes launched from the
# k3s data directory, one per line.
getshims() {
    ps -e -o pid= -o args= | sed -e 's/^ *//; s/\s\s*/\t/;' | grep -w "${K3S_DATA_DIR}"'/data/[^/]*/bin/containerd-shim' | cut -f1
}
|
||||||
|
|
||||||
|
killtree $({ set +x; } 2>/dev/null; getshims; set -x)
|
||||||
|
|
||||||
|
# Force-unmount and delete every mount point whose path starts with
# prefix $1.  Paths are sorted in reverse so nested mounts are
# unmounted before their parents.  Tracing is suspended while scanning
# /proc/self/mounts to keep the log readable.
do_unmount_and_remove() {
    set +x
    while read -r _ path _; do
        case "$path" in $1*) echo "$path" ;; esac
    done < /proc/self/mounts | sort -r | xargs -r -t -n 1 sh -c 'umount -f "$0" && rm -rf "$0"'
    set -x
}
|
||||||
|
|
||||||
|
do_unmount_and_remove '/run/k3s'
|
||||||
|
do_unmount_and_remove '/var/lib/kubelet/pods'
|
||||||
|
do_unmount_and_remove '/var/lib/kubelet/plugins'
|
||||||
|
do_unmount_and_remove '/run/netns/cni-'
|
||||||
|
|
||||||
|
# Remove CNI namespaces
|
||||||
|
ip netns show 2>/dev/null | grep cni- | xargs -r -t -n 1 ip netns delete
|
||||||
|
|
||||||
|
remove_interfaces
|
||||||
|
|
||||||
|
rm -rf /var/lib/cni/
|
||||||
|
iptables-save | grep -v KUBE- | grep -v CNI- | grep -iv flannel | iptables-restore
|
||||||
|
ip6tables-save | grep -v KUBE- | grep -v CNI- | grep -iv flannel | ip6tables-restore
|
||||||
@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
marker="/var/lib/metis/rpi4-longhorn-firstboot.done"
|
||||||
|
env_file="/etc/metis/firstboot.env"
|
||||||
|
key_file="/etc/metis/authorized_keys"
|
||||||
|
fstab_append="/etc/metis/fstab.append"
|
||||||
|
default_groups=(tty disk dialout sudo audio video plugdev games users systemd-journal input render netdev)
|
||||||
|
|
||||||
|
exec > >(tee -a /var/log/metis-rpi4-longhorn-firstboot.log) 2>&1
|
||||||
|
|
||||||
|
# Run a command, retrying until it succeeds or $1 attempts have been
# made, sleeping 5 seconds between attempts.  Returns 1 once the
# attempt budget is exhausted.  The command is evaluated in a loop
# condition so the script's `set -e` does not abort on transient
# failures.
retry_cmd() {
    local max="$1"
    shift
    local attempt=1
    while ! "$@"; do
        if (( attempt >= max )); then
            return 1
        fi
        (( attempt++ ))
        sleep 5
    done
}
|
||||||
|
|
||||||
|
# Wait until the kernel can route to 1.1.1.1 (i.e. outbound routing is
# up), retrying for up to ~60s (12 attempts, 5s apart, via retry_cmd).
ensure_network_access() {
    retry_cmd 12 sh -c 'ip route get 1.1.1.1 >/dev/null 2>&1'
}
|
||||||
|
|
||||||
|
if [ -f "${marker}" ]; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p /var/lib/metis /mnt/astreae /mnt/asteria
|
||||||
|
|
||||||
|
if [ -f "${env_file}" ]; then
|
||||||
|
# shellcheck disable=SC1090
|
||||||
|
. "${env_file}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
metis_hostname="${METIS_HOSTNAME:-}"
|
||||||
|
metis_ssh_user="${METIS_SSH_USER:-atlas}"
|
||||||
|
metis_k3s_version="${METIS_K3S_VERSION:-}"
|
||||||
|
|
||||||
|
if [ -n "${metis_hostname}" ]; then
|
||||||
|
hostnamectl set-hostname "${metis_hostname}" || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v nmcli >/dev/null 2>&1; then
|
||||||
|
retry_cmd 10 sh -c 'nmcli general status >/dev/null 2>&1'
|
||||||
|
nmcli connection reload || true
|
||||||
|
while IFS=: read -r name type device; do
|
||||||
|
[ "${device}" = "end0" ] || continue
|
||||||
|
[ "${name}" = "end0-static" ] && continue
|
||||||
|
case "${type}" in
|
||||||
|
ethernet|802-3-ethernet)
|
||||||
|
nmcli connection modify "${name}" connection.autoconnect no || true
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done < <(nmcli -t -f NAME,TYPE,DEVICE connection show 2>/dev/null || true)
|
||||||
|
nmcli connection up end0-static || true
|
||||||
|
elif [ -f /etc/systemd/network/10-end0-static.network ]; then
|
||||||
|
systemctl enable systemd-networkd.service || true
|
||||||
|
systemctl restart systemd-networkd.service || true
|
||||||
|
systemctl restart systemd-networkd-wait-online.service || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -f "${fstab_append}" ]; then
|
||||||
|
while IFS= read -r line; do
|
||||||
|
[ -z "${line}" ] && continue
|
||||||
|
grep -Fqx "${line}" /etc/fstab || printf '%s\n' "${line}" >> /etc/fstab
|
||||||
|
done < "${fstab_append}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
mount -a || true
|
||||||
|
|
||||||
|
packages=()
|
||||||
|
if ! command -v sshd >/dev/null 2>&1; then
|
||||||
|
packages+=("openssh-server")
|
||||||
|
fi
|
||||||
|
if ! command -v mount.nfs >/dev/null 2>&1; then
|
||||||
|
packages+=("nfs-common")
|
||||||
|
fi
|
||||||
|
if ! command -v iscsiadm >/dev/null 2>&1; then
|
||||||
|
packages+=("open-iscsi")
|
||||||
|
fi
|
||||||
|
if [ "${#packages[@]}" -gt 0 ]; then
|
||||||
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
|
ensure_network_access
|
||||||
|
retry_cmd 5 apt-get update
|
||||||
|
retry_cmd 5 apt-get install -y --no-install-recommends "${packages[@]}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl enable ssh.socket || systemctl enable ssh.service || true
|
||||||
|
systemctl restart ssh.socket || systemctl restart ssh.service || systemctl start ssh.socket || systemctl start ssh.service || true
|
||||||
|
mkdir -p /etc/iscsi /etc/iscsi/nodes /etc/iscsi/send_targets
|
||||||
|
if [ ! -s /etc/iscsi/initiatorname.iscsi ] && command -v iscsi-iname >/dev/null 2>&1; then
|
||||||
|
printf 'InitiatorName=%s\n' "$(iscsi-iname)" > /etc/iscsi/initiatorname.iscsi
|
||||||
|
fi
|
||||||
|
systemctl enable --now iscsid.socket || true
|
||||||
|
systemctl enable --now open-iscsi.service || true
|
||||||
|
|
||||||
|
if [ -s "${key_file}" ]; then
|
||||||
|
install -d -m 700 /root/.ssh
|
||||||
|
install -m 600 "${key_file}" /root/.ssh/authorized_keys
|
||||||
|
|
||||||
|
if [ -n "${metis_ssh_user}" ]; then
|
||||||
|
group_list=()
|
||||||
|
for group_name in "${default_groups[@]}"; do
|
||||||
|
if getent group "${group_name}" >/dev/null 2>&1; then
|
||||||
|
group_list+=("${group_name}")
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ "${#group_list[@]}" -gt 0 ]; then
|
||||||
|
group_csv="$(IFS=,; printf '%s' "${group_list[*]}")"
|
||||||
|
else
|
||||||
|
group_csv=""
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! id "${metis_ssh_user}" >/dev/null 2>&1; then
|
||||||
|
if [ -n "${group_csv}" ]; then
|
||||||
|
useradd -m -s /bin/bash -G "${group_csv}" "${metis_ssh_user}"
|
||||||
|
else
|
||||||
|
useradd -m -s /bin/bash "${metis_ssh_user}"
|
||||||
|
fi
|
||||||
|
elif [ -n "${group_csv}" ]; then
|
||||||
|
usermod -a -G "${group_csv}" "${metis_ssh_user}" || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
install -d -m 700 -o "${metis_ssh_user}" -g "${metis_ssh_user}" "/home/${metis_ssh_user}/.ssh"
|
||||||
|
install -m 600 -o "${metis_ssh_user}" -g "${metis_ssh_user}" "${key_file}" "/home/${metis_ssh_user}/.ssh/authorized_keys"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f /root/.not_logged_in_yet
|
||||||
|
|
||||||
|
if ! command -v k3s >/dev/null 2>&1; then
|
||||||
|
installer_env=("INSTALL_K3S_EXEC=agent")
|
||||||
|
if [ -n "${metis_k3s_version}" ]; then
|
||||||
|
installer_env+=("INSTALL_K3S_VERSION=${metis_k3s_version}")
|
||||||
|
fi
|
||||||
|
ensure_network_access
|
||||||
|
retry_cmd 5 env "${installer_env[@]}" sh -c 'curl -sfL https://get.k3s.io | sh -'
|
||||||
|
fi
|
||||||
|
|
||||||
|
systemctl enable k3s-agent
|
||||||
|
systemctl restart k3s-agent || systemctl start k3s-agent
|
||||||
|
|
||||||
|
touch "${marker}"
|
||||||
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsi-iname
Executable file
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsi-iname
Executable file
Binary file not shown.
195
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsi_discovery
Executable file
195
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsi_discovery
Executable file
@ -0,0 +1,195 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (C) Voltaire Ltd. 2006. ALL RIGHTS RESERVED.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
#
|
||||||
|
# Author: Dan Bar Dov <danb@voltaire.com>
|
||||||
|
|
||||||
|
# iscsi_discovery:
|
||||||
|
# * does a send-targets discovery to the given IP
|
||||||
|
# * set the transport type to the preferred transport (or tcp if the -t flag is not used)
|
||||||
|
# * tries to login
|
||||||
|
# * if succeeds,
|
||||||
|
# o logout,
|
||||||
|
# o mark record automatic (unless -m flag is used)
|
||||||
|
# * else
|
||||||
|
# o reset transport type to TCP
|
||||||
|
# o try to login
|
||||||
|
# o if succeeded
|
||||||
|
# + logout
|
||||||
|
# + mark record automatic (unless -m flag is used)
|
||||||
|
#
|
||||||
|
|
||||||
|
# Print command-line usage to stdout.
usage()
{
	echo "Usage: $0 <IP> [-p <port>] [-d] [-t <tcp|iser> [-f]] [-m] [-l]"
	echo "Options:"
	echo "-p set the port number (default is 3260)."
	echo "-d print debugging information"
	echo "-t set transport (default is tcp)."
	echo "-f force specific transport -disable the fallback to tcp (default is fallback enabled)."
	echo " force the transport specified by the argument of the -t flag."
	echo "-m manual startup - will set manual startup (default is automatic startup)."
	echo "-l login to the new discovered nodes (default is false)."
}
|
||||||
|
|
||||||
|
# Echo the arguments only when -d (debug) was given; otherwise a no-op.
# Note: returns non-zero when $debug is false, and $@ is deliberately
# left unquoted (arguments are re-split) — callers ignore both.
dbg()
{
	$debug && echo $@
}
|
||||||
|
|
||||||
|
# Set the option defaults that parse_cmdline may later override.
initialize()
{
	# Exit cleanly on SIGINT.
	trap "exit" 2

	debug=false          # -d: verbose tracing off by default
	force="0"            # -f not given: fall back to tcp allowed
	log_out="1"          # without -l: log out again after test login
	startup_manual="0"   # without -m: mark records automatic
	transport=tcp        # default transport is tcp
	port=3260            # default iSCSI port
}
|
||||||
|
|
||||||
|
# Validate the target IP argument and parse the option flags into the
# globals prepared by initialize().  Exits with status 1 on bad input.
parse_cmdline()
{
	if [ $# -lt 1 ]; then
		usage
		exit 1
	fi

	# check if the IP address is valid: a dotted quad with every
	# octet present and <= 255
	ip=`echo $1 | awk -F'.' '$1 != "" && $1 <=255 && $2 != "" && $2 <= 255 && $3 != "" && $3 <= 255 && $4 != "" && $4 <= 255 {print $0}'`
	if [ -z "$ip" ]; then
		# Fixed typo in user-facing message ("vaild" -> "valid").
		echo "$1 is not a valid IP address!"
		exit 1
	fi
	shift
	while getopts "dfmlt:p:" options; do
		case $options in
			d ) debug=true;;
			f ) force="1";;
			t ) transport=$OPTARG;;
			p ) port=$OPTARG;;
			m ) startup_manual="1";;
			l ) log_out=0;;
			\? ) usage
				exit 1;;
			* ) usage
				exit 1;;
		esac
	done
}
|
||||||
|
|
||||||
|
# Run sendtargets discovery against ${ip}:${port} and test-login to
# every target found, selecting a working transport per target.
discover()
{
	# If open-iscsi is already logged in to the portal, exit
	if [ $(iscsiadm -m session | grep -c ${ip}:${port}) -ne 0 ]; then
		echo "Please logout from all targets on ${ip}:${port} before trying to run discovery on that portal"
		exit 2
	fi

	connected=0
	discovered=0

	dbg "starting discovery to $ip"
	disc="$(iscsiadm -m discovery --type sendtargets --portal ${ip}:${port})"
	# NOTE(review): this while loop runs in a pipeline subshell, so
	# variables modified inside (e.g. "connected" in try_login) do not
	# propagate back to this shell.
	echo "${disc}" | while read portal target
	do
		# Each line is "<ip:port>,<tpgt> <targetname>"; strip the
		# target-portal-group tag from the portal field.
		portal=${portal%,*}
		select_transport
	done

	# NOTE(review): `wc -l` prints 1 even for empty input, so the
	# "failed to discover" branch below looks unreachable — confirm.
	discovered=$(echo "${disc}" | wc -l)
	if [ ${discovered} = 0 ]; then
		echo "failed to discover targets at ${ip}"
		exit 2
	else
		echo "discovered ${discovered} targets at ${ip}"
	fi
}
|
||||||
|
|
||||||
|
# Attempt a test login to ${target} via ${portal} over the currently
# configured transport.  On success the node record is left marked for
# automatic startup (unless -m was given) and, unless -l was given,
# the test session is logged out again.  Returns iscsiadm's login
# status.
try_login()
{
	if [ "$startup_manual" != "1" ]; then
		iscsiadm -m node --targetname ${target} --portal ${portal} --op update -n node.conn[0].startup -v automatic
	fi
	iscsiadm -m node --targetname ${target} --portal ${portal} --login >/dev/null 2>&1
	ret=$?
	if [ ${ret} = 0 ]; then
		echo "Set target ${target} to automatic login over ${transport} to portal ${portal}"
		# NOTE(review): this counter increments inside the pipeline
		# subshell spawned by discover(), so the value never reaches
		# the parent shell.
		((connected++))
		if [ "$log_out" = "1" ]; then
			iscsiadm -m node --targetname ${target} --portal ${portal} --logout
		fi
	else
		echo "Cannot login over ${transport} to portal ${portal}"
		# Login failed: put the record back on manual startup so boot
		# does not hang retrying a broken transport.
		iscsiadm -m node --targetname ${target} --portal ${portal} --op update -n node.conn[0].startup -v manual
	fi
	return ${ret}
}
|
||||||
|
|
||||||
|
# Configure the node record for transport $1, applying per-transport
# quirks before the login attempt.
set_transport()
{
	transport=$1
	case "$transport" in
	iser)
		# iSER does not use digest
		iscsiadm -m node --targetname ${target} --portal ${portal} \
			--op update -n node.conn[0].iscsi.HeaderDigest -v None
		iscsiadm -m node --targetname ${target} --portal ${portal} \
			--op update -n node.conn[0].iscsi.DataDigest -v None
		;;
	cxgb3i)
		# cxgb3i supports <= 16K packet (BHS + AHS + pdu payload + digests)
		iscsiadm -m node --targetname ${target} --portal ${portal} \
			--op update -n node.conn[0].iscsi.MaxRecvDataSegmentLength \
			-v 8192
		;;
	esac
	# The record key for the transport is looked up from iscsiadm
	# output rather than hardcoded.
	transport_name=`iscsiadm -m node -p ${portal} -T ${target} |awk '/transport_name/ {print $1}'`
	iscsiadm -m node --targetname ${target} --portal ${portal} \
		--op update -n ${transport_name} -v ${transport}
}
|
||||||
|
|
||||||
|
# Try logging in over the preferred transport; unless -f (force) was
# given, fall back to plain tcp when that attempt fails.
select_transport()
{
	set_transport $transport
	dbg "Testing $transport-login to target ${target} portal ${portal}"
	if ! try_login && [ "$force" = "0" ]; then
		set_transport tcp
		dbg "starting to test tcp-login to target ${target} portal ${portal}"
		try_login
	fi
}
|
||||||
|
|
||||||
|
# Abort unless the iscsid daemon is running, since every iscsiadm
# operation below would fail without it.
check_iscsid()
{
	if ! pidof iscsid &>/dev/null; then
		echo "iscsid is not running"
		echo "Exiting..."
		exit 1
	fi
}
|
||||||
|
|
||||||
|
check_iscsid
|
||||||
|
initialize
|
||||||
|
parse_cmdline "$@"
|
||||||
|
discover
|
||||||
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsiadm
Executable file
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsiadm
Executable file
Binary file not shown.
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsid
Executable file
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsid
Executable file
Binary file not shown.
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsistart
Executable file
BIN
overlays/rpi4-armbian-longhorn-root/usr/sbin/iscsistart
Executable file
Binary file not shown.
@ -16,11 +16,13 @@ type NodeConfig struct {
|
|||||||
Labels map[string]string `json:"labels,omitempty"`
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
Taints []string `json:"taints,omitempty"`
|
Taints []string `json:"taints,omitempty"`
|
||||||
Fstab []FstabEntry `json:"fstab,omitempty"`
|
Fstab []FstabEntry `json:"fstab,omitempty"`
|
||||||
|
Secrets map[string]string `json:"secrets,omitempty"` // optional key/values for local agent use
|
||||||
}
|
}
|
||||||
|
|
||||||
// K3sConfig includes role and token/url.
|
// K3sConfig includes role and token/url.
|
||||||
type K3sConfig struct {
|
type K3sConfig struct {
|
||||||
Role string `json:"role"`
|
Role string `json:"role"`
|
||||||
|
Version string `json:"version,omitempty"`
|
||||||
URL string `json:"url,omitempty"`
|
URL string `json:"url,omitempty"`
|
||||||
Token string `json:"token,omitempty"`
|
Token string `json:"token,omitempty"`
|
||||||
Args []string `json:"args,omitempty"`
|
Args []string `json:"args,omitempty"`
|
||||||
@ -51,6 +53,10 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
|
|||||||
}
|
}
|
||||||
taints := append([]string{}, cls.DefaultTaints...)
|
taints := append([]string{}, cls.DefaultTaints...)
|
||||||
taints = append(taints, n.Taints...)
|
taints = append(taints, n.Taints...)
|
||||||
|
k3sVersion := cls.K3sVersion
|
||||||
|
if n.K3sVersion != "" {
|
||||||
|
k3sVersion = n.K3sVersion
|
||||||
|
}
|
||||||
|
|
||||||
fstab := []FstabEntry{}
|
fstab := []FstabEntry{}
|
||||||
for _, d := range n.LonghornDisks {
|
for _, d := range n.LonghornDisks {
|
||||||
@ -76,6 +82,7 @@ func Build(inv *inventory.Inventory, nodeName string) (*NodeConfig, error) {
|
|||||||
Fstab: fstab,
|
Fstab: fstab,
|
||||||
K3s: K3sConfig{
|
K3s: K3sConfig{
|
||||||
Role: n.K3sRole,
|
Role: n.K3sRole,
|
||||||
|
Version: k3sVersion,
|
||||||
URL: n.K3sURL,
|
URL: n.K3sURL,
|
||||||
Token: n.K3sToken,
|
Token: n.K3sToken,
|
||||||
Labels: labels,
|
Labels: labels,
|
||||||
|
|||||||
63
pkg/facts/aggregate.go
Normal file
63
pkg/facts/aggregate.go
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ClassSummary captures aggregated sentinel facts per class.
|
||||||
|
type ClassSummary struct {
|
||||||
|
Class string `json:"class"`
|
||||||
|
Nodes []string `json:"nodes"`
|
||||||
|
Kernels map[string]int `json:"kernels,omitempty"`
|
||||||
|
OSImages map[string]int `json:"os_images,omitempty"`
|
||||||
|
Containerd map[string]int `json:"containerd,omitempty"`
|
||||||
|
K3sVersions map[string]int `json:"k3s_versions,omitempty"`
|
||||||
|
PackageStats map[string]map[string]int `json:"package_stats,omitempty"` // pkg -> version -> count
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregate groups snapshots by inventory class and tallies version drift.
|
||||||
|
func Aggregate(inv *inventory.Inventory, snaps []Snapshot) map[string]*ClassSummary {
|
||||||
|
result := map[string]*ClassSummary{}
|
||||||
|
for _, s := range snaps {
|
||||||
|
class := "unknown"
|
||||||
|
if inv != nil {
|
||||||
|
if node, cls, err := inv.FindNode(s.Hostname); err == nil && cls != nil && node != nil {
|
||||||
|
class = cls.Name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sum, ok := result[class]
|
||||||
|
if !ok {
|
||||||
|
sum = &ClassSummary{
|
||||||
|
Class: class,
|
||||||
|
Kernels: map[string]int{},
|
||||||
|
OSImages: map[string]int{},
|
||||||
|
Containerd: map[string]int{},
|
||||||
|
K3sVersions: map[string]int{},
|
||||||
|
PackageStats: map[string]map[string]int{},
|
||||||
|
}
|
||||||
|
result[class] = sum
|
||||||
|
}
|
||||||
|
sum.Nodes = append(sum.Nodes, s.Hostname)
|
||||||
|
if s.Kernel != "" {
|
||||||
|
sum.Kernels[s.Kernel]++
|
||||||
|
}
|
||||||
|
if s.OSImage != "" {
|
||||||
|
sum.OSImages[s.OSImage]++
|
||||||
|
}
|
||||||
|
if s.Containerd != "" {
|
||||||
|
sum.Containerd[s.Containerd]++
|
||||||
|
}
|
||||||
|
if s.K3sVersion != "" {
|
||||||
|
sum.K3sVersions[s.K3sVersion]++
|
||||||
|
}
|
||||||
|
for pkg, ver := range s.PackageSample {
|
||||||
|
if sum.PackageStats[pkg] == nil {
|
||||||
|
sum.PackageStats[pkg] = map[string]int{}
|
||||||
|
}
|
||||||
|
if ver != "" {
|
||||||
|
sum.PackageStats[pkg][ver]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
33
pkg/facts/aggregate_test.go
Normal file
33
pkg/facts/aggregate_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAggregateGroupsByClass(t *testing.T) {
|
||||||
|
inv := &inventory.Inventory{
|
||||||
|
Classes: []inventory.NodeClass{{Name: "c1"}, {Name: "c2"}},
|
||||||
|
Nodes: []inventory.NodeSpec{
|
||||||
|
{Name: "n1", Class: "c1"},
|
||||||
|
{Name: "n2", Class: "c2"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
snaps := []Snapshot{
|
||||||
|
{Hostname: "n1", Kernel: "k1", PackageSample: map[string]string{"containerd": "2.0"}},
|
||||||
|
{Hostname: "n2", Kernel: "k2", PackageSample: map[string]string{"containerd": "1.7"}},
|
||||||
|
{Hostname: "n1", Kernel: "k1"},
|
||||||
|
}
|
||||||
|
sum := Aggregate(inv, snaps)
|
||||||
|
if len(sum) != 2 {
|
||||||
|
t.Fatalf("expected 2 classes, got %d", len(sum))
|
||||||
|
}
|
||||||
|
c1 := sum["c1"]
|
||||||
|
if c1 == nil || c1.Kernels["k1"] != 2 {
|
||||||
|
t.Fatalf("expected k1 count 2, got %#v", c1)
|
||||||
|
}
|
||||||
|
if c1.PackageStats["containerd"]["2.0"] != 1 {
|
||||||
|
t.Fatalf("package stats not tallied: %#v", c1.PackageStats)
|
||||||
|
}
|
||||||
|
}
|
||||||
43
pkg/facts/load.go
Normal file
43
pkg/facts/load.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Snapshot mirrors sentinel output; kept minimal to avoid tight coupling.
|
||||||
|
type Snapshot struct {
|
||||||
|
Hostname string `json:"hostname,omitempty"`
|
||||||
|
Kernel string `json:"kernel,omitempty"`
|
||||||
|
OSImage string `json:"os_image,omitempty"`
|
||||||
|
K3sVersion string `json:"k3s_version,omitempty"`
|
||||||
|
Containerd string `json:"containerd,omitempty"`
|
||||||
|
PackageSample map[string]string `json:"package_sample,omitempty"`
|
||||||
|
DropInsSample map[string]string `json:"dropins_sample,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadDir reads all *.json under a directory and returns snapshots.
|
||||||
|
func LoadDir(dir string) ([]Snapshot, error) {
|
||||||
|
var snaps []Snapshot
|
||||||
|
err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if d.IsDir() || filepath.Ext(path) != ".json" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var s Snapshot
|
||||||
|
if err := json.Unmarshal(b, &s); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
snaps = append(snaps, s)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return snaps, err
|
||||||
|
}
|
||||||
22
pkg/facts/load_test.go
Normal file
22
pkg/facts/load_test.go
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLoadDirReadsSnapshots(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
snap := `{"hostname":"n1","kernel":"k","containerd":"c","package_sample":{"a":"1"}}`
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "snap.json"), []byte(snap), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
got, err := LoadDir(dir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadDir: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 1 || got[0].Hostname != "n1" || got[0].PackageSample["a"] != "1" {
|
||||||
|
t.Fatalf("unexpected snapshot: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
13
pkg/facts/recommend.go
Normal file
13
pkg/facts/recommend.go
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import "metis/pkg/inventory"
|
||||||
|
|
||||||
|
// RecommendTargets builds per-class targets from snapshots.
|
||||||
|
func RecommendTargets(inv *inventory.Inventory, snaps []Snapshot) map[string]Targets {
|
||||||
|
sum := Aggregate(inv, snaps)
|
||||||
|
out := map[string]Targets{}
|
||||||
|
for cls, s := range sum {
|
||||||
|
out[cls] = ChooseTargets(s)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
37
pkg/facts/recommend_test.go
Normal file
37
pkg/facts/recommend_test.go
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRecommendTargetsPerClass(t *testing.T) {
|
||||||
|
inv := &inventory.Inventory{
|
||||||
|
Classes: []inventory.NodeClass{{Name: "c1"}, {Name: "c2"}},
|
||||||
|
Nodes: []inventory.NodeSpec{
|
||||||
|
{Name: "n1", Class: "c1"},
|
||||||
|
{Name: "n2", Class: "c2"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
snaps := []Snapshot{
|
||||||
|
{Hostname: "n1", Kernel: "k1", Containerd: "2.0", PackageSample: map[string]string{"containerd": "2.0"}},
|
||||||
|
{Hostname: "n2", Kernel: "k2", Containerd: "1.7", PackageSample: map[string]string{"containerd": "1.7"}},
|
||||||
|
}
|
||||||
|
targets := RecommendTargets(inv, snaps)
|
||||||
|
if targets["c1"].Kernel != "k1" || targets["c1"].Containerd != "2.0" {
|
||||||
|
t.Fatalf("unexpected targets for c1: %+v", targets["c1"])
|
||||||
|
}
|
||||||
|
if targets["c2"].Kernel != "k2" || targets["c2"].Packages["containerd"] != "1.7" {
|
||||||
|
t.Fatalf("unexpected targets for c2: %+v", targets["c2"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecommendHandlesUnknownClass(t *testing.T) {
|
||||||
|
inv := &inventory.Inventory{}
|
||||||
|
snaps := []Snapshot{{Hostname: "ghost", Kernel: "k"}}
|
||||||
|
targets := RecommendTargets(inv, snaps)
|
||||||
|
if _, ok := targets["unknown"]; !ok {
|
||||||
|
t.Fatalf("expected unknown class entry")
|
||||||
|
}
|
||||||
|
}
|
||||||
43
pkg/facts/targets.go
Normal file
43
pkg/facts/targets.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
// Targets proposes normalized targets from a ClassSummary by picking the most common version.
|
||||||
|
type Targets struct {
|
||||||
|
Kernel string
|
||||||
|
OSImage string
|
||||||
|
Containerd string
|
||||||
|
K3sVersion string
|
||||||
|
Packages map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
// ChooseTargets picks the highest-count entry for each field. Ties are left empty.
|
||||||
|
func ChooseTargets(sum *ClassSummary) Targets {
|
||||||
|
t := Targets{Packages: map[string]string{}}
|
||||||
|
if sum == nil {
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
t.Kernel = topKey(sum.Kernels)
|
||||||
|
t.OSImage = topKey(sum.OSImages)
|
||||||
|
t.Containerd = topKey(sum.Containerd)
|
||||||
|
t.K3sVersion = topKey(sum.K3sVersions)
|
||||||
|
for pkg, versions := range sum.PackageStats {
|
||||||
|
if v := topKey(versions); v != "" {
|
||||||
|
t.Packages[pkg] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
func topKey(m map[string]int) string {
|
||||||
|
best := ""
|
||||||
|
bestCount := 0
|
||||||
|
for k, c := range m {
|
||||||
|
if c > bestCount {
|
||||||
|
best = k
|
||||||
|
bestCount = c
|
||||||
|
} else if c == bestCount {
|
||||||
|
// tie: prefer empty to avoid arbitrary pick
|
||||||
|
best = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return best
|
||||||
|
}
|
||||||
26
pkg/facts/targets_test.go
Normal file
26
pkg/facts/targets_test.go
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestChooseTargetsPicksMostCommon(t *testing.T) {
|
||||||
|
sum := &ClassSummary{
|
||||||
|
Kernels: map[string]int{"k1": 2, "k2": 1},
|
||||||
|
OSImages: map[string]int{"os1": 1},
|
||||||
|
Containerd: map[string]int{"c1": 2, "c2": 2}, // tie -> empty
|
||||||
|
K3sVersions: map[string]int{"k3s1": 3},
|
||||||
|
PackageStats: map[string]map[string]int{
|
||||||
|
"containerd": {"1.7": 1, "2.0": 2},
|
||||||
|
"k3s": {"v1": 1},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
tg := ChooseTargets(sum)
|
||||||
|
if tg.Kernel != "k1" || tg.OSImage != "os1" || tg.K3sVersion != "k3s1" {
|
||||||
|
t.Fatalf("unexpected targets: %+v", tg)
|
||||||
|
}
|
||||||
|
if tg.Containerd != "" {
|
||||||
|
t.Fatalf("expected tie -> empty for containerd, got %q", tg.Containerd)
|
||||||
|
}
|
||||||
|
if tg.Packages["containerd"] != "2.0" {
|
||||||
|
t.Fatalf("package target wrong: %+v", tg.Packages)
|
||||||
|
}
|
||||||
|
}
|
||||||
21
pkg/facts/types.go
Normal file
21
pkg/facts/types.go
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
package facts
|
||||||
|
|
||||||
|
// ClassFacts captures driftable state collected by metis-sentinel.
|
||||||
|
type ClassFacts struct {
|
||||||
|
ClassName string `json:"class_name"`
|
||||||
|
Kernel string `json:"kernel,omitempty"`
|
||||||
|
K3sVersion string `json:"k3s_version,omitempty"`
|
||||||
|
Containerd string `json:"containerd,omitempty"`
|
||||||
|
Packages map[string]string `json:"packages,omitempty"` // name -> version
|
||||||
|
DropIns map[string]string `json:"dropins,omitempty"` // path -> content
|
||||||
|
Sysctl map[string]string `json:"sysctl,omitempty"` // key -> value
|
||||||
|
CGroupConfig map[string]string `json:"cgroup_config,omitempty"`// key -> value
|
||||||
|
Notes string `json:"notes,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeFacts captures per-node data (e.g., disk UUIDs) to verify drift.
|
||||||
|
type NodeFacts struct {
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
Disks map[string]string `json:"disks,omitempty"` // mount -> UUID
|
||||||
|
Notes string `json:"notes,omitempty"`
|
||||||
|
}
|
||||||
@ -8,6 +8,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -20,6 +21,18 @@ func Download(url, dest string) error {
|
|||||||
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if strings.HasSuffix(url, ".xz") {
|
||||||
|
tmp := dest + ".download.xz"
|
||||||
|
if err := downloadRaw(url, tmp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer os.Remove(tmp)
|
||||||
|
return decompressXZ(tmp, dest)
|
||||||
|
}
|
||||||
|
return downloadRaw(url, dest)
|
||||||
|
}
|
||||||
|
|
||||||
|
func downloadRaw(url, dest string) error {
|
||||||
if strings.HasPrefix(url, "file://") {
|
if strings.HasPrefix(url, "file://") {
|
||||||
src := strings.TrimPrefix(url, "file://")
|
src := strings.TrimPrefix(url, "file://")
|
||||||
in, err := os.Open(src)
|
in, err := os.Open(src)
|
||||||
@ -52,6 +65,22 @@ func Download(url, dest string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func decompressXZ(src, dest string) error {
|
||||||
|
out, err := os.Create(dest)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
cmd := exec.Command("xz", "-dc", src)
|
||||||
|
cmd.Stdout = out
|
||||||
|
var stderr strings.Builder
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
return fmt.Errorf("xz decompress %s: %w: %s", src, err, stderr.String())
|
||||||
|
}
|
||||||
|
return out.Sync()
|
||||||
|
}
|
||||||
|
|
||||||
// VerifyChecksum checks sha256 in the form "sha256:<hex>".
|
// VerifyChecksum checks sha256 in the form "sha256:<hex>".
|
||||||
func VerifyChecksum(path, checksum string) error {
|
func VerifyChecksum(path, checksum string) error {
|
||||||
if checksum == "" {
|
if checksum == "" {
|
||||||
|
|||||||
34
pkg/image/download_test.go
Normal file
34
pkg/image/download_test.go
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
package image
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDownloadDecompressesXZFileURLs(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("xz"); err != nil {
|
||||||
|
t.Skip("xz not available")
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
raw := filepath.Join(dir, "base.img")
|
||||||
|
if err := os.WriteFile(raw, []byte("metis-xz-test"), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
compressed := raw + ".xz"
|
||||||
|
cmd := exec.Command("xz", "-zk", raw)
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
t.Fatalf("xz: %v: %s", err, string(out))
|
||||||
|
}
|
||||||
|
dest := filepath.Join(dir, "copy.img")
|
||||||
|
if err := Download("file://"+compressed, dest); err != nil {
|
||||||
|
t.Fatalf("Download: %v", err)
|
||||||
|
}
|
||||||
|
sum := sha256.Sum256([]byte("metis-xz-test"))
|
||||||
|
if err := VerifyChecksum(dest, "sha256:"+hex.EncodeToString(sum[:])); err != nil {
|
||||||
|
t.Fatalf("VerifyChecksum: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
257
pkg/image/rootfs.go
Normal file
257
pkg/image/rootfs.go
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
package image
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"metis/pkg/inject"
|
||||||
|
)
|
||||||
|
|
||||||
|
type partitionTable struct {
|
||||||
|
PartitionTable partitionTableData `json:"partitiontable"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type partitionTableData struct {
|
||||||
|
SectorSize uint64 `json:"sectorsize"`
|
||||||
|
Partitions []partitionTablePart `json:"partitions"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type partitionTablePart struct {
|
||||||
|
Start uint64 `json:"start"`
|
||||||
|
Size uint64 `json:"size"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// InjectRootFS rewrites the Linux root partition inside a raw image file without
|
||||||
|
// requiring block-device mounts. Only rootfs-targeted files are written.
|
||||||
|
func InjectRootFS(imagePath string, files []inject.FileSpec) error {
|
||||||
|
rootFiles := make([]inject.FileSpec, 0, len(files))
|
||||||
|
for _, f := range files {
|
||||||
|
if f.RootFS {
|
||||||
|
rootFiles = append(rootFiles, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(rootFiles) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
part, sectorSize, err := findLinuxPartition(imagePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
workDir, err := os.MkdirTemp("", "metis-rootfs-")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(workDir)
|
||||||
|
|
||||||
|
rootImage := filepath.Join(workDir, "root.ext4")
|
||||||
|
if err := extractPartition(imagePath, rootImage, part, sectorSize); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := writeExt4Files(rootImage, rootFiles); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return replacePartition(imagePath, rootImage, part, sectorSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
func findLinuxPartition(imagePath string) (partitionTablePart, uint64, error) {
|
||||||
|
out, err := exec.Command("sfdisk", "-J", imagePath).Output()
|
||||||
|
if err != nil {
|
||||||
|
return partitionTablePart{}, 0, fmt.Errorf("sfdisk -J %s: %w", imagePath, err)
|
||||||
|
}
|
||||||
|
var table partitionTable
|
||||||
|
if err := json.Unmarshal(out, &table); err != nil {
|
||||||
|
return partitionTablePart{}, 0, fmt.Errorf("decode partition table: %w", err)
|
||||||
|
}
|
||||||
|
sectorSize := table.PartitionTable.SectorSize
|
||||||
|
if sectorSize == 0 {
|
||||||
|
sectorSize = 512
|
||||||
|
}
|
||||||
|
for i := len(table.PartitionTable.Partitions) - 1; i >= 0; i-- {
|
||||||
|
part := table.PartitionTable.Partitions[i]
|
||||||
|
if isLinuxPartitionType(part.Type) {
|
||||||
|
return part, sectorSize, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return partitionTablePart{}, 0, fmt.Errorf("no Linux root partition found in %s", imagePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLinuxPartitionType(partType string) bool {
|
||||||
|
normalized := strings.ToLower(strings.TrimSpace(partType))
|
||||||
|
switch normalized {
|
||||||
|
case "83", "8300":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return normalized == "0fc63daf-8483-4772-8e79-3d69d8477de4"
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractPartition(imagePath, outPath string, part partitionTablePart, sectorSize uint64) error {
|
||||||
|
sizeBytes := int64(part.Size * sectorSize)
|
||||||
|
offsetBytes := int64(part.Start * sectorSize)
|
||||||
|
|
||||||
|
src, err := os.Open(imagePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer src.Close()
|
||||||
|
if _, err := src.Seek(offsetBytes, io.SeekStart); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
out, err := os.Create(outPath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
if _, err := io.CopyN(out, src, sizeBytes); err != nil {
|
||||||
|
return fmt.Errorf("extract root partition: %w", err)
|
||||||
|
}
|
||||||
|
return out.Sync()
|
||||||
|
}
|
||||||
|
|
||||||
|
func replacePartition(imagePath, rootImage string, part partitionTablePart, sectorSize uint64) error {
|
||||||
|
expectedSize := int64(part.Size * sectorSize)
|
||||||
|
info, err := os.Stat(rootImage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if info.Size() != expectedSize {
|
||||||
|
return fmt.Errorf("root partition size mismatch: expected %d got %d", expectedSize, info.Size())
|
||||||
|
}
|
||||||
|
|
||||||
|
in, err := os.Open(rootImage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer in.Close()
|
||||||
|
|
||||||
|
out, err := os.OpenFile(imagePath, os.O_WRONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
if _, err := out.Seek(int64(part.Start*sectorSize), io.SeekStart); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if _, err := io.Copy(out, in); err != nil {
|
||||||
|
return fmt.Errorf("write root partition: %w", err)
|
||||||
|
}
|
||||||
|
return out.Sync()
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeExt4Files(fsPath string, files []inject.FileSpec) error {
|
||||||
|
workDir, err := os.MkdirTemp("", "metis-ext4-")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(workDir)
|
||||||
|
|
||||||
|
stageDir := filepath.Join(workDir, "stage")
|
||||||
|
commandFile := filepath.Join(workDir, "commands.txt")
|
||||||
|
|
||||||
|
dirs := map[string]struct{}{}
|
||||||
|
commands := make([]string, 0, len(files)*4)
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
localPath := filepath.Join(stageDir, filepath.FromSlash(f.Path))
|
||||||
|
if err := os.MkdirAll(filepath.Dir(localPath), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(localPath, f.Content, 0o644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, dir := range parentDirs(f.Path) {
|
||||||
|
dirs[dir] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dirList := make([]string, 0, len(dirs))
|
||||||
|
for dir := range dirs {
|
||||||
|
dirList = append(dirList, dir)
|
||||||
|
}
|
||||||
|
sort.Slice(dirList, func(i, j int) bool {
|
||||||
|
leftDepth := strings.Count(dirList[i], "/")
|
||||||
|
rightDepth := strings.Count(dirList[j], "/")
|
||||||
|
if leftDepth != rightDepth {
|
||||||
|
return leftDepth < rightDepth
|
||||||
|
}
|
||||||
|
return dirList[i] < dirList[j]
|
||||||
|
})
|
||||||
|
for _, dir := range dirList {
|
||||||
|
commands = append(commands, fmt.Sprintf("mkdir %s", dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
destPath := "/" + strings.TrimPrefix(filepath.ToSlash(f.Path), "/")
|
||||||
|
localPath := filepath.Join(stageDir, filepath.FromSlash(f.Path))
|
||||||
|
commands = append(commands, fmt.Sprintf("rm %s", destPath))
|
||||||
|
commands = append(commands, fmt.Sprintf("write %s %s", localPath, destPath))
|
||||||
|
commands = append(commands, fmt.Sprintf("sif %s mode 0%o", destPath, uint32(0o100000|f.Mode.Perm())))
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(commandFile, []byte(strings.Join(commands, "\n")+"\n"), 0o644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.Command("debugfs", "-w", "-f", commandFile, fsPath)
|
||||||
|
var combined bytes.Buffer
|
||||||
|
cmd.Stdout = &combined
|
||||||
|
cmd.Stderr = &combined
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
return fmt.Errorf("debugfs write failed: %w: %s", err, combined.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
if err := verifyExt4File(fsPath, f, workDir); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyExt4File(fsPath string, file inject.FileSpec, workDir string) error {
|
||||||
|
destPath := "/" + strings.TrimPrefix(filepath.ToSlash(file.Path), "/")
|
||||||
|
statOut, err := exec.Command("debugfs", "-R", "stat "+destPath, fsPath).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("verify %s: %w: %s", destPath, err, string(statOut))
|
||||||
|
}
|
||||||
|
expectedMode := fmt.Sprintf("Mode: %04o", file.Mode.Perm())
|
||||||
|
if !strings.Contains(string(statOut), expectedMode) {
|
||||||
|
return fmt.Errorf("verify %s mode: expected %s in %s", destPath, expectedMode, string(statOut))
|
||||||
|
}
|
||||||
|
|
||||||
|
readback := filepath.Join(workDir, strings.TrimPrefix(filepath.FromSlash(file.Path), string(filepath.Separator))+".readback")
|
||||||
|
if err := os.MkdirAll(filepath.Dir(readback), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
dumpOut, err := exec.Command("debugfs", "-R", fmt.Sprintf("dump %s %s", destPath, readback), fsPath).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("dump %s: %w: %s", destPath, err, string(dumpOut))
|
||||||
|
}
|
||||||
|
got, err := os.ReadFile(readback)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !bytes.Equal(got, file.Content) {
|
||||||
|
return fmt.Errorf("verify %s content mismatch", destPath)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parentDirs(path string) []string {
|
||||||
|
cleaned := "/" + strings.TrimPrefix(filepath.ToSlash(path), "/")
|
||||||
|
parts := strings.Split(cleaned, "/")
|
||||||
|
var dirs []string
|
||||||
|
for i := 2; i < len(parts); i++ {
|
||||||
|
dirs = append(dirs, strings.Join(parts[:i], "/"))
|
||||||
|
}
|
||||||
|
return dirs
|
||||||
|
}
|
||||||
68
pkg/image/rootfs_test.go
Normal file
68
pkg/image/rootfs_test.go
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
package image
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/inject"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestWriteExt4Files(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("mkfs.ext4"); err != nil {
|
||||||
|
t.Skip("mkfs.ext4 not available")
|
||||||
|
}
|
||||||
|
if _, err := exec.LookPath("debugfs"); err != nil {
|
||||||
|
t.Skip("debugfs not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
workDir := t.TempDir()
|
||||||
|
fsPath := filepath.Join(workDir, "root.ext4")
|
||||||
|
f, err := os.Create(fsPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := f.Truncate(32 * 1024 * 1024); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.Command("mkfs.ext4", "-F", fsPath)
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
t.Fatalf("mkfs.ext4: %v: %s", err, string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
files := []inject.FileSpec{
|
||||||
|
{
|
||||||
|
Path: "etc/metis/firstboot.env",
|
||||||
|
Content: []byte("METIS_HOSTNAME='titan-13'\n"),
|
||||||
|
Mode: 0o600,
|
||||||
|
RootFS: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "usr/local/sbin/test.sh",
|
||||||
|
Content: []byte("#!/usr/bin/env bash\nexit 0\n"),
|
||||||
|
Mode: 0o755,
|
||||||
|
RootFS: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := writeExt4Files(fsPath, files); err != nil {
|
||||||
|
t.Fatalf("writeExt4Files: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParentDirs(t *testing.T) {
|
||||||
|
got := parentDirs("etc/metis/firstboot.env")
|
||||||
|
want := []string{"/etc", "/etc/metis"}
|
||||||
|
if len(got) != len(want) {
|
||||||
|
t.Fatalf("parentDirs length mismatch: got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
for i := range want {
|
||||||
|
if got[i] != want[i] {
|
||||||
|
t.Fatalf("parentDirs[%d] = %q want %q", i, got[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -20,10 +20,13 @@ type NodeClass struct {
|
|||||||
OS string `yaml:"os"`
|
OS string `yaml:"os"`
|
||||||
Image string `yaml:"image"`
|
Image string `yaml:"image"`
|
||||||
Checksum string `yaml:"checksum,omitempty"`
|
Checksum string `yaml:"checksum,omitempty"`
|
||||||
|
K3sVersion string `yaml:"k3s_version,omitempty"`
|
||||||
BootloaderNote string `yaml:"bootloader_note,omitempty"`
|
BootloaderNote string `yaml:"bootloader_note,omitempty"`
|
||||||
DefaultLabels map[string]string `yaml:"default_labels,omitempty"`
|
DefaultLabels map[string]string `yaml:"default_labels,omitempty"`
|
||||||
DefaultTaints []string `yaml:"default_taints,omitempty"`
|
DefaultTaints []string `yaml:"default_taints,omitempty"`
|
||||||
CloudInit string `yaml:"cloud_init,omitempty"`
|
CloudInit string `yaml:"cloud_init,omitempty"`
|
||||||
|
BootOverlay string `yaml:"boot_overlay,omitempty"` // path to overlay files for boot partition
|
||||||
|
RootOverlay string `yaml:"root_overlay,omitempty"` // path to overlay files for rootfs
|
||||||
}
|
}
|
||||||
|
|
||||||
// NodeSpec captures per-node overrides and identity.
|
// NodeSpec captures per-node overrides and identity.
|
||||||
@ -34,6 +37,7 @@ type NodeSpec struct {
|
|||||||
IP string `yaml:"ip"`
|
IP string `yaml:"ip"`
|
||||||
MAC string `yaml:"mac,omitempty"`
|
MAC string `yaml:"mac,omitempty"`
|
||||||
K3sRole string `yaml:"k3s_role"`
|
K3sRole string `yaml:"k3s_role"`
|
||||||
|
K3sVersion string `yaml:"k3s_version,omitempty"`
|
||||||
K3sToken string `yaml:"k3s_token,omitempty"`
|
K3sToken string `yaml:"k3s_token,omitempty"`
|
||||||
K3sURL string `yaml:"k3s_url,omitempty"`
|
K3sURL string `yaml:"k3s_url,omitempty"`
|
||||||
Labels map[string]string `yaml:"labels,omitempty"`
|
Labels map[string]string `yaml:"labels,omitempty"`
|
||||||
@ -61,9 +65,58 @@ func Load(path string) (*Inventory, error) {
|
|||||||
if err := yaml.Unmarshal(data, &inv); err != nil {
|
if err := yaml.Unmarshal(data, &inv); err != nil {
|
||||||
return nil, fmt.Errorf("parse inventory: %w", err)
|
return nil, fmt.Errorf("parse inventory: %w", err)
|
||||||
}
|
}
|
||||||
|
expandInventory(&inv)
|
||||||
return &inv, nil
|
return &inv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func expandInventory(inv *Inventory) {
|
||||||
|
for idx := range inv.Classes {
|
||||||
|
inv.Classes[idx].Name = os.ExpandEnv(inv.Classes[idx].Name)
|
||||||
|
inv.Classes[idx].Arch = os.ExpandEnv(inv.Classes[idx].Arch)
|
||||||
|
inv.Classes[idx].OS = os.ExpandEnv(inv.Classes[idx].OS)
|
||||||
|
inv.Classes[idx].Image = os.ExpandEnv(inv.Classes[idx].Image)
|
||||||
|
inv.Classes[idx].Checksum = os.ExpandEnv(inv.Classes[idx].Checksum)
|
||||||
|
inv.Classes[idx].K3sVersion = os.ExpandEnv(inv.Classes[idx].K3sVersion)
|
||||||
|
inv.Classes[idx].BootloaderNote = os.ExpandEnv(inv.Classes[idx].BootloaderNote)
|
||||||
|
inv.Classes[idx].CloudInit = os.ExpandEnv(inv.Classes[idx].CloudInit)
|
||||||
|
inv.Classes[idx].BootOverlay = os.ExpandEnv(inv.Classes[idx].BootOverlay)
|
||||||
|
inv.Classes[idx].RootOverlay = os.ExpandEnv(inv.Classes[idx].RootOverlay)
|
||||||
|
for key, value := range inv.Classes[idx].DefaultLabels {
|
||||||
|
inv.Classes[idx].DefaultLabels[key] = os.ExpandEnv(value)
|
||||||
|
}
|
||||||
|
for taintIdx, value := range inv.Classes[idx].DefaultTaints {
|
||||||
|
inv.Classes[idx].DefaultTaints[taintIdx] = os.ExpandEnv(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for idx := range inv.Nodes {
|
||||||
|
inv.Nodes[idx].Name = os.ExpandEnv(inv.Nodes[idx].Name)
|
||||||
|
inv.Nodes[idx].Class = os.ExpandEnv(inv.Nodes[idx].Class)
|
||||||
|
inv.Nodes[idx].Hostname = os.ExpandEnv(inv.Nodes[idx].Hostname)
|
||||||
|
inv.Nodes[idx].IP = os.ExpandEnv(inv.Nodes[idx].IP)
|
||||||
|
inv.Nodes[idx].MAC = os.ExpandEnv(inv.Nodes[idx].MAC)
|
||||||
|
inv.Nodes[idx].K3sRole = os.ExpandEnv(inv.Nodes[idx].K3sRole)
|
||||||
|
inv.Nodes[idx].K3sVersion = os.ExpandEnv(inv.Nodes[idx].K3sVersion)
|
||||||
|
inv.Nodes[idx].K3sToken = os.ExpandEnv(inv.Nodes[idx].K3sToken)
|
||||||
|
inv.Nodes[idx].K3sURL = os.ExpandEnv(inv.Nodes[idx].K3sURL)
|
||||||
|
inv.Nodes[idx].SSHUser = os.ExpandEnv(inv.Nodes[idx].SSHUser)
|
||||||
|
inv.Nodes[idx].Notes = os.ExpandEnv(inv.Nodes[idx].Notes)
|
||||||
|
for key, value := range inv.Nodes[idx].Labels {
|
||||||
|
inv.Nodes[idx].Labels[key] = os.ExpandEnv(value)
|
||||||
|
}
|
||||||
|
for taintIdx, value := range inv.Nodes[idx].Taints {
|
||||||
|
inv.Nodes[idx].Taints[taintIdx] = os.ExpandEnv(value)
|
||||||
|
}
|
||||||
|
for keyIdx, value := range inv.Nodes[idx].SSHAuthorized {
|
||||||
|
inv.Nodes[idx].SSHAuthorized[keyIdx] = os.ExpandEnv(value)
|
||||||
|
}
|
||||||
|
for diskIdx := range inv.Nodes[idx].LonghornDisks {
|
||||||
|
inv.Nodes[idx].LonghornDisks[diskIdx].Mountpoint = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].Mountpoint)
|
||||||
|
inv.Nodes[idx].LonghornDisks[diskIdx].UUID = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].UUID)
|
||||||
|
inv.Nodes[idx].LonghornDisks[diskIdx].FS = os.ExpandEnv(inv.Nodes[idx].LonghornDisks[diskIdx].FS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// FindNode returns the node spec and class.
|
// FindNode returns the node spec and class.
|
||||||
func (i *Inventory) FindNode(name string) (*NodeSpec, *NodeClass, error) {
|
func (i *Inventory) FindNode(name string) (*NodeSpec, *NodeClass, error) {
|
||||||
var node *NodeSpec
|
var node *NodeSpec
|
||||||
|
|||||||
43
pkg/inventory/types_test.go
Normal file
43
pkg/inventory/types_test.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package inventory
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLoadExpandsEnvironmentVariables(t *testing.T) {
|
||||||
|
t.Setenv("METIS_IMAGE_PATH", "file:///tmp/rpi4.img")
|
||||||
|
t.Setenv("METIS_K3S_TOKEN", "secret-token")
|
||||||
|
invPath := filepath.Join(t.TempDir(), "inventory.yaml")
|
||||||
|
if err := os.WriteFile(invPath, []byte(`
|
||||||
|
classes:
|
||||||
|
- name: rpi4
|
||||||
|
image: ${METIS_IMAGE_PATH}
|
||||||
|
k3s_version: v1.31.5+k3s1
|
||||||
|
nodes:
|
||||||
|
- name: titan-13
|
||||||
|
class: rpi4
|
||||||
|
hostname: titan-13
|
||||||
|
ip: 192.168.22.41
|
||||||
|
k3s_role: agent
|
||||||
|
k3s_token: ${METIS_K3S_TOKEN}
|
||||||
|
`), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
inv, err := Load(invPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Load: %v", err)
|
||||||
|
}
|
||||||
|
node, class, err := inv.FindNode("titan-13")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FindNode: %v", err)
|
||||||
|
}
|
||||||
|
if class.Image != "file:///tmp/rpi4.img" {
|
||||||
|
t.Fatalf("image not expanded: %q", class.Image)
|
||||||
|
}
|
||||||
|
if node.K3sToken != "secret-token" {
|
||||||
|
t.Fatalf("token not expanded: %q", node.K3sToken)
|
||||||
|
}
|
||||||
|
}
|
||||||
87
pkg/mount/mount.go
Normal file
87
pkg/mount/mount.go
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
package mount
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"metis/pkg/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LoopMount describes a mounted image with boot/root paths.
|
||||||
|
// LoopMount describes a mounted image with boot/root paths.
type LoopMount struct {
	LoopDevice string // only set when losetup created it; empty when caller passed a /dev path
	BootPath   string // temp dir where partition 1 (boot) is mounted
	RootPath   string // temp dir where partition 2 (rootfs) is mounted
}
|
||||||
|
|
||||||
|
// Setup attaches an image as a loop device with partitions (-P) OR mounts an existing /dev path
|
||||||
|
// by assuming p1=boot, p2=root. Intended for Linux hosts only.
|
||||||
|
func Setup(path string) (*LoopMount, error) {
|
||||||
|
device := path
|
||||||
|
loopDevice := ""
|
||||||
|
if !strings.HasPrefix(path, "/dev/") {
|
||||||
|
var err error
|
||||||
|
device, err = createLoop(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
loopDevice = device
|
||||||
|
}
|
||||||
|
bootDir, err := os.MkdirTemp("", "metis-boot-")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rootDir, err := os.MkdirTemp("", "metis-root-")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Assume p1=boot, p2=root (Raspberry Pi style images)
|
||||||
|
if err := util.Run("mount", partitionPath(device, 1), bootDir); err != nil {
|
||||||
|
_ = Teardown(&LoopMount{LoopDevice: loopDevice, BootPath: bootDir, RootPath: rootDir})
|
||||||
|
return nil, fmt.Errorf("mount boot: %w", err)
|
||||||
|
}
|
||||||
|
if err := util.Run("mount", partitionPath(device, 2), rootDir); err != nil {
|
||||||
|
_ = util.Run("umount", bootDir)
|
||||||
|
_ = Teardown(&LoopMount{LoopDevice: loopDevice, BootPath: bootDir, RootPath: rootDir})
|
||||||
|
return nil, fmt.Errorf("mount root: %w", err)
|
||||||
|
}
|
||||||
|
return &LoopMount{LoopDevice: loopDevice, BootPath: bootDir, RootPath: rootDir}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Teardown unmounts and detaches the loop device.
|
||||||
|
func Teardown(m *LoopMount) error {
|
||||||
|
if m == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if m.BootPath != "" {
|
||||||
|
_ = util.Run("umount", m.BootPath)
|
||||||
|
_ = os.RemoveAll(m.BootPath)
|
||||||
|
}
|
||||||
|
if m.RootPath != "" {
|
||||||
|
_ = util.Run("umount", m.RootPath)
|
||||||
|
_ = os.RemoveAll(m.RootPath)
|
||||||
|
}
|
||||||
|
if m.LoopDevice != "" {
|
||||||
|
_ = util.Run("losetup", "-d", m.LoopDevice)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func partitionPath(base string, idx int) string {
|
||||||
|
p := fmt.Sprintf("%sp%d", base, idx)
|
||||||
|
if _, err := os.Stat(p); err == nil {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s%d", base, idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
// createLoop attaches imagePath as a loop device with partition scanning
// enabled and returns the device path that losetup prints (-f picks the first
// free device, --show echoes it). The caller must detach it again (Teardown).
func createLoop(imagePath string) (string, error) {
	// losetup -Pf --show <image>
	out, err := util.RunLogged("losetup", "-Pf", "--show", filepath.Clean(imagePath))
	if err != nil {
		return "", err
	}
	// losetup terminates its output with a newline; strip it.
	return strings.TrimSpace(out), nil
}
|
||||||
@ -1,12 +1,15 @@
|
|||||||
package plan
|
package plan
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"metis/pkg/image"
|
"metis/pkg/image"
|
||||||
"metis/pkg/inventory"
|
"metis/pkg/inventory"
|
||||||
|
"metis/pkg/mount"
|
||||||
|
"metis/pkg/writer"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Execute performs a burn if confirm is true. With confirm=false, it only downloads/verifies and returns the plan.
|
// Execute performs a burn if confirm is true. With confirm=false, it only downloads/verifies and returns the plan.
|
||||||
@ -28,12 +31,18 @@ func Execute(inv *inventory.Inventory, nodeName, device, cacheDir string, confir
|
|||||||
if device == "" || device == "/dev/sdX" {
|
if device == "" || device == "/dev/sdX" {
|
||||||
return p, fmt.Errorf("refusing to write to placeholder device")
|
return p, fmt.Errorf("refusing to write to placeholder device")
|
||||||
}
|
}
|
||||||
ddCmd := []string{"dd", fmt.Sprintf("if=%s", cacheImage), fmt.Sprintf("of=%s", device), "bs=4M", "status=progress", "conv=fsync"}
|
ctx := context.Background()
|
||||||
cmd := exec.Command(ddCmd[0], ddCmd[1:]...)
|
if err := writer.WriteImage(ctx, cacheImage, device); err != nil {
|
||||||
cmd.Stdout = nil
|
return p, fmt.Errorf("write image: %w", err)
|
||||||
cmd.Stderr = nil
|
}
|
||||||
if err := cmd.Run(); err != nil {
|
if err := maybeInject(inv, nodeName); err != nil {
|
||||||
return p, fmt.Errorf("dd failed: %w", err)
|
return p, fmt.Errorf("inject config: %w", err)
|
||||||
|
}
|
||||||
|
if auto := maybeAutoMount(device); auto != nil {
|
||||||
|
defer mount.Teardown(auto)
|
||||||
|
if err := maybeInject(inv, nodeName); err != nil {
|
||||||
|
return p, fmt.Errorf("inject (auto-mount): %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return p, nil
|
return p, nil
|
||||||
}
|
}
|
||||||
@ -45,3 +54,22 @@ func checksumFromInventory(inv *inventory.Inventory, node string) string {
|
|||||||
}
|
}
|
||||||
return cls.Checksum
|
return cls.Checksum
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maybeAutoMount mounts the just-written device's partitions when the
// METIS_AUTO_MOUNT environment variable is set to any non-empty value.
// It returns nil both when auto-mount is disabled and when mounting fails,
// so the caller treats the whole step as best-effort.
func maybeAutoMount(device string) *mount.LoopMount {
	if os.Getenv("METIS_AUTO_MOUNT") == "" {
		return nil
	}
	// Use mount helper against the written device partitions.
	m, err := mount.Setup(device)
	if err != nil {
		// NOTE(review): the Setup error is silently dropped, so injection is
		// skipped without any diagnostic — consider surfacing/logging it.
		return nil
	}
	// Propagate mount paths for injection.
	// NOTE(review): maybeInject reads these process-wide env vars, which makes
	// concurrent burns unsafe — confirm this path only runs serially.
	if m.BootPath != "" {
		_ = os.Setenv("METIS_BOOT_PATH", m.BootPath)
	}
	if m.RootPath != "" {
		_ = os.Setenv("METIS_ROOT_PATH", m.RootPath)
	}
	return m
}
|
||||||
|
|||||||
43
pkg/plan/image_build.go
Normal file
43
pkg/plan/image_build.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"metis/pkg/image"
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
"metis/pkg/writer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BuildImageFile materializes a fully injected raw image for a node.
|
||||||
|
func BuildImageFile(ctx context.Context, inv *inventory.Inventory, nodeName, cacheDir, output string) error {
|
||||||
|
p, err := Build(inv, nodeName, output, cacheDir)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("build plan: %w", err)
|
||||||
|
}
|
||||||
|
_, class, err := inv.FindNode(nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("load node class: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cacheImage := filepath.Join(cacheDir, filepath.Base(p.Image))
|
||||||
|
if err := image.Download(p.Image, cacheImage); err != nil {
|
||||||
|
return fmt.Errorf("download image: %w", err)
|
||||||
|
}
|
||||||
|
if err := image.VerifyChecksum(cacheImage, class.Checksum); err != nil {
|
||||||
|
return fmt.Errorf("verify checksum: %w", err)
|
||||||
|
}
|
||||||
|
if err := writer.WriteImage(ctx, cacheImage, output); err != nil {
|
||||||
|
return fmt.Errorf("copy base image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := Files(inv, nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("resolve files: %w", err)
|
||||||
|
}
|
||||||
|
if err := image.InjectRootFS(output, files); err != nil {
|
||||||
|
return fmt.Errorf("inject rootfs: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
393
pkg/plan/inject.go
Normal file
393
pkg/plan/inject.go
Normal file
@ -0,0 +1,393 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
"metis/pkg/inject"
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
"metis/pkg/secrets"
|
||||||
|
)
|
||||||
|
|
||||||
|
// maybeInject writes node-specific config into mounted boot/root paths if the env
|
||||||
|
// vars METIS_BOOT_PATH or METIS_ROOT_PATH are set. When unset, injection is skipped.
|
||||||
|
func maybeInject(inv *inventory.Inventory, nodeName string) error {
|
||||||
|
boot := os.Getenv("METIS_BOOT_PATH")
|
||||||
|
root := os.Getenv("METIS_ROOT_PATH")
|
||||||
|
if boot == "" && root == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
files, err := Files(inv, nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
filtered := make([]inject.FileSpec, 0, len(files))
|
||||||
|
for _, f := range files {
|
||||||
|
if f.RootFS && root == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !f.RootFS && boot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, f)
|
||||||
|
}
|
||||||
|
if len(filtered) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
inj := inject.Injector{BootPath: boot, RootPath: root}
|
||||||
|
return inj.Write(filtered)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Files resolves the full set of node-specific files, including overlays.
|
||||||
|
// Files resolves the full set of node-specific files, including overlays.
//
// Assembly order: base files from buildFiles first, then class overlay files
// appended after them. Vault-provided secrets (when reachable) override the
// inventory's k3s token and supply extra key/value secrets before rendering.
func Files(inv *inventory.Inventory, nodeName string) ([]inject.FileSpec, error) {
	node, class, err := inv.FindNode(nodeName)
	if err != nil {
		return nil, err
	}
	cfg, err := config.Build(inv, nodeName)
	if err != nil {
		return nil, err
	}
	// Best-effort secret fetch; nil when Vault is unconfigured or unreachable.
	sec := fetchSecrets(node.Hostname)
	if sec != nil {
		if sec.K3sToken != "" {
			cfg.K3s.Token = sec.K3sToken
		}
		if len(sec.Extra) > 0 {
			cfg.Secrets = sec.Extra
		}
	}
	files, err := buildFiles(cfg, sec)
	if err != nil {
		return nil, err
	}
	overlayFiles, err := collectOverlays(class)
	if err != nil {
		return nil, err
	}
	// Overlays come after base files; NOTE(review): confirm Injector.Write
	// semantics when an overlay path duplicates a base path (last-wins?).
	files = append(files, overlayFiles...)
	_ = node // reserved for future per-node overlays
	return files, nil
}
|
||||||
|
|
||||||
|
// Inject writes node-specific config into caller-supplied boot/root mountpoints.
|
||||||
|
func Inject(inv *inventory.Inventory, nodeName, boot, root string) error {
|
||||||
|
oldBoot := os.Getenv("METIS_BOOT_PATH")
|
||||||
|
oldRoot := os.Getenv("METIS_ROOT_PATH")
|
||||||
|
defer func() {
|
||||||
|
if oldBoot == "" {
|
||||||
|
_ = os.Unsetenv("METIS_BOOT_PATH")
|
||||||
|
} else {
|
||||||
|
_ = os.Setenv("METIS_BOOT_PATH", oldBoot)
|
||||||
|
}
|
||||||
|
if oldRoot == "" {
|
||||||
|
_ = os.Unsetenv("METIS_ROOT_PATH")
|
||||||
|
} else {
|
||||||
|
_ = os.Setenv("METIS_ROOT_PATH", oldRoot)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
if boot == "" {
|
||||||
|
_ = os.Unsetenv("METIS_BOOT_PATH")
|
||||||
|
} else {
|
||||||
|
_ = os.Setenv("METIS_BOOT_PATH", boot)
|
||||||
|
}
|
||||||
|
if root == "" {
|
||||||
|
_ = os.Unsetenv("METIS_ROOT_PATH")
|
||||||
|
} else {
|
||||||
|
_ = os.Setenv("METIS_ROOT_PATH", root)
|
||||||
|
}
|
||||||
|
return maybeInject(inv, nodeName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildFiles assembles the base set of files injected into a node image.
//
// Always produced (rootfs-relative): etc/hostname, etc/hosts,
// etc/rancher/k3s/config.yaml, etc/metis/firstboot.env. Conditionally added:
// static network configs (cfg.IP set), authorized_keys copies (SSH user and
// keys set), an fstab fragment (cfg.Fstab non-empty), the raw node config as
// JSON, the secrets as JSON (0600, only when sec != nil) and a NoCloud
// user-data/meta-data pair on the boot partition.
func buildFiles(cfg *config.NodeConfig, sec *secrets.NodeSecrets) ([]inject.FileSpec, error) {
	files := []inject.FileSpec{
		{Path: "etc/hostname", Content: []byte(cfg.Hostname + "\n"), Mode: 0o644, RootFS: true},
		{Path: "etc/hosts", Content: []byte(hostsContent(cfg.Hostname)), Mode: 0o644, RootFS: true},
		{Path: "etc/rancher/k3s/config.yaml", Content: []byte(k3sConfigContent(cfg)), Mode: 0o644, RootFS: true},
		{Path: "etc/metis/firstboot.env", Content: []byte(firstbootEnvContent(cfg)), Mode: 0o600, RootFS: true},
	}
	if cfg.IP != "" {
		// Both NetworkManager and systemd-networkd configs are written; only
		// the manager present on the image will consume its file.
		files = append(files, inject.FileSpec{
			Path:    "etc/NetworkManager/system-connections/end0-static.nmconnection",
			Content: []byte(networkManagerConnectionContent(cfg.IP)),
			Mode:    0o600, // NetworkManager refuses world-readable connection files
			RootFS:  true,
		})
		files = append(files, inject.FileSpec{
			Path:    "etc/systemd/network/10-end0-static.network",
			Content: []byte(systemdNetworkContent(cfg.IP)),
			Mode:    0o644,
			RootFS:  true,
		})
	}
	if len(cfg.SSHKeys) > 0 && cfg.SSHUser != "" {
		auth := strings.Join(cfg.SSHKeys, "\n") + "\n"
		// Written twice: directly into the user's home, and to etc/metis for
		// firstboot tooling to pick up.
		files = append(files, inject.FileSpec{
			Path:    fmt.Sprintf("home/%s/.ssh/authorized_keys", cfg.SSHUser),
			Content: []byte(auth),
			Mode:    0o600,
			RootFS:  true,
		})
		files = append(files, inject.FileSpec{
			Path:    "etc/metis/authorized_keys",
			Content: []byte(auth),
			Mode:    0o600,
			RootFS:  true,
		})
	}
	if len(cfg.Fstab) > 0 {
		files = append(files, inject.FileSpec{
			Path:    "etc/metis/fstab.append",
			Content: []byte(fstabAppendContent(cfg)),
			Mode:    0o644,
			RootFS:  true,
		})
	}

	// Store the raw config for debugging/ops.
	raw, err := json.MarshalIndent(cfg, "", " ")
	if err != nil {
		return nil, err
	}
	files = append(files, inject.FileSpec{
		Path:    "etc/metis/node.json",
		Content: raw,
		Mode:    0o644,
		RootFS:  true,
	})
	if sec != nil {
		secRaw, err := json.MarshalIndent(sec, "", " ")
		if err != nil {
			return nil, err
		}
		files = append(files, inject.FileSpec{
			Path:    "etc/metis/secrets.json",
			Content: secRaw,
			Mode:    0o600, // secret material: owner-only
			RootFS:  true,
		})
	}

	// Optional cloud-init for images that honor NoCloud.
	userData := cloudInitUserData(cfg, sec)
	if userData != "" {
		// NoCloud files live at the boot partition root (RootFS: false).
		files = append(files, inject.FileSpec{
			Path:    "user-data",
			Content: []byte(userData),
			Mode:    0o644,
			RootFS:  false,
		})
		files = append(files, inject.FileSpec{
			Path:    "meta-data",
			Content: []byte(fmt.Sprintf("instance-id: %s\nlocal-hostname: %s\n", cfg.Hostname, cfg.Hostname)),
			Mode:    0o644,
			RootFS:  false,
		})
	}
	return files, nil
}
|
||||||
|
|
||||||
|
// hostsContent renders a minimal /etc/hosts with the loopback entries plus
// the node's own 127.0.1.1 alias (Debian convention).
func hostsContent(hostname string) string {
	var b strings.Builder
	b.WriteString("127.0.0.1\tlocalhost\n")
	b.WriteString("127.0.1.1\t" + hostname + "\n")
	b.WriteString("\n# Injected by metis\n")
	return b.String()
}
|
||||||
|
|
||||||
|
func k3sConfigContent(cfg *config.NodeConfig) string {
|
||||||
|
var labelList []string
|
||||||
|
for k, v := range cfg.Labels {
|
||||||
|
labelList = append(labelList, fmt.Sprintf("%s=%s", k, v))
|
||||||
|
}
|
||||||
|
sort.Strings(labelList)
|
||||||
|
taints := append([]string{}, cfg.Taints...)
|
||||||
|
sort.Strings(taints)
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString("write-kubeconfig-mode: \"0644\"\n")
|
||||||
|
if cfg.K3s.URL != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("server: %s\n", cfg.K3s.URL))
|
||||||
|
}
|
||||||
|
if cfg.K3s.Token != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("token: %s\n", cfg.K3s.Token))
|
||||||
|
}
|
||||||
|
b.WriteString(fmt.Sprintf("node-name: %s\n", cfg.Hostname))
|
||||||
|
if cfg.IP != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("node-ip: %s\n", cfg.IP))
|
||||||
|
}
|
||||||
|
if len(labelList) > 0 {
|
||||||
|
b.WriteString("node-label:\n")
|
||||||
|
for _, l := range labelList {
|
||||||
|
b.WriteString(fmt.Sprintf(" - %s\n", l))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(taints) > 0 {
|
||||||
|
b.WriteString("node-taint:\n")
|
||||||
|
for _, t := range taints {
|
||||||
|
b.WriteString(fmt.Sprintf(" - %s\n", t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func cloudInitUserData(cfg *config.NodeConfig, sec *secrets.NodeSecrets) string {
|
||||||
|
if cfg == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if sec != nil && sec.CloudInit != "" {
|
||||||
|
return sec.CloudInit
|
||||||
|
}
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString("#cloud-config\n")
|
||||||
|
b.WriteString(fmt.Sprintf("hostname: %s\n", cfg.Hostname))
|
||||||
|
if len(cfg.SSHKeys) > 0 {
|
||||||
|
b.WriteString("ssh_authorized_keys:\n")
|
||||||
|
for _, k := range cfg.SSHKeys {
|
||||||
|
b.WriteString(fmt.Sprintf(" - %s\n", k))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstbootEnvContent(cfg *config.NodeConfig) string {
|
||||||
|
var b bytes.Buffer
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_HOSTNAME=%s\n", shellQuote(cfg.Hostname)))
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_SSH_USER=%s\n", shellQuote(cfg.SSHUser)))
|
||||||
|
b.WriteString(fmt.Sprintf("METIS_K3S_VERSION=%s\n", shellQuote(cfg.K3s.Version)))
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func networkManagerConnectionContent(ip string) string {
|
||||||
|
gateway := ip
|
||||||
|
if lastDot := strings.LastIndex(gateway, "."); lastDot >= 0 {
|
||||||
|
gateway = gateway[:lastDot+1] + "1"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf(`[connection]
|
||||||
|
id=end0-static
|
||||||
|
type=ethernet
|
||||||
|
interface-name=end0
|
||||||
|
autoconnect=true
|
||||||
|
autoconnect-priority=100
|
||||||
|
|
||||||
|
[ethernet]
|
||||||
|
|
||||||
|
[ipv4]
|
||||||
|
method=manual
|
||||||
|
address1=%s/24,%s
|
||||||
|
dns=%s;
|
||||||
|
dns-search=titan;
|
||||||
|
may-fail=false
|
||||||
|
|
||||||
|
[ipv6]
|
||||||
|
method=ignore
|
||||||
|
|
||||||
|
[proxy]
|
||||||
|
`, ip, gateway, gateway)
|
||||||
|
}
|
||||||
|
|
||||||
|
func systemdNetworkContent(ip string) string {
|
||||||
|
gateway := ip
|
||||||
|
if lastDot := strings.LastIndex(gateway, "."); lastDot >= 0 {
|
||||||
|
gateway = gateway[:lastDot+1] + "1"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf(`[Match]
|
||||||
|
Name=end0
|
||||||
|
|
||||||
|
[Network]
|
||||||
|
Address=%s/24
|
||||||
|
Gateway=%s
|
||||||
|
DNS=%s
|
||||||
|
Domains=titan
|
||||||
|
DHCP=no
|
||||||
|
IPv6AcceptRA=no
|
||||||
|
LinkLocalAddressing=no
|
||||||
|
`, ip, gateway, gateway)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fstabAppendContent(cfg *config.NodeConfig) string {
|
||||||
|
var lines []string
|
||||||
|
for _, entry := range cfg.Fstab {
|
||||||
|
lines = append(lines, fmt.Sprintf(
|
||||||
|
"UUID=%s %s %s %s 0 0",
|
||||||
|
entry.UUID,
|
||||||
|
entry.Mountpoint,
|
||||||
|
entry.FS,
|
||||||
|
entry.Options,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
sort.Strings(lines)
|
||||||
|
return strings.Join(lines, "\n") + "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// shellQuote wraps value in single quotes for safe use in shell-sourced
// files, escaping embedded single quotes with the standard '"'"' dance.
// An empty value becomes the literal ''.
func shellQuote(value string) string {
	if len(value) == 0 {
		return "''"
	}
	escaped := strings.ReplaceAll(value, "'", `'"'"'`)
	return "'" + escaped + "'"
}
|
||||||
|
|
||||||
|
// fetchSecrets retrieves per-node secrets from Vault, keyed by hostname.
// It is deliberately best-effort: when VAULT_ADDR is unset, or when the fetch
// fails for any reason, it returns nil and injection proceeds without secrets.
func fetchSecrets(hostname string) *secrets.NodeSecrets {
	if os.Getenv("VAULT_ADDR") == "" {
		return nil
	}
	cli := secrets.NewFromEnv()
	sec, err := cli.FetchNode(context.Background(), hostname)
	if err != nil {
		// NOTE(review): the error (auth failure, missing path, network) is
		// swallowed here — callers cannot distinguish "no secrets configured"
		// from "fetch failed"; consider logging it.
		return nil
	}
	return sec
}
|
||||||
|
|
||||||
|
func collectOverlays(class *inventory.NodeClass) ([]inject.FileSpec, error) {
|
||||||
|
var files []inject.FileSpec
|
||||||
|
if class == nil {
|
||||||
|
return files, nil
|
||||||
|
}
|
||||||
|
if class.BootOverlay != "" {
|
||||||
|
more, err := overlayFiles(class.BootOverlay, false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
files = append(files, more...)
|
||||||
|
}
|
||||||
|
if class.RootOverlay != "" {
|
||||||
|
more, err := overlayFiles(class.RootOverlay, true)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
files = append(files, more...)
|
||||||
|
}
|
||||||
|
return files, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func overlayFiles(dir string, rootfs bool) ([]inject.FileSpec, error) {
|
||||||
|
var specs []inject.FileSpec
|
||||||
|
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if info.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rel, err := filepath.Rel(dir, path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
specs = append(specs, inject.FileSpec{
|
||||||
|
Path: rel,
|
||||||
|
Content: content,
|
||||||
|
Mode: info.Mode(),
|
||||||
|
RootFS: rootfs,
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return specs, err
|
||||||
|
}
|
||||||
125
pkg/plan/inject_test.go
Normal file
125
pkg/plan/inject_test.go
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/config"
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
"metis/pkg/secrets"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestBuildFilesProducesK3sConfig exercises buildFiles with a fully populated
// NodeConfig (no secrets) and spot-checks each generated artifact: k3s config,
// hostname, authorized_keys, firstboot env, both network configs, and fstab.
func TestBuildFilesProducesK3sConfig(t *testing.T) {
	cfg := &config.NodeConfig{
		Hostname: "n1",
		IP:       "10.0.0.10",
		SSHUser:  "pi",
		SSHKeys:  []string{"ssh-rsa AAA"},
		Fstab: []config.FstabEntry{
			{
				UUID:       "disk-uuid",
				Mountpoint: "/mnt/astreae",
				FS:         "ext4",
				Options:    "defaults,nofail",
			},
		},
		Labels: map[string]string{"role": "worker", "zone": "a"},
		Taints: []string{"gpu=true:NoSchedule"},
		K3s: config.K3sConfig{
			URL:     "https://server:6443",
			Token:   "secret",
			Version: "v1.31.5+k3s1",
		},
	}
	files, err := buildFiles(cfg, nil)
	if err != nil {
		t.Fatalf("buildFiles: %v", err)
	}
	// Index the produced specs by path for easy lookup.
	pathMap := map[string]string{}
	for _, f := range files {
		pathMap[f.Path] = string(f.Content)
	}
	k3s, ok := pathMap["etc/rancher/k3s/config.yaml"]
	if !ok {
		t.Fatalf("missing k3s config")
	}
	if !strings.Contains(k3s, "server: https://server:6443") || !strings.Contains(k3s, "node-name: n1") {
		t.Fatalf("unexpected k3s config: %s", k3s)
	}
	hostFile, ok := pathMap["etc/hostname"]
	if !ok || strings.TrimSpace(hostFile) != "n1" {
		t.Fatalf("hostname file missing/incorrect: %q", hostFile)
	}
	auth, ok := pathMap["home/pi/.ssh/authorized_keys"]
	if !ok || !strings.Contains(auth, "ssh-rsa AAA") {
		t.Fatalf("authorized_keys missing/incorrect: %s", auth)
	}
	// firstboot values are shell-quoted, hence the surrounding single quotes.
	firstboot, ok := pathMap["etc/metis/firstboot.env"]
	if !ok || !strings.Contains(firstboot, "METIS_K3S_VERSION='v1.31.5+k3s1'") {
		t.Fatalf("firstboot env missing/incorrect: %s", firstboot)
	}
	// Gateway is derived as the .1 host of the node's /24.
	network, ok := pathMap["etc/NetworkManager/system-connections/end0-static.nmconnection"]
	if !ok || !strings.Contains(network, "address1=10.0.0.10/24,10.0.0.1") {
		t.Fatalf("networkmanager config missing/incorrect: %s", network)
	}
	networkd, ok := pathMap["etc/systemd/network/10-end0-static.network"]
	if !ok || !strings.Contains(networkd, "Address=10.0.0.10/24") || !strings.Contains(networkd, "Gateway=10.0.0.1") {
		t.Fatalf("systemd-networkd config missing/incorrect: %s", networkd)
	}
	fstab, ok := pathMap["etc/metis/fstab.append"]
	if !ok || !strings.Contains(fstab, "UUID=disk-uuid /mnt/astreae ext4 defaults,nofail 0 0") {
		t.Fatalf("fstab append missing/incorrect: %s", fstab)
	}
}
|
||||||
|
|
||||||
|
// TestOverlayFiles verifies that collectOverlays picks up one file from the
// boot overlay and one from the root overlay, walking nested directories.
func TestOverlayFiles(t *testing.T) {
	dir := t.TempDir()
	bootDir := filepath.Join(dir, "boot")
	rootDir := filepath.Join(dir, "root")
	if err := os.MkdirAll(filepath.Join(bootDir, "over"), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Join(rootDir, "etc"), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(bootDir, "over", "cmdline.txt"), []byte("console=tty1"), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(rootDir, "etc", "issue"), []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	class := &inventory.NodeClass{
		BootOverlay: bootDir,
		RootOverlay: rootDir,
	}
	files, err := collectOverlays(class)
	if err != nil {
		t.Fatalf("collectOverlays: %v", err)
	}
	// One file per overlay; directories themselves produce no specs.
	if len(files) != 2 {
		t.Fatalf("expected 2 files, got %d", len(files))
	}
}
|
||||||
|
|
||||||
|
// TestSecretsWrite checks that buildFiles emits etc/metis/secrets.json on the
// rootfs when non-nil secrets are supplied.
func TestSecretsWrite(t *testing.T) {
	cfg := &config.NodeConfig{
		Hostname: "n1",
		IP:       "10.0.0.1",
	}
	sec := &secrets.NodeSecrets{K3sToken: "tok", SSHPassword: "pw", Extra: map[string]string{"foo": "bar"}}
	files, err := buildFiles(cfg, sec)
	if err != nil {
		t.Fatalf("buildFiles: %v", err)
	}
	found := false
	for _, f := range files {
		if f.Path == "etc/metis/secrets.json" && f.RootFS {
			found = true
		}
	}
	if !found {
		t.Fatalf("secrets file not written")
	}
}
|
||||||
@ -2,6 +2,7 @@ package plan
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -41,7 +42,15 @@ func Build(inv *inventory.Inventory, nodeName, device, cacheDir string) (*Plan,
|
|||||||
actions = append(actions, Action{Type: "verify", Detail: fmt.Sprintf("Verify checksum %s", class.Checksum)})
|
actions = append(actions, Action{Type: "verify", Detail: fmt.Sprintf("Verify checksum %s", class.Checksum)})
|
||||||
}
|
}
|
||||||
actions = append(actions, Action{Type: "write", Detail: fmt.Sprintf("Write image to %s", device), Command: fmt.Sprintf("dd if=%s of=%s bs=4M status=progress conv=fsync", cacheImage, device)})
|
actions = append(actions, Action{Type: "write", Detail: fmt.Sprintf("Write image to %s", device), Command: fmt.Sprintf("dd if=%s of=%s bs=4M status=progress conv=fsync", cacheImage, device)})
|
||||||
actions = append(actions, Action{Type: "inject", Detail: "Inject hostname/network/k3s config into boot or rootfs"})
|
if boot := os.Getenv("METIS_BOOT_PATH"); boot != "" {
|
||||||
|
actions = append(actions, Action{Type: "inject", Detail: fmt.Sprintf("Inject config into boot mount %s", boot)})
|
||||||
|
}
|
||||||
|
if root := os.Getenv("METIS_ROOT_PATH"); root != "" {
|
||||||
|
actions = append(actions, Action{Type: "inject", Detail: fmt.Sprintf("Inject config into root mount %s", root)})
|
||||||
|
}
|
||||||
|
if os.Getenv("METIS_BOOT_PATH") == "" && os.Getenv("METIS_ROOT_PATH") == "" {
|
||||||
|
actions = append(actions, Action{Type: "inject", Detail: "Inject hostname/network/k3s config (requires mounted boot/root; skipped if unset)"})
|
||||||
|
}
|
||||||
actions = append(actions, Action{Type: "finalize", Detail: fmt.Sprintf("Ready to insert SD for %s", node.Hostname)})
|
actions = append(actions, Action{Type: "finalize", Detail: fmt.Sprintf("Ready to insert SD for %s", node.Hostname)})
|
||||||
|
|
||||||
return &Plan{
|
return &Plan{
|
||||||
|
|||||||
39
pkg/plan/plan_env_test.go
Normal file
39
pkg/plan/plan_env_test.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
package plan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestBuildIncludesInjectWhenEnvSet checks that the plan gains an "inject"
// action when METIS_BOOT_PATH is set.
// NOTE(review): prefer t.Setenv over os.Setenv+defer (it also restores a
// previously set value); left as-is here because removing the os usage would
// orphan this file's "os" import.
func TestBuildIncludesInjectWhenEnvSet(t *testing.T) {
	defer os.Unsetenv("METIS_BOOT_PATH")
	os.Setenv("METIS_BOOT_PATH", "/mnt/boot")
	inv := &inventory.Inventory{
		Classes: []inventory.NodeClass{{
			Name:  "c1",
			Image: "file:///tmp/dummy",
		}},
		Nodes: []inventory.NodeSpec{{
			Name:     "n1",
			Class:    "c1",
			Hostname: "n1",
			IP:       "10.0.0.1",
			K3sRole:  "agent",
		}},
	}
	p, err := Build(inv, "n1", "/dev/sdz", "/tmp/cache")
	if err != nil {
		t.Fatalf("build: %v", err)
	}
	found := false
	for _, a := range p.Actions {
		if a.Type == "inject" {
			found = true
		}
	}
	if !found {
		t.Fatalf("expected inject action when METIS_BOOT_PATH set")
	}
}
|
||||||
125
pkg/secrets/vault.go
Normal file
125
pkg/secrets/vault.go
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
package secrets
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NodeSecrets holds per-node secret material to inject at burn time.
|
||||||
|
// These should live in Vault at secret/data/nodes/<hostname>.
|
||||||
|
type NodeSecrets struct {
	SSHPassword string            `json:"ssh_password,omitempty"` // login password for the provisioning user
	K3sToken    string            `json:"k3s_token,omitempty"`    // overrides the inventory's k3s join token when set
	CloudInit   string            `json:"cloud_init,omitempty"`   // full cloud-init document; used verbatim instead of the generated one
	Extra       map[string]string `json:"extra,omitempty"`        // free-form key/value secrets copied into the node config
}
|
||||||
|
|
||||||
|
// Client fetches node secrets from Vault using either a token or AppRole.
|
||||||
|
type Client struct {
	Addr     string       // Vault base URL; a trailing slash is trimmed at use
	Token    string       // static token; takes precedence over AppRole login
	RoleID   string       // AppRole role_id, used only when Token is empty
	SecretID string       // AppRole secret_id, used only when Token is empty
	Client   *http.Client // underlying HTTP client; nil handling is in httpClient() — TODO confirm
}
|
||||||
|
|
||||||
|
// NewFromEnv builds a client from VAULT_ADDR, VAULT_TOKEN, VAULT_ROLE_ID, VAULT_SECRET_ID.
|
||||||
|
func NewFromEnv() *Client {
|
||||||
|
return &Client{
|
||||||
|
Addr: os.Getenv("VAULT_ADDR"),
|
||||||
|
Token: os.Getenv("VAULT_TOKEN"),
|
||||||
|
RoleID: os.Getenv("VAULT_ROLE_ID"),
|
||||||
|
SecretID: os.Getenv("VAULT_SECRET_ID"),
|
||||||
|
Client: &http.Client{
|
||||||
|
Timeout: 10 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoginIfNeeded performs AppRole login if no token is present.
|
||||||
|
func (c *Client) LoginIfNeeded(ctx context.Context) error {
|
||||||
|
if c.Token != "" || c.RoleID == "" || c.SecretID == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
body := map[string]string{"role_id": c.RoleID, "secret_id": c.SecretID}
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := json.NewEncoder(&buf).Encode(body); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/v1/auth/approle/login", strings.TrimSuffix(c.Addr, "/")), &buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
resp, err := c.httpClient().Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("approle login failed: %s", resp.Status)
|
||||||
|
}
|
||||||
|
var r struct {
|
||||||
|
Auth struct {
|
||||||
|
ClientToken string `json:"client_token"`
|
||||||
|
} `json:"auth"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if r.Auth.ClientToken == "" {
|
||||||
|
return fmt.Errorf("approle login returned empty token")
|
||||||
|
}
|
||||||
|
c.Token = r.Auth.ClientToken
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchNode pulls secret/data/nodes/<hostname>.
|
||||||
|
func (c *Client) FetchNode(ctx context.Context, hostname string) (*NodeSecrets, error) {
|
||||||
|
if err := c.LoginIfNeeded(ctx); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
url := fmt.Sprintf("%s/v1/secret/data/nodes/%s", strings.TrimSuffix(c.Addr, "/"), hostname)
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if c.Token != "" {
|
||||||
|
req.Header.Set("X-Vault-Token", c.Token)
|
||||||
|
}
|
||||||
|
resp, err := c.httpClient().Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode == http.StatusNotFound {
|
||||||
|
return &NodeSecrets{}, nil
|
||||||
|
}
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
b, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("vault fetch %s: %s: %s", hostname, resp.Status, string(b))
|
||||||
|
}
|
||||||
|
var r struct {
|
||||||
|
Data struct {
|
||||||
|
Data NodeSecrets `json:"data"`
|
||||||
|
} `json:"data"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &r.Data.Data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) httpClient() *http.Client {
|
||||||
|
if c.Client != nil {
|
||||||
|
return c.Client
|
||||||
|
}
|
||||||
|
return http.DefaultClient
|
||||||
|
}
|
||||||
76
pkg/secrets/vault_test.go
Normal file
76
pkg/secrets/vault_test.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package secrets
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFetchNodeReturnsData(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/v1/secret/data/nodes/n1":
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"data": map[string]any{
|
||||||
|
"data": map[string]any{
|
||||||
|
"ssh_password": "p1",
|
||||||
|
"k3s_token": "t1",
|
||||||
|
"cloud_init": "ci",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := &Client{Addr: srv.URL, Token: "tok"}
|
||||||
|
sec, err := c.FetchNode(context.Background(), "n1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("fetch: %v", err)
|
||||||
|
}
|
||||||
|
if sec.SSHPassword != "p1" || sec.K3sToken != "t1" || sec.CloudInit != "ci" {
|
||||||
|
t.Fatalf("unexpected secrets: %+v", sec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApproRoleLogin(t *testing.T) {
|
||||||
|
loginCalled := false
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/v1/auth/approle/login":
|
||||||
|
loginCalled = true
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"auth": map[string]any{
|
||||||
|
"client_token": "newtoken",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
case "/v1/secret/data/nodes/n1":
|
||||||
|
if r.Header.Get("X-Vault-Token") != "newtoken" {
|
||||||
|
t.Fatalf("missing token after approle login")
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"data": map[string]any{
|
||||||
|
"data": map[string]any{},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := &Client{Addr: srv.URL, RoleID: "r", SecretID: "s", Client: srv.Client()}
|
||||||
|
if _, err := c.FetchNode(context.Background(), "n1"); err != nil {
|
||||||
|
t.Fatalf("fetch with approle: %v", err)
|
||||||
|
}
|
||||||
|
if !loginCalled {
|
||||||
|
t.Fatalf("approle login not called")
|
||||||
|
}
|
||||||
|
}
|
||||||
88
pkg/sentinel/collector.go
Normal file
88
pkg/sentinel/collector.go
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
package sentinel
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Snapshot captures host-level facts.
// Produced by Collect on each node; pushed to the Metis service and used
// there to detect drift between nodes of the same class.
type Snapshot struct {
	Hostname string `json:"hostname,omitempty"` // output of `hostname`
	Kernel string `json:"kernel,omitempty"` // output of `uname -r`
	OSImage string `json:"os_image,omitempty"` // PRETTY_NAME from /etc/os-release
	K3sVersion string `json:"k3s_version,omitempty"` // output of `k3s version`
	Containerd string `json:"containerd,omitempty"` // output of `containerd --version`
	PackageSample map[string]string `json:"package_sample,omitempty"` // small subset to detect drift
	DropInsSample map[string]string `json:"dropins_sample,omitempty"` // path->content hash/sample
	Notes string `json:"notes,omitempty"` // free-form annotation
}
|
||||||
|
|
||||||
|
// Collect gathers a minimal set of facts; intended to run inside a DaemonSet pod with host mounts.
|
||||||
|
func Collect() *Snapshot {
|
||||||
|
return &Snapshot{
|
||||||
|
Hostname: runAndTrim("hostname"),
|
||||||
|
Kernel: runAndTrim("uname", "-r"),
|
||||||
|
OSImage: osRelease(),
|
||||||
|
K3sVersion: runAndTrim("k3s", "version"),
|
||||||
|
Containerd: runAndTrim("containerd", "--version"),
|
||||||
|
PackageSample: pkgSample(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAndTrim(cmd string, args ...string) string {
|
||||||
|
out, err := commandOutput(cmd, args...)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
func osRelease() string {
|
||||||
|
out, err := commandOutput("cat", "/etc/os-release")
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
if strings.HasPrefix(line, "PRETTY_NAME=") {
|
||||||
|
return strings.Trim(line[len("PRETTY_NAME="):], "\"")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// pkgSample grabs a tiny subset of package versions to detect drift without collecting everything.
|
||||||
|
func pkgSample() map[string]string {
|
||||||
|
names := []string{"containerd", "k3s", "nvidia-container-toolkit", "linux-image-raspi"}
|
||||||
|
result := map[string]string{}
|
||||||
|
for _, n := range names {
|
||||||
|
v := pkgVersion(n)
|
||||||
|
if v != "" {
|
||||||
|
result[n] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func pkgVersion(name string) string {
|
||||||
|
// Try dpkg-query first.
|
||||||
|
out, err := commandOutput("dpkg-query", "-W", "-f", "${Version}", name)
|
||||||
|
if err == nil && len(out) > 0 {
|
||||||
|
return strings.TrimSpace(string(out))
|
||||||
|
}
|
||||||
|
// Fallback rpm.
|
||||||
|
out, err = commandOutput("rpm", "-q", "--qf", "%{VERSION}-%{RELEASE}", name)
|
||||||
|
if err == nil && len(out) > 0 {
|
||||||
|
return strings.TrimSpace(string(out))
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// commandOutput runs cmd and returns its stdout. When METIS_SENTINEL_NSENTER=1
// the command is wrapped in nsenter targeting PID 1's namespaces so a
// containerized sentinel observes the host rather than its own pod.
func commandOutput(cmd string, args ...string) ([]byte, error) {
	if os.Getenv("METIS_SENTINEL_NSENTER") != "1" {
		return exec.Command(cmd, args...).Output()
	}
	wrapped := append([]string{"-t", "1", "-m", "-u", "-n", "-i", "-p", "--", cmd}, args...)
	return exec.Command("nsenter", wrapped...).Output()
}
|
||||||
795
pkg/service/app.go
Normal file
795
pkg/service/app.go
Normal file
@ -0,0 +1,795 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"crypto/x509"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/facts"
|
||||||
|
"metis/pkg/image"
|
||||||
|
"metis/pkg/inventory"
|
||||||
|
"metis/pkg/plan"
|
||||||
|
"metis/pkg/sentinel"
|
||||||
|
"metis/pkg/writer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JobStatus is the lifecycle state of a background Job.
type JobStatus string

// Job lifecycle states, in the order a job normally passes through them.
const (
	JobQueued JobStatus = "queued" // created, not yet started
	JobRunning JobStatus = "running" // worker goroutine is executing
	JobDone JobStatus = "done" // finished successfully
	JobError JobStatus = "error" // finished with an error (see Job.Error)
)
|
||||||
|
|
||||||
|
// Device describes a flashable block device.
// Populated from `lsblk` output by ListDevices; only whole disks that look
// like removable media are reported.
type Device struct {
	Name string `json:"name"` // kernel name, e.g. sdb
	Path string `json:"path"` // device node, e.g. /dev/sdb
	Model string `json:"model,omitempty"` // hardware model string, trimmed
	Transport string `json:"transport,omitempty"` // bus type from lsblk TRAN, e.g. usb
	Type string `json:"type,omitempty"` // lsblk TYPE; always "disk" for listed entries
	Removable bool `json:"removable"` // lsblk RM flag
	Hotplug bool `json:"hotplug"` // lsblk HOTPLUG flag
	SizeBytes int64 `json:"size_bytes"` // total capacity in bytes
}
|
||||||
|
|
||||||
|
// Job is a long-running Metis action visible in the UI.
// Jobs are created by Build/Replace and mutated by the worker goroutine via
// setJob/completeJob under the App mutex.
type Job struct {
	ID string `json:"id"` // unique id (UnixNano at creation)
	Kind string `json:"kind"` // "build" or "replace"
	Node string `json:"node,omitempty"` // inventory node name
	Host string `json:"host,omitempty"` // flash host (replace jobs)
	Device string `json:"device,omitempty"` // target block device path (replace jobs)
	Status JobStatus `json:"status"` // queued/running/done/error
	Stage string `json:"stage,omitempty"` // current pipeline stage, e.g. "download", "flash"
	Message string `json:"message,omitempty"` // human-readable progress text
	Artifact string `json:"artifact,omitempty"` // path of the built image, once known
	ProgressPct float64 `json:"progress_pct"` // overall progress 0..100
	Written int64 `json:"written_bytes,omitempty"` // bytes flashed so far
	Total int64 `json:"total_bytes,omitempty"` // total bytes to flash
	Error string `json:"error,omitempty"` // failure detail when Status == JobError
	StartedAt time.Time `json:"started_at"` // UTC creation time
	FinishedAt time.Time `json:"finished_at,omitempty"` // UTC completion time (zero while running)
}
|
||||||
|
|
||||||
|
// Event is a user-facing activity item for recent changes and runs.
// Events are appended as JSON lines to the history file (see appendEvent)
// and read back newest-first by recentEvents.
type Event struct {
	Time time.Time `json:"time"` // UTC timestamp of the event
	Kind string `json:"kind"` // dotted category, e.g. "image.build", "sentinel.watch"
	Summary string `json:"summary"` // one-line human-readable description
	Details map[string]any `json:"details,omitempty"` // structured context (node, device, errors, ...)
}
|
||||||
|
|
||||||
|
// SnapshotRecord stores the last fact snapshot pushed by a node sentinel.
type SnapshotRecord struct {
	Node string `json:"node"` // node name; defaults to the snapshot hostname
	CollectedAt time.Time `json:"collected_at"` // UTC time the snapshot was recorded
	Snapshot sentinel.Snapshot `json:"snapshot"` // raw facts collected on the node
}
|
||||||
|
|
||||||
|
// PageState is the UI/API view model.
// Assembled by App.State from copies of internal maps so callers never
// observe concurrent mutation.
type PageState struct {
	LocalHost string `json:"local_host"` // hostname this Metis instance runs on
	DefaultFlashHost string `json:"default_flash_host"` // host used when a flash request omits one
	FlashHosts []string `json:"flash_hosts"` // all known flash-capable hosts
	Nodes []inventory.NodeSpec `json:"nodes"` // inventory nodes
	Jobs []*Job `json:"jobs"` // jobs, newest first
	Devices []Device `json:"devices"` // current flash candidates on the queried host
	Events []Event `json:"events"` // recent history events, newest first
	Snapshots []SnapshotRecord `json:"snapshots"` // latest sentinel snapshot per node, sorted by node
	Targets map[string]facts.Targets `json:"targets"` // recommended targets per class
	Artifacts map[string]ArtifactSummary `json:"artifacts"` // latest built image per node
}
|
||||||
|
|
||||||
|
// ArtifactSummary describes the latest built image for a node.
type ArtifactSummary struct {
	Path string `json:"path"` // filesystem path of the .img artifact
	UpdatedAt time.Time `json:"updated_at"` // file modification time, UTC
	SizeBytes int64 `json:"size_bytes"` // artifact size in bytes
}
|
||||||
|
|
||||||
|
// App coordinates builds, flashes, sentinel snapshots, and the web UI state.
// All mutable maps below are guarded by mu; long-running work happens in
// goroutines spawned by Build/Replace that update jobs via setJob.
type App struct {
	settings Settings
	inventory *inventory.Inventory
	metrics *Metrics

	mu sync.RWMutex // guards jobs, snapshots, targets
	jobs map[string]*Job // job id -> job
	snapshots map[string]SnapshotRecord // node name -> latest sentinel snapshot
	targets map[string]facts.Targets // class name -> recommended targets
}
|
||||||
|
|
||||||
|
// NewApp creates a Metis service app instance.
|
||||||
|
func NewApp(settings Settings) (*App, error) {
|
||||||
|
if err := os.MkdirAll(settings.CacheDir, 0o755); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(settings.ArtifactDir, 0o755); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(settings.HistoryPath), 0o755); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
inv, err := inventory.Load(settings.InventoryPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
app := &App{
|
||||||
|
settings: settings,
|
||||||
|
inventory: inv,
|
||||||
|
metrics: NewMetrics(),
|
||||||
|
jobs: map[string]*Job{},
|
||||||
|
snapshots: map[string]SnapshotRecord{},
|
||||||
|
targets: map[string]facts.Targets{},
|
||||||
|
}
|
||||||
|
_ = app.loadSnapshots()
|
||||||
|
_ = app.loadTargets()
|
||||||
|
return app, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// State returns the current UI/API snapshot.
// It copies jobs, snapshots, and targets under the read lock, then releases
// the lock before the potentially slow device listing (shells out to lsblk)
// and history-file read.
func (a *App) State(deviceHost string) PageState {
	a.mu.RLock()
	// Copy each job by value so callers never observe later mutations.
	jobs := make([]*Job, 0, len(a.jobs))
	for _, job := range a.jobs {
		copyJob := *job
		jobs = append(jobs, &copyJob)
	}
	// Newest job first.
	sort.Slice(jobs, func(i, j int) bool {
		return jobs[i].StartedAt.After(jobs[j].StartedAt)
	})

	snaps := make([]SnapshotRecord, 0, len(a.snapshots))
	for _, snap := range a.snapshots {
		snaps = append(snaps, snap)
	}
	// Shallow-copy the targets map so it can be returned without the lock.
	aTargets := map[string]facts.Targets{}
	for key, value := range a.targets {
		aTargets[key] = value
	}
	a.mu.RUnlock()

	// Stable ordering for the UI.
	sort.Slice(snaps, func(i, j int) bool {
		return snaps[i].Node < snaps[j].Node
	})

	// Device listing errors degrade to an empty list rather than failing State.
	devices, _ := a.ListDevices(deviceHost)
	return PageState{
		LocalHost: a.settings.LocalHost,
		DefaultFlashHost: a.settings.DefaultFlashHost,
		FlashHosts: append([]string{}, a.settings.FlashHosts...),
		Nodes: append([]inventory.NodeSpec{}, a.inventory.Nodes...),
		Jobs: jobs,
		Devices: devices,
		Events: a.recentEvents(40),
		Snapshots: snaps,
		Targets: aTargets,
		Artifacts: a.artifacts(),
	}
}
|
||||||
|
|
||||||
|
// Build starts a background image build for a node.
|
||||||
|
func (a *App) Build(node string) (*Job, error) {
|
||||||
|
if _, _, err := a.inventory.FindNode(node); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
job := a.newJob("build", node, "", "")
|
||||||
|
go a.runBuild(job, false)
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace starts a background build+flash workflow for a node.
|
||||||
|
func (a *App) Replace(node, host, device string) (*Job, error) {
|
||||||
|
if host == "" {
|
||||||
|
host = a.settings.DefaultFlashHost
|
||||||
|
}
|
||||||
|
if host != a.settings.LocalHost && host != a.settings.DefaultFlashHost {
|
||||||
|
return nil, fmt.Errorf("flash host %s is not available on this Metis instance", host)
|
||||||
|
}
|
||||||
|
if _, _, err := a.inventory.FindNode(node); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if _, err := a.ensureDevice(device); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
job := a.newJob("replace", node, host, device)
|
||||||
|
go a.runBuild(job, true)
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StoreSnapshot records a pushed sentinel snapshot.
|
||||||
|
func (a *App) StoreSnapshot(record SnapshotRecord) error {
|
||||||
|
if record.Node == "" {
|
||||||
|
record.Node = record.Snapshot.Hostname
|
||||||
|
}
|
||||||
|
if record.CollectedAt.IsZero() {
|
||||||
|
record.CollectedAt = time.Now().UTC()
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(record.Node) == "" {
|
||||||
|
return fmt.Errorf("snapshot node required")
|
||||||
|
}
|
||||||
|
a.mu.Lock()
|
||||||
|
a.snapshots[record.Node] = record
|
||||||
|
a.mu.Unlock()
|
||||||
|
if err := a.persistSnapshots(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
a.metrics.RecordSnapshot(record.Node, "ok", record.CollectedAt)
|
||||||
|
a.appendEvent(Event{
|
||||||
|
Time: record.CollectedAt,
|
||||||
|
Kind: "sentinel.snapshot",
|
||||||
|
Summary: fmt.Sprintf("Captured sentinel snapshot for %s", record.Node),
|
||||||
|
Details: map[string]any{
|
||||||
|
"node": record.Node,
|
||||||
|
"kernel": record.Snapshot.Kernel,
|
||||||
|
"k3s_version": record.Snapshot.K3sVersion,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WatchSentinel recomputes class targets and logs meaningful drift.
// It snapshots current facts and targets under the read lock, computes the
// recommendation outside any lock, swaps in the new targets, persists them,
// and emits a history event (summary varies with the number of changes).
func (a *App) WatchSentinel() (*Event, error) {
	a.mu.RLock()
	// Convert stored sentinel snapshots into the facts package's shape.
	snaps := make([]facts.Snapshot, 0, len(a.snapshots))
	for _, snap := range a.snapshots {
		snaps = append(snaps, facts.Snapshot{
			Hostname: snap.Node,
			Kernel: snap.Snapshot.Kernel,
			OSImage: snap.Snapshot.OSImage,
			// Version commands emit multi-line output; only the first line matters.
			K3sVersion: firstLine(snap.Snapshot.K3sVersion),
			Containerd: firstLine(snap.Snapshot.Containerd),
			PackageSample: snap.Snapshot.PackageSample,
			DropInsSample: snap.Snapshot.DropInsSample,
		})
	}
	// Copy the previous targets so the diff can run without the lock.
	prevTargets := map[string]facts.Targets{}
	for key, value := range a.targets {
		prevTargets[key] = value
	}
	a.mu.RUnlock()

	nextTargets := facts.RecommendTargets(a.inventory, snaps)
	changes := diffTargets(prevTargets, nextTargets)

	a.mu.Lock()
	a.targets = nextTargets
	a.mu.Unlock()
	if err := a.persistTargets(); err != nil {
		return nil, err
	}

	event := &Event{
		Time: time.Now().UTC(),
		Kind: "sentinel.watch",
		Summary: "Metis sentinel watch completed with no template changes",
		Details: map[string]any{
			"classes": len(nextTargets),
			"changes": 0,
		},
	}
	if len(changes) > 0 {
		event.Summary = fmt.Sprintf("Metis sentinel watch detected %d template change(s)", len(changes))
		event.Details["changes"] = changes
	}
	a.appendEvent(*event)
	a.metrics.RecordWatch("ok")
	a.metrics.SetDriftTargets(nextTargets, len(changes))
	return event, nil
}
|
||||||
|
|
||||||
|
// ListDevices returns locally attached removable media that are safe candidates for flashing.
// It shells out to lsblk (-J JSON, -b sizes in bytes) and filters to whole
// disks that look removable and are within the configured size cap, so fixed
// system disks are never offered as flash targets.
func (a *App) ListDevices(host string) ([]Device, error) {
	if host == "" {
		host = a.settings.DefaultFlashHost
	}
	if host != a.settings.LocalHost && host != a.settings.DefaultFlashHost {
		return nil, fmt.Errorf("flash host %s is not attached to this Metis instance", host)
	}
	cmd := exec.Command("lsblk", "-J", "-b", "-o", "NAME,PATH,RM,HOTPLUG,SIZE,MODEL,TRAN,TYPE")
	out, err := cmd.Output()
	if err != nil {
		return nil, err
	}
	var payload struct {
		Blockdevices []struct {
			Name string `json:"name"`
			Path string `json:"path"`
			RM bool `json:"rm"`
			Hotplug bool `json:"hotplug"`
			// SIZE is a number in newer lsblk and a string in older
			// releases, hence the `any` and the type switch below.
			Size any `json:"size"`
			Model string `json:"model"`
			Tran string `json:"tran"`
			Type string `json:"type"`
		} `json:"blockdevices"`
	}
	if err := json.Unmarshal(out, &payload); err != nil {
		return nil, err
	}
	devices := make([]Device, 0)
	for _, dev := range payload.Blockdevices {
		// Only whole disks; partitions/loop devices are skipped.
		if dev.Type != "disk" {
			continue
		}
		size := int64(0)
		switch value := dev.Size.(type) {
		case string:
			size, _ = strconv.ParseInt(value, 10, 64)
		case float64:
			size = int64(value)
		}
		// Size cap guards against offering large fixed disks as targets.
		if size <= 0 || size > a.settings.MaxDeviceBytes {
			continue
		}
		// Must look removable: USB transport, RM flag, or hotplug flag.
		if dev.Tran != "usb" && !dev.RM && !dev.Hotplug {
			continue
		}
		devices = append(devices, Device{
			Name: dev.Name,
			Path: dev.Path,
			Model: strings.TrimSpace(dev.Model),
			Transport: dev.Tran,
			Type: dev.Type,
			Removable: dev.RM,
			Hotplug: dev.Hotplug,
			SizeBytes: size,
		})
	}
	sort.Slice(devices, func(i, j int) bool { return devices[i].Path < devices[j].Path })
	return devices, nil
}
|
||||||
|
|
||||||
|
// runBuild executes the build pipeline for a job and, when flash is true,
// continues into the flash pipeline. Stages in order: download -> verify ->
// copy -> inject (-> preflight -> flash). Every failure finalizes the job
// via failJob and records an error metric; runs in its own goroutine.
func (a *App) runBuild(job *Job, flash bool) {
	a.setJob(job.ID, func(j *Job) {
		j.Status = JobRunning
		j.Stage = "download"
		j.Message = "Fetching base image"
		j.ProgressPct = 5
	})
	output := a.artifactPath(job.Node)
	cacheDir := a.settings.CacheDir

	// Resolve the node's build plan (base image, actions) from the inventory.
	planData, err := plan.Build(a.inventory, job.Node, output, cacheDir)
	if err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	_, class, err := a.inventory.FindNode(job.Node)
	if err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	// Base images are cached by filename so repeat builds skip the download.
	cacheImage := filepath.Join(cacheDir, filepath.Base(planData.Image))
	if err := image.Download(planData.Image, cacheImage); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	a.setJob(job.ID, func(j *Job) {
		j.Stage = "verify"
		j.Message = "Verifying base image checksum"
		j.ProgressPct = 18
	})
	if err := image.VerifyChecksum(cacheImage, class.Checksum); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	a.setJob(job.ID, func(j *Job) {
		j.Stage = "copy"
		j.Message = "Copying base image into artifact"
		j.ProgressPct = 35
	})
	// Work on a per-node copy so the cached base image stays pristine.
	if err := writer.WriteImage(context.Background(), cacheImage, output); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	files, err := plan.Files(a.inventory, job.Node)
	if err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	a.setJob(job.ID, func(j *Job) {
		j.Stage = "inject"
		j.Message = "Injecting node-specific rootfs config"
		j.ProgressPct = 70
	})
	if err := image.InjectRootFS(output, files); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordBuild(job.Node, "error")
		return
	}
	a.metrics.RecordBuild(job.Node, "ok")
	a.appendEvent(Event{
		Time: time.Now().UTC(),
		Kind: "image.build",
		Summary: fmt.Sprintf("Built replacement image for %s", job.Node),
		Details: map[string]any{"node": job.Node, "artifact": output},
	})

	// Build-only jobs finish here.
	if !flash {
		a.completeJob(job.ID, func(j *Job) {
			j.Stage = "complete"
			j.Message = "Image build complete"
			j.ProgressPct = 100
			j.Artifact = output
		})
		return
	}

	a.setJob(job.ID, func(j *Job) {
		j.Stage = "preflight"
		j.Message = "Validating device and deleting stale node object"
		j.ProgressPct = 78
		j.Artifact = output
	})
	// Re-validate the device: it may have been unplugged since Replace ran.
	if _, err := a.ensureDevice(job.Device); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordFlash(job.Node, job.Host, "error")
		return
	}
	// Deleting the stale Kubernetes node object is best-effort: a failure is
	// logged as a warning event but does not abort the flash.
	if err := deleteNodeObject(job.Node); err != nil {
		a.appendEvent(Event{
			Time: time.Now().UTC(),
			Kind: "node.delete.warning",
			Summary: fmt.Sprintf("Could not delete stale Kubernetes node object for %s", job.Node),
			Details: map[string]any{"node": job.Node, "error": err.Error()},
		})
	}
	if err := a.flashArtifact(job.ID, output); err != nil {
		a.failJob(job.ID, err)
		a.metrics.RecordFlash(job.Node, job.Host, "error")
		return
	}
	a.metrics.RecordFlash(job.Node, job.Host, "ok")
	a.appendEvent(Event{
		Time: time.Now().UTC(),
		Kind: "image.flash",
		Summary: fmt.Sprintf("Flashed %s image to %s on %s", job.Node, job.Device, job.Host),
		Details: map[string]any{"node": job.Node, "device": job.Device, "host": job.Host},
	})
	a.completeJob(job.ID, func(j *Job) {
		j.Stage = "complete"
		j.Message = fmt.Sprintf("Flash complete. Move the card into %s and power-cycle it.", j.Node)
		j.ProgressPct = 100
		j.Artifact = output
	})
}
|
||||||
|
|
||||||
|
func (a *App) flashArtifact(jobID, artifact string) error {
|
||||||
|
info, err := os.Stat(artifact)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
a.setJob(jobID, func(j *Job) {
|
||||||
|
j.Stage = "flash"
|
||||||
|
j.Message = "Writing image to removable media"
|
||||||
|
j.ProgressPct = 82
|
||||||
|
j.Total = info.Size()
|
||||||
|
})
|
||||||
|
err = writer.WriteImageWithProgress(context.Background(), artifact, a.job(jobID).Device, func(written, total int64) {
|
||||||
|
pct := 82.0
|
||||||
|
if total > 0 {
|
||||||
|
pct = 82.0 + (float64(written)/float64(total))*17.0
|
||||||
|
}
|
||||||
|
a.setJob(jobID, func(j *Job) {
|
||||||
|
j.Written = written
|
||||||
|
j.Total = total
|
||||||
|
j.ProgressPct = pct
|
||||||
|
j.Message = fmt.Sprintf("Flashing %s of %s", humanBytes(written), humanBytes(total))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) ensureDevice(path string) (*Device, error) {
|
||||||
|
devices, err := a.ListDevices(a.settings.DefaultFlashHost)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, device := range devices {
|
||||||
|
if device.Path == path {
|
||||||
|
return &device, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("device %s is not a current removable flash candidate", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) newJob(kind, node, host, device string) *Job {
|
||||||
|
job := &Job{
|
||||||
|
ID: fmt.Sprintf("%d", time.Now().UTC().UnixNano()),
|
||||||
|
Kind: kind,
|
||||||
|
Node: node,
|
||||||
|
Host: host,
|
||||||
|
Device: device,
|
||||||
|
Status: JobQueued,
|
||||||
|
ProgressPct: 0,
|
||||||
|
StartedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
a.mu.Lock()
|
||||||
|
a.jobs[job.ID] = job
|
||||||
|
a.mu.Unlock()
|
||||||
|
return job
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) job(id string) *Job {
|
||||||
|
a.mu.RLock()
|
||||||
|
defer a.mu.RUnlock()
|
||||||
|
return a.jobs[id]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) setJob(id string, update func(*Job)) {
|
||||||
|
a.mu.Lock()
|
||||||
|
defer a.mu.Unlock()
|
||||||
|
job := a.jobs[id]
|
||||||
|
if job == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
update(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) failJob(id string, err error) {
|
||||||
|
a.completeJob(id, func(j *Job) {
|
||||||
|
j.Status = JobError
|
||||||
|
j.Error = err.Error()
|
||||||
|
j.Message = err.Error()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) completeJob(id string, update func(*Job)) {
|
||||||
|
a.mu.Lock()
|
||||||
|
defer a.mu.Unlock()
|
||||||
|
job := a.jobs[id]
|
||||||
|
if job == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
update(job)
|
||||||
|
if job.Status != JobError {
|
||||||
|
job.Status = JobDone
|
||||||
|
}
|
||||||
|
job.FinishedAt = time.Now().UTC()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) appendEvent(event Event) {
|
||||||
|
line, err := json.Marshal(event)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
f, err := os.OpenFile(a.settings.HistoryPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
_, _ = f.Write(append(line, '\n'))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) recentEvents(limit int) []Event {
|
||||||
|
f, err := os.Open(a.settings.HistoryPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
events := make([]Event, 0, limit)
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
for scanner.Scan() {
|
||||||
|
var event Event
|
||||||
|
if err := json.Unmarshal(scanner.Bytes(), &event); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
events = append(events, event)
|
||||||
|
}
|
||||||
|
if len(events) > limit {
|
||||||
|
events = events[len(events)-limit:]
|
||||||
|
}
|
||||||
|
for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 {
|
||||||
|
events[i], events[j] = events[j], events[i]
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) artifacts() map[string]ArtifactSummary {
|
||||||
|
result := map[string]ArtifactSummary{}
|
||||||
|
for _, node := range a.inventory.Nodes {
|
||||||
|
path := a.artifactPath(node.Name)
|
||||||
|
info, err := os.Stat(path)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[node.Name] = ArtifactSummary{
|
||||||
|
Path: path,
|
||||||
|
UpdatedAt: info.ModTime().UTC(),
|
||||||
|
SizeBytes: info.Size(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) artifactPath(node string) string {
|
||||||
|
return filepath.Join(a.settings.ArtifactDir, fmt.Sprintf("%s.img", node))
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadSnapshots restores the sentinel snapshot map from SnapshotsPath and
// replays each record into the metrics registry with status "ok" so the
// per-node gauges survive a restart. Returns the read or decode error
// (expected on first boot when the file does not exist yet).
func (a *App) loadSnapshots() error {
	data, err := os.ReadFile(a.settings.SnapshotsPath)
	if err != nil {
		return err
	}
	var snapshots map[string]SnapshotRecord
	if err := json.Unmarshal(data, &snapshots); err != nil {
		return err
	}
	a.mu.Lock()
	a.snapshots = snapshots
	a.mu.Unlock()
	// Replay outside the lock; RecordSnapshot takes the metrics mutex itself.
	for _, snap := range snapshots {
		a.metrics.RecordSnapshot(snap.Node, "ok", snap.CollectedAt)
	}
	return nil
}
|
||||||
|
|
||||||
|
func (a *App) persistSnapshots() error {
|
||||||
|
a.mu.RLock()
|
||||||
|
data, err := json.MarshalIndent(a.snapshots, "", " ")
|
||||||
|
a.mu.RUnlock()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(a.settings.SnapshotsPath), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(a.settings.SnapshotsPath, data, 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadTargets restores the per-class target map from TargetsPath and pushes
// it into the metrics registry with a changed-count of 0 (this is a restart,
// not a drift event). Returns the read or decode error, if any.
func (a *App) loadTargets() error {
	data, err := os.ReadFile(a.settings.TargetsPath)
	if err != nil {
		return err
	}
	var targets map[string]facts.Targets
	if err := json.Unmarshal(data, &targets); err != nil {
		return err
	}
	a.mu.Lock()
	a.targets = targets
	a.mu.Unlock()
	a.metrics.SetDriftTargets(targets, 0)
	return nil
}
|
||||||
|
|
||||||
|
func (a *App) persistTargets() error {
|
||||||
|
a.mu.RLock()
|
||||||
|
data, err := json.MarshalIndent(a.targets, "", " ")
|
||||||
|
a.mu.RUnlock()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(a.settings.TargetsPath), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(a.settings.TargetsPath, data, 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func diffTargets(prev, next map[string]facts.Targets) []string {
|
||||||
|
classes := map[string]struct{}{}
|
||||||
|
for class := range prev {
|
||||||
|
classes[class] = struct{}{}
|
||||||
|
}
|
||||||
|
for class := range next {
|
||||||
|
classes[class] = struct{}{}
|
||||||
|
}
|
||||||
|
out := make([]string, 0)
|
||||||
|
for class := range classes {
|
||||||
|
if !targetsEqual(prev[class], next[class]) {
|
||||||
|
out = append(out, class)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func targetsEqual(a, b facts.Targets) bool {
|
||||||
|
if a.Kernel != b.Kernel || a.OSImage != b.OSImage || a.Containerd != b.Containerd || a.K3sVersion != b.K3sVersion {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if len(a.Packages) != len(b.Packages) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for key, value := range a.Packages {
|
||||||
|
if b.Packages[key] != value {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// humanBytes renders a byte count using binary (1024-based) units,
// e.g. "512 B", "1.5 KiB", "2.0 GiB".
func humanBytes(value int64) string {
	const step = int64(1024)
	if value < step {
		return fmt.Sprintf("%d B", value)
	}
	divisor := step
	suffix := 0
	for remaining := value / step; remaining >= step; remaining /= step {
		divisor *= step
		suffix++
	}
	return fmt.Sprintf("%.1f %ciB", float64(value)/float64(divisor), "KMGTPE"[suffix])
}
|
||||||
|
|
||||||
|
// firstLine trims value and returns only its first line, itself trimmed.
func firstLine(value string) string {
	trimmed := strings.TrimSpace(value)
	newline := strings.IndexByte(trimmed, '\n')
	if newline < 0 {
		return trimmed
	}
	return strings.TrimSpace(trimmed[:newline])
}
|
||||||
|
|
||||||
|
// deleteNodeObject removes the Kubernetes Node object named node, preferring
// the in-cluster API path (service-account credentials); if that fails for
// any reason it falls back to shelling out to kubectl. Returns nil when the
// node was deleted or already absent.
func deleteNodeObject(node string) error {
	if err := deleteNodeObjectInCluster(node); err == nil {
		return nil
	}
	// Fallback: rely on a kubeconfig reachable by the local kubectl binary.
	cmd := exec.Command("kubectl", "delete", "node", node, "--ignore-not-found")
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("delete node: %w: %s", err, strings.TrimSpace(string(out)))
	}
	return nil
}
|
||||||
|
|
||||||
|
// deleteNodeObjectInCluster deletes the Node object through the in-cluster
// Kubernetes API, authenticating with the pod's service-account token and
// trusting only the cluster CA bundle. It errors immediately when the
// standard in-cluster environment variables are absent. 200/202 (deleted)
// and 404 (already gone) all count as success.
func deleteNodeObjectInCluster(node string) error {
	host := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_HOST"))
	port := strings.TrimSpace(os.Getenv("KUBERNETES_SERVICE_PORT"))
	if host == "" || port == "" {
		return errors.New("not running in cluster")
	}
	token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
	if err != nil {
		return err
	}
	caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt")
	if err != nil {
		return err
	}
	// Trust only the cluster CA for the apiserver connection.
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(caPEM) {
		return errors.New("append kubernetes CA")
	}
	client := &http.Client{
		Timeout: 15 * time.Second,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{RootCAs: pool},
		},
	}
	req, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("https://%s:%s/api/v1/nodes/%s", host, port, node), nil)
	if err != nil {
		return err
	}
	req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token)))
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusAccepted {
		return nil
	}
	// Cap the error body echoed back so failure messages stay bounded.
	body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
	return fmt.Errorf("delete node %s failed: %s: %s", node, resp.Status, strings.TrimSpace(string(body)))
}
|
||||||
188
pkg/service/metrics.go
Normal file
188
pkg/service/metrics.go
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/facts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Metrics captures the small Prometheus surface exported by Metis.
// All counters use keys produced by counterKey (NUL-joined label values).
type Metrics struct {
	mu sync.RWMutex // guards every field below

	builds              map[string]int     // keyed by counterKey(node, status)
	flashes             map[string]int     // keyed by counterKey(node, host, status)
	snapshots           map[string]int     // keyed by counterKey(node, status)
	lastSnapshotUnix    map[string]float64 // per-node unix time of the last accepted snapshot
	watches             map[string]int     // keyed by counterKey(status)
	lastWatchSuccess    float64            // unix time of the last "ok" watch run
	classDriftCounts    map[string]int     // per-class count of populated target fields
	lastWatchChangeSize float64            // classes changed by the most recent SetDriftTargets
}
|
||||||
|
|
||||||
|
// NewMetrics builds a zero-value metrics registry.
|
||||||
|
func NewMetrics() *Metrics {
|
||||||
|
return &Metrics{
|
||||||
|
builds: map[string]int{},
|
||||||
|
flashes: map[string]int{},
|
||||||
|
snapshots: map[string]int{},
|
||||||
|
lastSnapshotUnix: map[string]float64{},
|
||||||
|
watches: map[string]int{},
|
||||||
|
classDriftCounts: map[string]int{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Metrics) RecordBuild(node, status string) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
m.builds[counterKey(node, status)]++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Metrics) RecordFlash(node, host, status string) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
m.flashes[counterKey(node, host, status)]++
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordSnapshot counts an accepted snapshot for (node, status) and, when a
// non-zero timestamp is supplied, updates the per-node last-snapshot gauge.
func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.snapshots[counterKey(node, status)]++
	if !ts.IsZero() {
		m.lastSnapshotUnix[node] = float64(ts.Unix())
	}
}
|
||||||
|
|
||||||
|
// RecordWatch counts a sentinel watch run by status and stamps the
// last-success gauge when the run succeeded.
func (m *Metrics) RecordWatch(status string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.watches[counterKey(status)]++
	if status == "ok" {
		m.lastWatchSuccess = float64(time.Now().UTC().Unix())
	}
}
|
||||||
|
|
||||||
|
func (m *Metrics) SetDriftTargets(targets map[string]facts.Targets, changed int) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
m.classDriftCounts = map[string]int{}
|
||||||
|
for class, target := range targets {
|
||||||
|
count := 0
|
||||||
|
if strings.TrimSpace(target.Kernel) != "" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(target.OSImage) != "" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(target.Containerd) != "" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(target.K3sVersion) != "" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
count += len(target.Packages)
|
||||||
|
m.classDriftCounts[class] = count
|
||||||
|
}
|
||||||
|
m.lastWatchChangeSize = float64(changed)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Render writes a Prometheus text exposition response. Map iteration is
// sorted so the output is deterministic; counter keys are split back into
// their label values with splitKey.
func (m *Metrics) Render(w io.Writer) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	fmt.Fprintln(w, "# HELP metis_builds_total Replacement image builds by node and status")
	fmt.Fprintln(w, "# TYPE metis_builds_total counter")
	for _, key := range sortedKeys(m.builds) {
		parts := splitKey(key, 2)
		node, status := parts[0], parts[1]
		fmt.Fprintf(w, "metis_builds_total{node=%q,status=%q} %d\n", node, status, m.builds[key])
	}

	fmt.Fprintln(w, "# HELP metis_flashes_total Replacement flashes by node, host, and status")
	fmt.Fprintln(w, "# TYPE metis_flashes_total counter")
	for _, key := range sortedKeys(m.flashes) {
		parts := splitKey(key, 3)
		node, host, status := parts[0], parts[1], parts[2]
		fmt.Fprintf(w, "metis_flashes_total{node=%q,host=%q,status=%q} %d\n", node, host, status, m.flashes[key])
	}

	fmt.Fprintln(w, "# HELP metis_sentinel_snapshots_total Sentinel snapshots accepted by node and status")
	fmt.Fprintln(w, "# TYPE metis_sentinel_snapshots_total counter")
	for _, key := range sortedKeys(m.snapshots) {
		parts := splitKey(key, 2)
		node, status := parts[0], parts[1]
		fmt.Fprintf(w, "metis_sentinel_snapshots_total{node=%q,status=%q} %d\n", node, status, m.snapshots[key])
	}

	fmt.Fprintln(w, "# HELP metis_sentinel_snapshot_timestamp_seconds Last accepted sentinel snapshot timestamp by node")
	fmt.Fprintln(w, "# TYPE metis_sentinel_snapshot_timestamp_seconds gauge")
	for _, node := range sortedFloatKeys(m.lastSnapshotUnix) {
		fmt.Fprintf(w, "metis_sentinel_snapshot_timestamp_seconds{node=%q} %.0f\n", node, m.lastSnapshotUnix[node])
	}

	fmt.Fprintln(w, "# HELP metis_sentinel_watch_total Sentinel watch runs by status")
	fmt.Fprintln(w, "# TYPE metis_sentinel_watch_total counter")
	for _, key := range sortedKeys(m.watches) {
		status := splitKey(key, 1)[0]
		fmt.Fprintf(w, "metis_sentinel_watch_total{status=%q} %d\n", status, m.watches[key])
	}

	fmt.Fprintln(w, "# HELP metis_sentinel_watch_last_success_timestamp_seconds Last successful sentinel watch timestamp")
	fmt.Fprintln(w, "# TYPE metis_sentinel_watch_last_success_timestamp_seconds gauge")
	fmt.Fprintf(w, "metis_sentinel_watch_last_success_timestamp_seconds %.0f\n", m.lastWatchSuccess)

	fmt.Fprintln(w, "# HELP metis_sentinel_watch_changed_classes Number of class target sets changed by the last watch")
	fmt.Fprintln(w, "# TYPE metis_sentinel_watch_changed_classes gauge")
	fmt.Fprintf(w, "metis_sentinel_watch_changed_classes %.0f\n", m.lastWatchChangeSize)

	fmt.Fprintln(w, "# HELP metis_class_target_fields Count of populated target fields per class")
	fmt.Fprintln(w, "# TYPE metis_class_target_fields gauge")
	for _, class := range sortedFloatKeysInt(m.classDriftCounts) {
		fmt.Fprintf(w, "metis_class_target_fields{class=%q} %d\n", class, m.classDriftCounts[class])
	}
}
|
||||||
|
|
||||||
|
// counterKey joins label values with NUL so they can be used as a single map
// key and split back apart unambiguously (label values never contain NUL).
func counterKey(parts ...string) string {
	var b strings.Builder
	for i, part := range parts {
		if i > 0 {
			b.WriteByte(0)
		}
		b.WriteString(part)
	}
	return b.String()
}
|
||||||
|
|
||||||
|
// splitKey reverses counterKey, padding with empty strings so the result
// always has at least want elements.
func splitKey(key string, want int) []string {
	fields := strings.Split(key, "\x00")
	if missing := want - len(fields); missing > 0 {
		fields = append(fields, make([]string, missing)...)
	}
	return fields
}
|
||||||
|
|
||||||
|
// sortedKeys returns the keys of m in ascending string order.
func sortedKeys[T any](m map[string]T) []string {
	out := make([]string, 0, len(m))
	for k := range m {
		out = append(out, k)
	}
	sort.Slice(out, func(i, j int) bool { return out[i] < out[j] })
	return out
}
|
||||||
|
|
||||||
|
// sortedFloatKeys returns the keys of m in ascending order.
// NOTE(review): duplicates the generic sortedKeys; candidate for removal.
func sortedFloatKeys(m map[string]float64) []string {
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Sort(sort.StringSlice(names))
	return names
}
|
||||||
|
|
||||||
|
// sortedFloatKeysInt returns the keys of m in ascending order.
// NOTE(review): duplicates the generic sortedKeys; candidate for removal.
// The "Float" in the name is historical — it operates on map[string]int.
func sortedFloatKeysInt(m map[string]int) []string {
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Sort(sort.StringSlice(names))
	return names
}
|
||||||
628
pkg/service/server.go
Normal file
628
pkg/service/server.go
Normal file
@ -0,0 +1,628 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
	"bytes"
	"encoding/json"
	"html/template"
	"io"
	"net/http"
	"strings"
)
|
||||||
|
|
||||||
|
// userContext describes the authenticated caller as derived from the
// auth-proxy request headers (see App.authorize).
type userContext struct {
	Name   string   // identity from X-Auth-Request-User / X-Forwarded-User
	Groups []string // memberships parsed from X-Auth-Request-Groups
}
|
||||||
|
|
||||||
|
// pageData is the template context for the Metis control page.
type pageData struct {
	State          PageState   // full UI state snapshot
	AllowedGroups  []string    // groups permitted through the auth proxy
	DefaultMessage string      // NOTE(review): never set by the visible handlers — confirm template use
	BootJSON       template.JS // State serialized for the client-side boot script
}
|
||||||
|
|
||||||
|
// Handler returns the Metis HTTP handler. Routes under /internal/ plus
// /healthz and /metrics are unauthenticated (intended for in-cluster
// callers and scrapers); everything else is wrapped in withUIAuth, which
// enforces the auth-proxy identity headers.
func (a *App) Handler() http.Handler {
	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", a.handleHealth)
	mux.HandleFunc("/metrics", a.handleMetrics)
	mux.HandleFunc("/internal/sentinel/snapshot", a.handleInternalSnapshot)
	mux.HandleFunc("/internal/sentinel/watch", a.handleInternalWatch)
	mux.HandleFunc("/api/state", a.withUIAuth(a.handleState))
	mux.HandleFunc("/api/devices", a.withUIAuth(a.handleDevices))
	mux.HandleFunc("/api/jobs/build", a.withUIAuth(a.handleBuild))
	mux.HandleFunc("/api/jobs/replace", a.withUIAuth(a.handleReplace))
	mux.HandleFunc("/api/sentinel/watch", a.withUIAuth(a.handleWatch))
	mux.HandleFunc("/", a.withUIAuth(a.handleIndex))
	return mux
}
|
||||||
|
|
||||||
|
func (a *App) handleHealth(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "service": "metis"})
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleMetrics serves the Prometheus text exposition (format version 0.0.4).
func (a *App) handleMetrics(w http.ResponseWriter, _ *http.Request) {
	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	a.metrics.Render(w)
}
|
||||||
|
|
||||||
|
// handleInternalSnapshot accepts a POSTed sentinel SnapshotRecord (JSON
// body) and stores it via StoreSnapshot. It is mounted on the
// unauthenticated /internal/ route set for in-cluster sentinels.
func (a *App) handleInternalSnapshot(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	var record SnapshotRecord
	if err := json.NewDecoder(r.Body).Decode(&record); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	// StoreSnapshot errors are treated as client errors (bad payload).
	if err := a.StoreSnapshot(record); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	writeJSON(w, http.StatusOK, map[string]any{"status": "ok"})
}
|
||||||
|
|
||||||
|
// handleInternalWatch runs a sentinel watch on POST. It is behaviorally
// identical to the authenticated handleWatch but exposed on the
// unauthenticated /internal/ route for scheduled in-cluster callers.
func (a *App) handleInternalWatch(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	event, err := a.WatchSentinel()
	if err != nil {
		// Count the failed run before surfacing the error to the caller.
		a.metrics.RecordWatch("error")
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	writeJSON(w, http.StatusOK, event)
}
|
||||||
|
|
||||||
|
func (a *App) handleState(w http.ResponseWriter, r *http.Request) {
|
||||||
|
host := r.URL.Query().Get("host")
|
||||||
|
writeJSON(w, http.StatusOK, a.State(host))
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleDevices lists removable media detected on the flash host named by
// the ?host= query parameter; ListDevices errors map to 400.
func (a *App) handleDevices(w http.ResponseWriter, r *http.Request) {
	host := r.URL.Query().Get("host")
	devices, err := a.ListDevices(host)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	writeJSON(w, http.StatusOK, map[string]any{"devices": devices})
}
|
||||||
|
|
||||||
|
// handleBuild starts an image build job for the node named in the request
// (form field or JSON body, via requestValue) and responds 202 with the job.
func (a *App) handleBuild(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	node := requestValue(r, "node")
	job, err := a.Build(node)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	writeJSON(w, http.StatusAccepted, job)
}
|
||||||
|
|
||||||
|
// handleReplace starts a build-and-flash job for (node, host, device), each
// read from the request via requestValue, and responds 202 with the job.
func (a *App) handleReplace(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	// NOTE(review): three sequential requestValue calls re-read the request
	// body — requestValue must leave the body re-readable for JSON clients.
	node := requestValue(r, "node")
	host := requestValue(r, "host")
	device := requestValue(r, "device")
	job, err := a.Replace(node, host, device)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	writeJSON(w, http.StatusAccepted, job)
}
|
||||||
|
|
||||||
|
// handleWatch runs a sentinel watch on demand from the UI (POST only) and
// returns the resulting event; failures are counted and mapped to 500.
func (a *App) handleWatch(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	event, err := a.WatchSentinel()
	if err != nil {
		a.metrics.RecordWatch("error")
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	writeJSON(w, http.StatusOK, event)
}
|
||||||
|
|
||||||
|
// handleIndex renders the single-page control UI with the current state
// (scoped to the default flash host) embedded as a JSON boot payload.
func (a *App) handleIndex(w http.ResponseWriter, r *http.Request) {
	state := a.State(a.settings.DefaultFlashHost)
	// NOTE(review): marshal and template errors are ignored; a marshal
	// failure would embed an empty boot payload in the page.
	payload, _ := json.Marshal(state)
	data := pageData{
		State:         state,
		AllowedGroups: append([]string{}, a.settings.AllowedGroups...),
		BootJSON:      template.JS(payload),
	}
	_ = metisPage.Execute(w, data)
}
|
||||||
|
|
||||||
|
func (a *App) withUIAuth(next http.HandlerFunc) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
user, ok := a.authorize(r)
|
||||||
|
if !ok {
|
||||||
|
http.Error(w, "forbidden", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user.Name != "" {
|
||||||
|
w.Header().Set("X-Metis-User", user.Name)
|
||||||
|
}
|
||||||
|
next(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// authorize resolves the caller identity from auth-proxy headers and checks
// it against the allow lists. Identity comes from X-Auth-Request-User,
// falling back to X-Forwarded-User; groups from X-Auth-Request-Groups.
// Returns (user, true) when the user is explicitly allowed or belongs to an
// allowed group; (user, false) when identified but not allowed; and a zero
// userContext with false when no identity header is present.
func (a *App) authorize(r *http.Request) (userContext, bool) {
	user := strings.TrimSpace(r.Header.Get("X-Auth-Request-User"))
	if user == "" {
		user = strings.TrimSpace(r.Header.Get("X-Forwarded-User"))
	}
	if user == "" {
		return userContext{}, false
	}
	groups := splitHeaderList(r.Header.Get("X-Auth-Request-Groups"))
	// Direct per-user allowance is checked before group membership.
	for _, allowedUser := range a.settings.AllowedUsers {
		if allowedUser == user {
			return userContext{Name: user, Groups: groups}, true
		}
	}
	for _, group := range groups {
		for _, allowed := range a.settings.AllowedGroups {
			if group == allowed {
				return userContext{Name: user, Groups: groups}, true
			}
		}
	}
	return userContext{Name: user, Groups: groups}, false
}
|
||||||
|
|
||||||
|
// splitHeaderList parses a comma-separated header value into trimmed,
// non-empty entries; an entirely blank input yields nil.
func splitHeaderList(raw string) []string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}
	pieces := strings.Split(raw, ",")
	entries := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			entries = append(entries, trimmed)
		}
	}
	return entries
}
|
||||||
|
|
||||||
|
func requestValue(r *http.Request, key string) string {
|
||||||
|
if err := r.ParseForm(); err == nil {
|
||||||
|
if value := strings.TrimSpace(r.Form.Get(key)); value != "" {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var payload map[string]any
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&payload); err == nil {
|
||||||
|
if value, ok := payload[key].(string); ok {
|
||||||
|
return strings.TrimSpace(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeJSON(w http.ResponseWriter, status int, payload any) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
<title>Metis Control</title>
|
||||||
|
<style>
|
||||||
|
:root{
|
||||||
|
--ink:#111318;
|
||||||
|
--muted:#616778;
|
||||||
|
--line:rgba(17,19,24,.12);
|
||||||
|
--paper:rgba(255,255,255,.84);
|
||||||
|
--paper-strong:#ffffff;
|
||||||
|
--brand:#1d5f8c;
|
||||||
|
--brand-deep:#153b59;
|
||||||
|
--accent:#d47b37;
|
||||||
|
--success:#1b8f5a;
|
||||||
|
--danger:#a63c35;
|
||||||
|
--shadow:0 20px 60px rgba(17,19,24,.12);
|
||||||
|
}
|
||||||
|
*{box-sizing:border-box}
|
||||||
|
body{
|
||||||
|
margin:0;
|
||||||
|
min-height:100vh;
|
||||||
|
font-family:"Avenir Next","Trebuchet MS","Segoe UI",sans-serif;
|
||||||
|
color:var(--ink);
|
||||||
|
background:
|
||||||
|
radial-gradient(circle at top left, rgba(212,123,55,.18), transparent 30rem),
|
||||||
|
radial-gradient(circle at top right, rgba(29,95,140,.18), transparent 32rem),
|
||||||
|
linear-gradient(180deg, #f8f4ee 0%, #eef2f5 48%, #e4edf2 100%);
|
||||||
|
}
|
||||||
|
.frame{
|
||||||
|
max-width:1280px;
|
||||||
|
margin:0 auto;
|
||||||
|
padding:2rem 1.25rem 3rem;
|
||||||
|
}
|
||||||
|
.mast{
|
||||||
|
display:flex;
|
||||||
|
justify-content:space-between;
|
||||||
|
align-items:flex-end;
|
||||||
|
gap:1.5rem;
|
||||||
|
margin-bottom:1.5rem;
|
||||||
|
}
|
||||||
|
.eyebrow{
|
||||||
|
letter-spacing:.14em;
|
||||||
|
text-transform:uppercase;
|
||||||
|
font-size:.72rem;
|
||||||
|
color:var(--brand-deep);
|
||||||
|
margin-bottom:.35rem;
|
||||||
|
font-weight:700;
|
||||||
|
}
|
||||||
|
h1{
|
||||||
|
margin:0;
|
||||||
|
font-size:clamp(2rem,4vw,3.4rem);
|
||||||
|
line-height:1;
|
||||||
|
}
|
||||||
|
.sub{
|
||||||
|
max-width:54rem;
|
||||||
|
color:var(--muted);
|
||||||
|
margin-top:.7rem;
|
||||||
|
font-size:1rem;
|
||||||
|
}
|
||||||
|
.badge{
|
||||||
|
display:inline-flex;
|
||||||
|
align-items:center;
|
||||||
|
gap:.45rem;
|
||||||
|
padding:.7rem .95rem;
|
||||||
|
background:rgba(255,255,255,.72);
|
||||||
|
border:1px solid rgba(21,59,89,.12);
|
||||||
|
border-radius:999px;
|
||||||
|
box-shadow:var(--shadow);
|
||||||
|
font-size:.9rem;
|
||||||
|
}
|
||||||
|
.grid{
|
||||||
|
display:grid;
|
||||||
|
grid-template-columns:1.2fr .9fr;
|
||||||
|
gap:1rem;
|
||||||
|
}
|
||||||
|
.stack{
|
||||||
|
display:grid;
|
||||||
|
gap:1rem;
|
||||||
|
}
|
||||||
|
.card{
|
||||||
|
background:var(--paper);
|
||||||
|
backdrop-filter:blur(14px);
|
||||||
|
border:1px solid var(--line);
|
||||||
|
border-radius:1.25rem;
|
||||||
|
padding:1.1rem;
|
||||||
|
box-shadow:var(--shadow);
|
||||||
|
}
|
||||||
|
.card h2{
|
||||||
|
margin:0 0 .35rem;
|
||||||
|
font-size:1rem;
|
||||||
|
text-transform:uppercase;
|
||||||
|
letter-spacing:.1em;
|
||||||
|
color:var(--brand-deep);
|
||||||
|
}
|
||||||
|
.hint{
|
||||||
|
color:var(--muted);
|
||||||
|
font-size:.92rem;
|
||||||
|
margin-bottom:1rem;
|
||||||
|
}
|
||||||
|
.form-grid{
|
||||||
|
display:grid;
|
||||||
|
grid-template-columns:repeat(2,minmax(0,1fr));
|
||||||
|
gap:.85rem;
|
||||||
|
}
|
||||||
|
label{
|
||||||
|
display:grid;
|
||||||
|
gap:.35rem;
|
||||||
|
font-weight:600;
|
||||||
|
font-size:.92rem;
|
||||||
|
}
|
||||||
|
select, button{
|
||||||
|
width:100%;
|
||||||
|
border-radius:.85rem;
|
||||||
|
border:1px solid rgba(17,19,24,.14);
|
||||||
|
padding:.85rem .95rem;
|
||||||
|
font:inherit;
|
||||||
|
}
|
||||||
|
button{
|
||||||
|
cursor:pointer;
|
||||||
|
background:linear-gradient(135deg,var(--brand) 0%,var(--brand-deep) 100%);
|
||||||
|
color:#fff;
|
||||||
|
border:none;
|
||||||
|
font-weight:700;
|
||||||
|
letter-spacing:.03em;
|
||||||
|
box-shadow:0 14px 30px rgba(21,59,89,.18);
|
||||||
|
}
|
||||||
|
button.secondary{
|
||||||
|
background:#fff;
|
||||||
|
color:var(--ink);
|
||||||
|
border:1px solid rgba(17,19,24,.14);
|
||||||
|
box-shadow:none;
|
||||||
|
}
|
||||||
|
.actions{
|
||||||
|
display:grid;
|
||||||
|
grid-template-columns:repeat(3,minmax(0,1fr));
|
||||||
|
gap:.7rem;
|
||||||
|
margin-top:.9rem;
|
||||||
|
}
|
||||||
|
.list{
|
||||||
|
display:grid;
|
||||||
|
gap:.7rem;
|
||||||
|
max-height:30rem;
|
||||||
|
overflow:auto;
|
||||||
|
}
|
||||||
|
.item{
|
||||||
|
border:1px solid rgba(17,19,24,.1);
|
||||||
|
border-radius:1rem;
|
||||||
|
padding:.85rem .95rem;
|
||||||
|
background:rgba(255,255,255,.8);
|
||||||
|
}
|
||||||
|
.item-head{
|
||||||
|
display:flex;
|
||||||
|
justify-content:space-between;
|
||||||
|
gap:1rem;
|
||||||
|
margin-bottom:.35rem;
|
||||||
|
font-weight:700;
|
||||||
|
}
|
||||||
|
.meta{
|
||||||
|
color:var(--muted);
|
||||||
|
font-size:.85rem;
|
||||||
|
}
|
||||||
|
.bar{
|
||||||
|
height:.55rem;
|
||||||
|
background:rgba(17,19,24,.08);
|
||||||
|
border-radius:999px;
|
||||||
|
overflow:hidden;
|
||||||
|
margin-top:.7rem;
|
||||||
|
}
|
||||||
|
.bar > span{
|
||||||
|
display:block;
|
||||||
|
height:100%;
|
||||||
|
background:linear-gradient(90deg,var(--accent),var(--brand));
|
||||||
|
}
|
||||||
|
.pill{
|
||||||
|
display:inline-block;
|
||||||
|
padding:.2rem .55rem;
|
||||||
|
border-radius:999px;
|
||||||
|
font-size:.75rem;
|
||||||
|
text-transform:uppercase;
|
||||||
|
letter-spacing:.08em;
|
||||||
|
background:rgba(21,59,89,.08);
|
||||||
|
color:var(--brand-deep);
|
||||||
|
}
|
||||||
|
.pill.done{background:rgba(27,143,90,.12);color:var(--success)}
|
||||||
|
.pill.error{background:rgba(166,60,53,.12);color:var(--danger)}
|
||||||
|
.pill.running{background:rgba(212,123,55,.12);color:#9a5a20}
|
||||||
|
.mini{
|
||||||
|
display:grid;
|
||||||
|
grid-template-columns:repeat(2,minmax(0,1fr));
|
||||||
|
gap:.7rem;
|
||||||
|
}
|
||||||
|
.stat{
|
||||||
|
padding:.8rem .9rem;
|
||||||
|
border-radius:1rem;
|
||||||
|
background:rgba(255,255,255,.72);
|
||||||
|
border:1px solid rgba(17,19,24,.08);
|
||||||
|
}
|
||||||
|
.stat strong{display:block;font-size:1.35rem}
|
||||||
|
code{
|
||||||
|
font-family:"IBM Plex Mono","SFMono-Regular","Menlo",monospace;
|
||||||
|
font-size:.88em;
|
||||||
|
}
|
||||||
|
@media (max-width: 980px){
|
||||||
|
.grid,.form-grid,.actions,.mini{grid-template-columns:1fr}
|
||||||
|
.mast{align-items:flex-start;flex-direction:column}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<main class="frame">
|
||||||
|
<section class="mast">
|
||||||
|
<div>
|
||||||
|
<div class="eyebrow">Atlas Recovery Plane</div>
|
||||||
|
<h1>Metis Control</h1>
|
||||||
|
<p class="sub">Build replacement node images, verify removable media on the Texas flash host, and keep image templates fresh with sentinel-driven drift tracking.</p>
|
||||||
|
</div>
|
||||||
|
<div class="badge"><strong>Default flash host:</strong> <span id="default-host">{{.State.DefaultFlashHost}}</span></div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="grid">
|
||||||
|
<div class="stack">
|
||||||
|
<article class="card">
|
||||||
|
<h2>Replacement Run</h2>
|
||||||
|
<p class="hint">This UI is meant for the one-shot recovery path: build the node image, verify the card on the flash host, then write it and hand off only the physical swap.</p>
|
||||||
|
<div class="form-grid">
|
||||||
|
<label>Target node
|
||||||
|
<select id="node-select"></select>
|
||||||
|
</label>
|
||||||
|
<label>Flash host
|
||||||
|
<select id="host-select"></select>
|
||||||
|
</label>
|
||||||
|
<label style="grid-column:1 / -1">Detected removable media
|
||||||
|
<select id="device-select"></select>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="actions">
|
||||||
|
<button class="secondary" id="refresh-devices">Refresh media</button>
|
||||||
|
<button class="secondary" id="build-only">Build image only</button>
|
||||||
|
<button id="replace-run">Build and flash</button>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
|
||||||
|
<article class="card">
|
||||||
|
<h2>Live Jobs</h2>
|
||||||
|
<p class="hint">Progress updates stream from the running Metis operation. The replacement flow automatically tries to clear the stale Kubernetes node object before the card write.</p>
|
||||||
|
<div id="jobs" class="list"></div>
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="stack">
|
||||||
|
<article class="card">
|
||||||
|
<h2>Sentinel Watch</h2>
|
||||||
|
<p class="hint">Ariadne should hit the internal sentinel watch route on a schedule. You can also run it manually here when you want the latest template recommendations immediately.</p>
|
||||||
|
<div class="mini">
|
||||||
|
<div class="stat">
|
||||||
|
<span class="meta">Tracked nodes</span>
|
||||||
|
<strong id="snapshot-count">0</strong>
|
||||||
|
</div>
|
||||||
|
<div class="stat">
|
||||||
|
<span class="meta">Class targets</span>
|
||||||
|
<strong id="target-count">0</strong>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="actions" style="grid-template-columns:1fr">
|
||||||
|
<button id="sentinel-watch">Run sentinel watch now</button>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
|
||||||
|
<article class="card">
|
||||||
|
<h2>Recent Changes</h2>
|
||||||
|
<p class="hint">This stream keeps the image/template story digestible: builds, flashes, snapshot intake, and sentinel-driven target changes all land here.</p>
|
||||||
|
<div id="events" class="list"></div>
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</main>
|
||||||
|
<script id="boot" type="application/json">{{.BootJSON}}</script>
|
||||||
|
<script>
|
||||||
|
const boot = JSON.parse(document.getElementById('boot').textContent);
|
||||||
|
let state = boot;
|
||||||
|
const nodeSelect = document.getElementById('node-select');
|
||||||
|
const hostSelect = document.getElementById('host-select');
|
||||||
|
const deviceSelect = document.getElementById('device-select');
|
||||||
|
const jobsEl = document.getElementById('jobs');
|
||||||
|
const eventsEl = document.getElementById('events');
|
||||||
|
const snapshotCountEl = document.getElementById('snapshot-count');
|
||||||
|
const targetCountEl = document.getElementById('target-count');
|
||||||
|
|
||||||
|
function fmtTime(value){
|
||||||
|
if(!value){ return 'pending'; }
|
||||||
|
const date = new Date(value);
|
||||||
|
return isNaN(date.getTime()) ? value : date.toLocaleString();
|
||||||
|
}
|
||||||
|
function fmtBytes(value){
|
||||||
|
if(!value){ return '0 B'; }
|
||||||
|
const units = ['B','KiB','MiB','GiB','TiB'];
|
||||||
|
let size = Number(value);
|
||||||
|
let idx = 0;
|
||||||
|
while(size >= 1024 && idx < units.length - 1){
|
||||||
|
size /= 1024;
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
return size.toFixed(size >= 10 || idx === 0 ? 0 : 1) + ' ' + units[idx];
|
||||||
|
}
|
||||||
|
function setOptions(select, values, labeler){
|
||||||
|
const current = select.value;
|
||||||
|
select.innerHTML = '';
|
||||||
|
values.forEach((value)=>{
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.value = value;
|
||||||
|
option.textContent = labeler ? labeler(value) : value;
|
||||||
|
select.appendChild(option);
|
||||||
|
});
|
||||||
|
if(current && values.includes(current)){ select.value = current; }
|
||||||
|
}
|
||||||
|
function render(){
|
||||||
|
setOptions(nodeSelect, state.nodes.map((n)=>n.name));
|
||||||
|
setOptions(hostSelect, state.flash_hosts);
|
||||||
|
if(!hostSelect.value){ hostSelect.value = state.default_flash_host; }
|
||||||
|
setOptions(deviceSelect, state.devices.map((d)=>d.path), (path)=>{
|
||||||
|
const dev = state.devices.find((item)=>item.path === path);
|
||||||
|
if(!dev){ return path; }
|
||||||
|
return dev.path + ' · ' + fmtBytes(dev.size_bytes) + ' · ' + (dev.model || dev.transport || 'removable media');
|
||||||
|
});
|
||||||
|
|
||||||
|
jobsEl.innerHTML = '';
|
||||||
|
const jobs = state.jobs.length ? state.jobs : [{kind:'idle',status:'done',message:'No active or recent Metis jobs yet.',progress_pct:100,started_at:new Date().toISOString(),finished_at:new Date().toISOString()}];
|
||||||
|
jobs.forEach((job)=>{
|
||||||
|
const wrap = document.createElement('div');
|
||||||
|
wrap.className = 'item';
|
||||||
|
const statusClass = job.status === 'error' ? 'error' : (job.status === 'done' ? 'done' : (job.status === 'running' ? 'running' : ''));
|
||||||
|
const title = job.kind.toUpperCase() + (job.node ? ' · ' + job.node : '');
|
||||||
|
const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
|
||||||
|
const progress = job.written_bytes ? (fmtBytes(job.written_bytes) + ' / ' + fmtBytes(job.total_bytes)) : '';
|
||||||
|
const detail = progress + (job.artifact ? ' · ' + job.artifact : '') + (job.error ? ' · ' + job.error : '');
|
||||||
|
wrap.innerHTML =
|
||||||
|
'<div class="item-head">' +
|
||||||
|
'<span>' + title + '</span>' +
|
||||||
|
'<span class="pill ' + statusClass + '">' + job.status + '</span>' +
|
||||||
|
'</div>' +
|
||||||
|
'<div>' + (job.message || job.stage || 'queued') + '</div>' +
|
||||||
|
'<div class="meta">' + started + '</div>' +
|
||||||
|
'<div class="meta">' + detail + '</div>' +
|
||||||
|
'<div class="bar"><span style="width:' + Math.max(0, Math.min(100, job.progress_pct || 0)) + '%"></span></div>';
|
||||||
|
jobsEl.appendChild(wrap);
|
||||||
|
});
|
||||||
|
|
||||||
|
eventsEl.innerHTML = '';
|
||||||
|
state.events.forEach((event)=>{
|
||||||
|
const wrap = document.createElement('div');
|
||||||
|
wrap.className = 'item';
|
||||||
|
wrap.innerHTML =
|
||||||
|
'<div class="item-head">' +
|
||||||
|
'<span>' + event.summary + '</span>' +
|
||||||
|
'<span class="meta">' + fmtTime(event.time) + '</span>' +
|
||||||
|
'</div>' +
|
||||||
|
'<div class="meta"><code>' + event.kind + '</code></div>';
|
||||||
|
eventsEl.appendChild(wrap);
|
||||||
|
});
|
||||||
|
snapshotCountEl.textContent = state.snapshots.length;
|
||||||
|
targetCountEl.textContent = Object.keys(state.targets || {}).length;
|
||||||
|
}
|
||||||
|
async function refreshState(){
|
||||||
|
const host = hostSelect.value || state.default_flash_host;
|
||||||
|
const resp = await fetch('/api/state?host=' + encodeURIComponent(host));
|
||||||
|
if(resp.ok){
|
||||||
|
state = await resp.json();
|
||||||
|
render();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async function post(path, body){
|
||||||
|
const resp = await fetch(path, {
|
||||||
|
method:'POST',
|
||||||
|
headers:{'Content-Type':'application/json'},
|
||||||
|
body: JSON.stringify(body)
|
||||||
|
});
|
||||||
|
if(!resp.ok){
|
||||||
|
const text = await resp.text();
|
||||||
|
throw new Error(text || ('Request failed for ' + path));
|
||||||
|
}
|
||||||
|
return resp.json();
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('refresh-devices').addEventListener('click', async ()=>{
|
||||||
|
await refreshState();
|
||||||
|
});
|
||||||
|
document.getElementById('build-only').addEventListener('click', async ()=>{
|
||||||
|
await post('/api/jobs/build', {node: nodeSelect.value});
|
||||||
|
await refreshState();
|
||||||
|
});
|
||||||
|
document.getElementById('replace-run').addEventListener('click', async ()=>{
|
||||||
|
await post('/api/jobs/replace', {node: nodeSelect.value, host: hostSelect.value, device: deviceSelect.value});
|
||||||
|
await refreshState();
|
||||||
|
});
|
||||||
|
document.getElementById('sentinel-watch').addEventListener('click', async ()=>{
|
||||||
|
await post('/api/sentinel/watch', {});
|
||||||
|
await refreshState();
|
||||||
|
});
|
||||||
|
hostSelect.addEventListener('change', refreshState);
|
||||||
|
render();
|
||||||
|
setInterval(refreshState, 5000);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>`))
|
||||||
146
pkg/service/server_test.go
Normal file
146
pkg/service/server_test.go
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"metis/pkg/sentinel"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestUIAuthGuardsState verifies the forwarded-auth guard on /api/state:
// a request without auth headers is refused, and one carrying a user plus
// an allowed group ("admin" is in the test app's AllowedGroups) succeeds.
func TestUIAuthGuardsState(t *testing.T) {
	app := newTestApp(t)
	handler := app.Handler()

	// No X-Auth-Request-* headers at all: the guard must reject with 403.
	req := httptest.NewRequest(http.MethodGet, "/api/state", nil)
	resp := httptest.NewRecorder()
	handler.ServeHTTP(resp, req)
	if resp.Code != http.StatusForbidden {
		t.Fatalf("expected forbidden, got %d", resp.Code)
	}

	// Same request with oauth-proxy style identity headers must pass.
	req = httptest.NewRequest(http.MethodGet, "/api/state", nil)
	req.Header.Set("X-Auth-Request-User", "brad")
	req.Header.Set("X-Auth-Request-Groups", "admin")
	resp = httptest.NewRecorder()
	handler.ServeHTTP(resp, req)
	if resp.Code != http.StatusOK {
		t.Fatalf("expected ok, got %d: %s", resp.Code, resp.Body.String())
	}
}
|
||||||
|
|
||||||
|
// TestInternalSnapshotAndWatch drives the internal sentinel endpoints end to
// end: snapshot intake, a manual watch run, and the Prometheus counters both
// operations are expected to increment. Note neither request carries auth
// headers, so the /internal/ routes must be reachable without them.
func TestInternalSnapshotAndWatch(t *testing.T) {
	app := newTestApp(t)
	handler := app.Handler()

	// Intake a node snapshot for titan-15 (the node defined in the test inventory).
	payload := `{"node":"titan-15","collected_at":"2026-03-31T12:00:00Z","snapshot":{"hostname":"titan-15","kernel":"6.6.63","os_image":"Armbian","k3s_version":"v1.31.5+k3s1","containerd":"2.0.0","package_sample":{"containerd":"2.0.0"}}}`
	req := httptest.NewRequest(http.MethodPost, "/internal/sentinel/snapshot", strings.NewReader(payload))
	req.Header.Set("Content-Type", "application/json")
	resp := httptest.NewRecorder()
	handler.ServeHTTP(resp, req)
	if resp.Code != http.StatusOK {
		t.Fatalf("snapshot failed: %d %s", resp.Code, resp.Body.String())
	}

	// Trigger a manual sentinel watch run (no body required).
	req = httptest.NewRequest(http.MethodPost, "/internal/sentinel/watch", nil)
	resp = httptest.NewRecorder()
	handler.ServeHTTP(resp, req)
	if resp.Code != http.StatusOK {
		t.Fatalf("watch failed: %d %s", resp.Code, resp.Body.String())
	}

	// The watch route responds with the Event it recorded.
	var event Event
	if err := json.Unmarshal(resp.Body.Bytes(), &event); err != nil {
		t.Fatalf("decode watch response: %v", err)
	}
	if event.Kind != "sentinel.watch" {
		t.Fatalf("unexpected event kind: %s", event.Kind)
	}

	// Both operations must be visible as counters on the /metrics endpoint.
	metricsReq := httptest.NewRequest(http.MethodGet, "/metrics", nil)
	metricsResp := httptest.NewRecorder()
	handler.ServeHTTP(metricsResp, metricsReq)
	body := metricsResp.Body.String()
	if !strings.Contains(body, `metis_sentinel_snapshots_total{node="titan-15",status="ok"} 1`) {
		t.Fatalf("missing snapshot metric: %s", body)
	}
	if !strings.Contains(body, `metis_sentinel_watch_total{status="ok"} 1`) {
		t.Fatalf("missing watch metric: %s", body)
	}
}
|
||||||
|
|
||||||
|
// newTestApp builds an App backed entirely by a temp directory: a one-class,
// one-node inventory pointing at a tiny local base image via file:// (with the
// image's real sha256 so checksum verification passes), plus a pre-seeded
// snapshot for a second node (titan-17) so state endpoints have history.
//
// NOTE(review): the inventory YAML indentation below was reconstructed from a
// whitespace-mangled render of this file — confirm it matches the original.
func newTestApp(t *testing.T) *App {
	t.Helper()
	dir := t.TempDir()
	// Tiny stand-in for a real OS image; only its bytes/checksum matter here.
	baseImage := filepath.Join(dir, "base.img")
	if err := os.WriteFile(baseImage, []byte("test-image"), 0o644); err != nil {
		t.Fatal(err)
	}
	sum := sha256.Sum256([]byte("test-image"))
	inventoryPath := filepath.Join(dir, "inventory.yaml")
	inv := `
classes:
  - name: rpi4
    arch: arm64
    os: armbian
    image: file://` + baseImage + `
    checksum: sha256:` + hex.EncodeToString(sum[:]) + `
    k3s_version: v1.31.5+k3s1
nodes:
  - name: titan-15
    class: rpi4
    hostname: titan-15
    ip: 192.168.22.43
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: token
    ssh_user: atlas
`
	if err := os.WriteFile(inventoryPath, []byte(inv), 0o644); err != nil {
		t.Fatal(err)
	}
	// All persistence paths live under the temp dir; "titan-22" plays the role
	// of both the local host and the only flash host.
	settings := Settings{
		BindAddr:         ":0",
		InventoryPath:    inventoryPath,
		CacheDir:         filepath.Join(dir, "cache"),
		ArtifactDir:      filepath.Join(dir, "artifacts"),
		HistoryPath:      filepath.Join(dir, "history.jsonl"),
		SnapshotsPath:    filepath.Join(dir, "snapshots.json"),
		TargetsPath:      filepath.Join(dir, "targets.json"),
		DefaultFlashHost: "titan-22",
		FlashHosts:       []string{"titan-22"},
		LocalHost:        "titan-22",
		AllowedGroups:    []string{"admin", "maintainer"},
		MaxDeviceBytes:   300000000000,
	}
	app, err := NewApp(settings)
	if err != nil {
		t.Fatalf("new app: %v", err)
	}
	// Seed a snapshot for a second node so tests start with non-empty state.
	if err := app.StoreSnapshot(SnapshotRecord{
		Node:        "titan-17",
		CollectedAt: time.Now().UTC().Add(-10 * time.Minute),
		Snapshot:    sentinelSnapshot("titan-17", "6.6.63"),
	}); err != nil {
		t.Fatalf("seed snapshot: %v", err)
	}
	return app
}
|
||||||
|
|
||||||
|
func sentinelSnapshot(hostname, kernel string) sentinel.Snapshot {
|
||||||
|
return sentinel.Snapshot{
|
||||||
|
Hostname: hostname,
|
||||||
|
Kernel: kernel,
|
||||||
|
OSImage: "Armbian",
|
||||||
|
K3sVersion: "v1.31.5+k3s1",
|
||||||
|
Containerd: "2.0.0",
|
||||||
|
PackageSample: map[string]string{"containerd": "2.0.0"},
|
||||||
|
}
|
||||||
|
}
|
||||||
91
pkg/service/settings.go
Normal file
91
pkg/service/settings.go
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Settings configures the Metis service runtime. Each field maps to a METIS_*
// environment variable in FromEnv.
type Settings struct {
	BindAddr         string   // HTTP listen address (METIS_BIND_ADDR), e.g. ":8080"
	InventoryPath    string   // inventory YAML path (METIS_INVENTORY_PATH)
	CacheDir         string   // cache directory (METIS_CACHE_DIR)
	ArtifactDir      string   // built-artifact directory (METIS_ARTIFACT_DIR)
	HistoryPath      string   // history JSONL file (METIS_HISTORY_PATH)
	SnapshotsPath    string   // sentinel snapshots JSON file (METIS_SNAPSHOTS_PATH)
	TargetsPath      string   // class targets JSON file (METIS_TARGETS_PATH)
	DefaultFlashHost string   // default flashing station (METIS_DEFAULT_FLASH_HOST)
	FlashHosts       []string // comma-separated in METIS_FLASH_HOSTS
	LocalHost        string   // this host's name (METIS_LOCAL_HOST, defaults to os.Hostname)
	AllowedUsers     []string // comma-separated in METIS_ALLOWED_USERS
	AllowedGroups    []string // comma-separated in METIS_ALLOWED_GROUPS
	MaxDeviceBytes   int64    // METIS_MAX_DEVICE_BYTES; presumably an upper size bound for flash targets — confirm against server code
}
|
||||||
|
|
||||||
|
// FromEnv builds service settings with sensible defaults for local dev and in-cluster use.
|
||||||
|
func FromEnv() Settings {
|
||||||
|
dataDir := getenvDefault("METIS_DATA_DIR", "/var/lib/metis")
|
||||||
|
localHost := getenvDefault("METIS_LOCAL_HOST", hostnameOr("unknown"))
|
||||||
|
defaultFlashHost := getenvDefault("METIS_DEFAULT_FLASH_HOST", localHost)
|
||||||
|
flashHosts := splitList(getenvDefault("METIS_FLASH_HOSTS", defaultFlashHost))
|
||||||
|
return Settings{
|
||||||
|
BindAddr: getenvDefault("METIS_BIND_ADDR", ":8080"),
|
||||||
|
InventoryPath: getenvDefault("METIS_INVENTORY_PATH", "inventory.titan-rpi4.yaml"),
|
||||||
|
CacheDir: getenvDefault("METIS_CACHE_DIR", filepath.Join(dataDir, "cache")),
|
||||||
|
ArtifactDir: getenvDefault("METIS_ARTIFACT_DIR", filepath.Join(dataDir, "artifacts")),
|
||||||
|
HistoryPath: getenvDefault("METIS_HISTORY_PATH", filepath.Join(dataDir, "history.jsonl")),
|
||||||
|
SnapshotsPath: getenvDefault("METIS_SNAPSHOTS_PATH", filepath.Join(dataDir, "snapshots.json")),
|
||||||
|
TargetsPath: getenvDefault("METIS_TARGETS_PATH", filepath.Join(dataDir, "targets.json")),
|
||||||
|
DefaultFlashHost: defaultFlashHost,
|
||||||
|
FlashHosts: flashHosts,
|
||||||
|
LocalHost: localHost,
|
||||||
|
AllowedUsers: splitList(getenvDefault("METIS_ALLOWED_USERS", "")),
|
||||||
|
AllowedGroups: splitList(getenvDefault("METIS_ALLOWED_GROUPS", "admin,maintainer")),
|
||||||
|
MaxDeviceBytes: getenvInt64("METIS_MAX_DEVICE_BYTES", 300000000000),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getenvDefault(key, fallback string) string {
|
||||||
|
if value := strings.TrimSpace(os.Getenv(key)); value != "" {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
|
||||||
|
func getenvInt64(key string, fallback int64) int64 {
|
||||||
|
raw := strings.TrimSpace(os.Getenv(key))
|
||||||
|
if raw == "" {
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
value, err := strconv.ParseInt(raw, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitList splits a comma-separated string into trimmed, non-empty entries.
// Blank input yields nil (not an empty slice) so callers can tell "unset"
// apart from "set but empty".
func splitList(raw string) []string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}
	pieces := strings.Split(raw, ",")
	cleaned := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			cleaned = append(cleaned, trimmed)
		}
	}
	return cleaned
}
|
||||||
|
|
||||||
|
// hostnameOr returns the trimmed OS hostname, or fallback when the hostname
// cannot be determined or is blank.
func hostnameOr(fallback string) string {
	name, err := os.Hostname()
	if err != nil {
		return fallback
	}
	if trimmed := strings.TrimSpace(name); trimmed != "" {
		return trimmed
	}
	return fallback
}
|
||||||
83
pkg/writer/writer.go
Normal file
83
pkg/writer/writer.go
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
package writer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProgressFunc receives write progress updates: written is the cumulative
// number of bytes copied so far and total is the source size in bytes.
type ProgressFunc func(written int64, total int64)
|
||||||
|
|
||||||
|
// WriteImage writes src into dest using a direct buffered copy so callers can
// share the same codepath for files and block devices.
// It is a convenience wrapper around WriteImageWithProgress with no progress
// callback; see that function for error semantics.
func WriteImage(ctx context.Context, src, dest string) error {
	return WriteImageWithProgress(ctx, src, dest, nil)
}
|
||||||
|
|
||||||
|
// WriteImageWithProgress writes src into dest while invoking progress after each chunk.
|
||||||
|
func WriteImageWithProgress(ctx context.Context, src, dest string, progress ProgressFunc) error {
|
||||||
|
if dest == "" {
|
||||||
|
return fmt.Errorf("destination required")
|
||||||
|
}
|
||||||
|
srcInfo, err := os.Stat(src)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("source missing: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return copyFile(ctx, src, dest, srcInfo.Size(), progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isDevicePath reports whether path refers to a node under /dev after lexical
// cleaning (so a traversal like "/dev/../tmp/x" is not treated as a device).
func isDevicePath(path string) bool {
	cleaned := filepath.Clean(path)
	return strings.HasPrefix(cleaned, "/dev/")
}
|
||||||
|
|
||||||
|
func copyFile(ctx context.Context, src, dest string, total int64, progress ProgressFunc) error {
|
||||||
|
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
in, err := os.Open(src)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer in.Close()
|
||||||
|
out, err := os.Create(dest)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
buf := make([]byte, 4*1024*1024)
|
||||||
|
var written int64
|
||||||
|
for {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
nr, readErr := in.Read(buf)
|
||||||
|
if nr > 0 {
|
||||||
|
nw, writeErr := out.Write(buf[:nr])
|
||||||
|
if writeErr != nil {
|
||||||
|
return writeErr
|
||||||
|
}
|
||||||
|
if nw != nr {
|
||||||
|
return io.ErrShortWrite
|
||||||
|
}
|
||||||
|
written += int64(nw)
|
||||||
|
if progress != nil {
|
||||||
|
progress(written, total)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if readErr != nil {
|
||||||
|
if readErr == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return readErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := out.Sync(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
28
pkg/writer/writer_test.go
Normal file
28
pkg/writer/writer_test.go
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package writer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestWriteImageCopiesFile(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
src := filepath.Join(dir, "src.img")
|
||||||
|
dest := filepath.Join(dir, "dest.img")
|
||||||
|
content := []byte("metis-test")
|
||||||
|
if err := os.WriteFile(src, content, 0o644); err != nil {
|
||||||
|
t.Fatalf("write src: %v", err)
|
||||||
|
}
|
||||||
|
if err := WriteImage(context.Background(), src, dest); err != nil {
|
||||||
|
t.Fatalf("write image: %v", err)
|
||||||
|
}
|
||||||
|
got, err := os.ReadFile(dest)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read dest: %v", err)
|
||||||
|
}
|
||||||
|
if string(got) != string(content) {
|
||||||
|
t.Fatalf("expected %q got %q", string(content), string(got))
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
scripts/__pycache__/publish_test_metrics.cpython-314.pyc
Normal file
BIN
scripts/__pycache__/publish_test_metrics.cpython-314.pyc
Normal file
Binary file not shown.
73
scripts/prepare_titan_rpi4_replacement.sh
Executable file
73
scripts/prepare_titan_rpi4_replacement.sh
Executable file
@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/env bash
# Build a node-specific recovery image for a Titan rpi4 worker and optionally
# stage it on a remote flashing station. See usage() for invocation examples.
set -euo pipefail

usage() {
cat <<'EOF'
Usage: prepare_titan_rpi4_replacement.sh <node> [remote-host]

Build a node-specific recovery image for a Titan rpi4 Longhorn worker and
optionally copy it to a remote flashing station such as `tethys`.

Examples:
./scripts/prepare_titan_rpi4_replacement.sh titan-13
./scripts/prepare_titan_rpi4_replacement.sh titan-19 tethys
EOF
}

# A missing node argument or -h/--help both print usage and exit cleanly.
if [ "${1:-}" = "" ] || [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
usage
exit 0
fi

node="$1"
remote_host="${2:-}"
# Resolve the repo root relative to this script so it can run from anywhere.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cache_dir="${METIS_CACHE_DIR:-${HOME}/.cache/metis}"
remote_dir="${METIS_REMOTE_DIR:-/tmp/metis-images}"

# Hard allow-list: only the known replacement targets may be rebuilt.
case "${node}" in
titan-13|titan-19)
;;
*)
echo "Refusing unknown replacement target: ${node}" >&2
exit 1
;;
esac

cd "${repo_root}"

# Default base image location unless the caller pins one explicitly.
if [ -z "${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN:-}" ]; then
export METIS_IMAGE_RPI4_ARMBIAN_LONGHORN="file://${HOME}/Downloads/Armbian_25.8.1_Rpi4b_noble_current_6.12.41.img"
fi

# Pull the k3s join token from the server when not already provided.
# NOTE(review): requires working ssh + passwordless sudo on titan-0a — confirm.
if [ -z "${METIS_K3S_TOKEN:-}" ]; then
export METIS_K3S_TOKEN="$(ssh titan-0a 'sudo cat /var/lib/rancher/k3s/server/node-token')"
fi

# Clear the stale node object so the replacement can rejoin under the same name.
echo "Deleting stale Kubernetes node object for ${node}..."
kubectl delete node "${node}" --ignore-not-found

echo "Building recovery image for ${node}..."
go run ./cmd/metis image \
--inventory inventory.titan-rpi4.yaml \
--node "${node}" \
--cache "${cache_dir}" \
--output "artifacts/${node}.img"

# Print the checksum so it can be compared after any remote copy.
sha256sum "artifacts/${node}.img"

# Optionally stage the image on the flashing station.
if [ -n "${remote_host}" ]; then
echo "Copying artifacts/${node}.img to ${remote_host}:${remote_dir}/ ..."
ssh "${remote_host}" "mkdir -p '${remote_dir}'"
scp "artifacts/${node}.img" "${remote_host}:${remote_dir}/${node}.img"
fi

cat <<EOF

Prepared artifacts/${node}.img

Next steps:
1. Ask for the SD card to be inserted into the flashing station.
2. Run ./scripts/remote_sd_candidates.sh ${remote_host:-tethys}
3. Run ./scripts/remote_flash_titan_image.sh ${remote_host:-tethys} ${node} /dev/sdX
EOF
|
||||||
115
scripts/publish_test_metrics.py
Normal file
115
scripts/publish_test_metrics.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_label(value: str) -> str:
|
||||||
|
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||||
|
|
||||||
|
|
||||||
|
def _label_str(labels: dict[str, str]) -> str:
|
||||||
|
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
|
||||||
|
return "{" + ",".join(parts) + "}" if parts else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _load_coverage(path: str) -> float:
|
||||||
|
with open(path, "r", encoding="utf-8") as handle:
|
||||||
|
payload = json.load(handle)
|
||||||
|
summary = payload.get("summary") or {}
|
||||||
|
percent = summary.get("percent_covered")
|
||||||
|
if isinstance(percent, (int, float)):
|
||||||
|
return float(percent)
|
||||||
|
raise RuntimeError("coverage summary missing percent_covered")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_junit(path: str) -> dict[str, int]:
|
||||||
|
tree = ET.parse(path)
|
||||||
|
root = tree.getroot()
|
||||||
|
|
||||||
|
def _as_int(node, name: str) -> int:
|
||||||
|
raw = node.attrib.get(name) or "0"
|
||||||
|
try:
|
||||||
|
return int(float(raw))
|
||||||
|
except ValueError:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
suites = []
|
||||||
|
if root.tag == "testsuite":
|
||||||
|
suites = [root]
|
||||||
|
elif root.tag == "testsuites":
|
||||||
|
suites = list(root.findall("testsuite"))
|
||||||
|
|
||||||
|
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
|
||||||
|
for suite in suites:
|
||||||
|
totals["tests"] += _as_int(suite, "tests")
|
||||||
|
totals["failures"] += _as_int(suite, "failures")
|
||||||
|
totals["errors"] += _as_int(suite, "errors")
|
||||||
|
totals["skipped"] += _as_int(suite, "skipped")
|
||||||
|
return totals
|
||||||
|
|
||||||
|
|
||||||
|
def _post_metrics(url: str, payload: str) -> None:
    """POST the plain-text metrics payload to the import URL.

    NOTE(review): urllib raises HTTPError for 4xx/5xx before the status check
    runs, so the explicit check below is a defensive fallback only.
    """
    request = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
        method="POST",
        headers={"Content-Type": "text/plain"},
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        if response.status >= 400:
            raise RuntimeError(f"metrics push failed status={response.status}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Publish coverage and JUnit test-result metrics from CI artifacts.

    Reads the push URL and report paths from the environment. Returns 0 on
    success and also when VM_IMPORT_URL is unset (the push is optional);
    raises RuntimeError when a configured report file is missing.
    """
    vm_url = os.getenv("VM_IMPORT_URL", "").strip()
    if not vm_url:
        # No destination configured: treat the push as a no-op, not a failure.
        print("VM_IMPORT_URL not set; skipping metrics push")
        return 0

    coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json")
    junit_path = os.getenv("JUNIT_XML", "build/junit.xml")

    # With a destination configured, missing reports are a hard error.
    if not os.path.exists(coverage_path):
        raise RuntimeError(f"missing coverage file {coverage_path}")
    if not os.path.exists(junit_path):
        raise RuntimeError(f"missing junit file {junit_path}")

    coverage = _load_coverage(coverage_path)
    totals = _load_junit(junit_path)
    # "tests" includes failures/errors/skipped; derive the pass count, clamped
    # at zero in case of inconsistent report attributes.
    passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)

    # CI metadata labels; empty values are dropped by _label_str.
    labels = {
        "job": os.getenv("CI_JOB_NAME", "metis"),
        "branch": os.getenv("BRANCH_NAME", ""),
        "build_number": os.getenv("BUILD_NUMBER", ""),
        "commit": os.getenv("GIT_COMMIT", ""),
        "repo": os.getenv("REPO_NAME", "metis"),
    }

    prefix = os.getenv("METRICS_PREFIX", "ariadne_ci")
    # One exposition-format sample per line.
    lines = [
        f"{prefix}_coverage_percent{_label_str(labels)} {coverage:.3f}",
        f"{prefix}_tests_total{_label_str({**labels, 'result': 'passed'})} {passed}",
        f"{prefix}_tests_total{_label_str({**labels, 'result': 'failed'})} {totals['failures']}",
        f"{prefix}_tests_total{_label_str({**labels, 'result': 'error'})} {totals['errors']}",
        f"{prefix}_tests_total{_label_str({**labels, 'result': 'skipped'})} {totals['skipped']}",
        f"{prefix}_build_info{_label_str(labels)} 1",
    ]

    payload = "\n".join(lines) + "\n"
    _post_metrics(vm_url, payload)
    print("metrics push complete")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Any failure (missing reports, HTTP error) exits non-zero so CI notices,
    # while still printing a readable reason instead of a traceback.
    try:
        sys.exit(main())
    except Exception as exc:
        print(f"metrics push failed: {exc}")
        sys.exit(1)
|
||||||
86
scripts/remote_flash_titan_image.sh
Executable file
86
scripts/remote_flash_titan_image.sh
Executable file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env bash
# Copy a prepared Titan replacement image to a remote flashing station and
# write it to a removable block device, with safety checks at every step.
set -euo pipefail

usage() {
cat <<'EOF'
Usage: remote_flash_titan_image.sh <remote-host> <node> <device>

Copy a prepared Titan replacement image to a remote flashing station and write it
to the specified removable block device.

Example:
./scripts/remote_flash_titan_image.sh tethys titan-13 /dev/sdd
EOF
}

# All three arguments are required; -h/--help prints usage.
if [ "${3:-}" = "" ] || [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
usage
exit 0
fi

remote_host="$1"
node="$2"
device="$3"
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
remote_dir="${METIS_REMOTE_DIR:-/tmp/metis-images}"
image_path="${repo_root}/artifacts/${node}.img"
max_bytes="${METIS_SD_MAX_BYTES:-300000000000}"

if [ ! -f "${image_path}" ]; then
echo "Missing local image: ${image_path}" >&2
exit 1
fi

# Only allow device paths that look like whole disks (sd*, mmcblk*, nvme*n1).
case "${device}" in
/dev/sd*|/dev/mmcblk*|/dev/nvme*n1)
;;
*)
echo "Refusing suspicious device path: ${device}" >&2
exit 1
;;
esac

# Inspect the device on the remote host; empty output means it is absent.
device_info="$(ssh "${remote_host}" "lsblk -b -dn -o NAME,TRAN,RM,HOTPLUG,SIZE '${device}' 2>/dev/null" || true)"
if [ -z "${device_info}" ]; then
echo "Could not inspect remote device ${device} on ${remote_host}" >&2
exit 1
fi

read -r remote_name remote_tran remote_rm remote_hotplug remote_size <<<"${device_info}"
if [ "/dev/${remote_name}" != "${device}" ]; then
echo "Remote device mismatch: expected ${device}, got /dev/${remote_name}" >&2
exit 1
fi
# Size cap guards against flashing a large fixed disk by mistake.
if [ "${remote_size}" -gt "${max_bytes}" ]; then
echo "Refusing to flash ${device}: size ${remote_size} is larger than ${max_bytes} bytes" >&2
exit 1
fi
# Require at least one removable-media signal: USB transport, RM, or HOTPLUG.
if [ "${remote_tran}" != "usb" ] && [ "${remote_rm}" != "1" ] && [ "${remote_hotplug}" != "1" ]; then
echo "Refusing to flash ${device}: not detected as removable/hotplug media (${device_info})" >&2
exit 1
fi

echo "Copying ${image_path} to ${remote_host}:${remote_dir}/${node}.img ..."
ssh "${remote_host}" "mkdir -p '${remote_dir}'"
scp "${image_path}" "${remote_host}:${remote_dir}/${node}.img"

# Verify the copy before writing anything to the card.
# BUG FIX: the awk field reference must reach the remote shell as $1. The
# previous \\$1 inside local double quotes became a literal backslash followed
# by the *locally expanded* $1 (the remote host name), producing an invalid
# remote awk program and aborting under `set -e`. \$1 passes $1 through intact.
local_sha="$(sha256sum "${image_path}" | awk '{print $1}')"
remote_sha="$(ssh "${remote_host}" "sha256sum '${remote_dir}/${node}.img' | awk '{print \$1}'")"
if [ "${local_sha}" != "${remote_sha}" ]; then
echo "Checksum mismatch after copy: local=${local_sha} remote=${remote_sha}" >&2
exit 1
fi

echo "About to flash ${node}.img to ${device} on ${remote_host}."
echo "You will be prompted for the remote sudo password."
# -t allocates a TTY so sudo can prompt; conv=fsync + sync + flushbufs push
# the data to the card before the command returns.
ssh -t "${remote_host}" "lsblk '${device}' && sudo dd if='${remote_dir}/${node}.img' of='${device}' bs=4M conv=fsync status=progress && sync && sudo blockdev --flushbufs '${device}'"

cat <<EOF

Flash complete for ${node} on ${remote_host}:${device}

Next steps:
1. Tell your helper to remove the flashed card and swap it into ${node}.
2. Tell them to restore power to the Pi.
3. Watch the node with: kubectl get nodes -w
EOF
|
||||||
12
scripts/remote_sd_candidates.sh
Executable file
12
scripts/remote_sd_candidates.sh
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
# List USB hotplug disks on a remote host that are small enough to plausibly
# be SD cards / flash media — candidates for remote_flash_titan_image.sh.
set -euo pipefail

remote_host="${1:-tethys}"
max_bytes="${METIS_SD_MAX_BYTES:-300000000000}"

# The pipeline runs entirely on the remote host: lsblk -S lists SCSI/USB
# disks, and each line is filtered to usb + hotplug devices no larger than
# max_bytes, printed as "/dev/NAME<TAB>SIZE<TAB>MODEL<TAB>SERIAL".
# NOTE(review): `read -r ... model serial` splits on whitespace, so a MODEL
# value containing spaces would shift the SERIAL column — confirm lsblk output.
ssh "${remote_host}" "lsblk -S -b -dn -o NAME,TRAN,RM,HOTPLUG,SIZE,MODEL,SERIAL | while read -r name tran rm hotplug size model serial; do
if [ \"\${tran}\" = usb ] && [ \"\${hotplug}\" = 1 ] && [ \"\${size}\" -le ${max_bytes} ]; then
human=\$(numfmt --to=iec --suffix=B \"\${size}\" 2>/dev/null || printf '%sB' \"\${size}\")
printf '/dev/%s\t%s\t%s\t%s\n' \"\${name}\" \"\${human}\" \"\${model}\" \"\${serial}\"
fi
done"
|
||||||
Loading…
x
Reference in New Issue
Block a user