From ae85dcfeaa6c44277c22be3962fceb0ed3c8ac01 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 7 Oct 2025 23:26:27 -0500 Subject: [PATCH] monitoring add, jellyfin/pegasus update, and traefik tweaks --- .../flux-system/kustomization-monitoring.yaml | 14 ++ .../flux-system/kustomization-traefik.yaml | 18 ++ infrastructure/flux-system/kustomization.yaml | 2 + infrastructure/sources/helm/grafana.yaml | 1 + infrastructure/sources/helm/hashicorp.yaml | 1 + infrastructure/sources/helm/jetstack.yaml | 1 + infrastructure/sources/helm/prometheus.yaml | 1 + .../sources/helm/victoria-metrics.yaml | 9 + infrastructure/traefik/deployment.yaml | 9 + infrastructure/traefik/kustomization.yaml | 1 + infrastructure/traefik/service.yaml | 20 ++ scripts/longhorn_volume_usage.fish | 2 + scripts/styx_kioskification.sh | 218 ++++++++++++++++++ scripts/styx_prep.sh | 195 ++++++++++++++++ services/jellyfin/deployment.yaml | 2 +- services/jellyfin/pvc.yaml | 14 ++ services/monitoring/helmrelease.yaml | 206 +++++++++++++++++ services/monitoring/kustomization.yaml | 8 + services/monitoring/namespace.yaml | 4 + services/monitoring/rbac.yaml | 33 +++ services/pegasus/deployment.yaml | 2 +- 21 files changed, 759 insertions(+), 2 deletions(-) create mode 100644 infrastructure/flux-system/kustomization-monitoring.yaml create mode 100644 infrastructure/flux-system/kustomization-traefik.yaml create mode 100644 infrastructure/sources/helm/victoria-metrics.yaml create mode 100644 infrastructure/traefik/service.yaml mode change 100644 => 100755 scripts/longhorn_volume_usage.fish create mode 100644 scripts/styx_kioskification.sh create mode 100755 scripts/styx_prep.sh create mode 100644 services/monitoring/helmrelease.yaml create mode 100644 services/monitoring/kustomization.yaml create mode 100644 services/monitoring/namespace.yaml create mode 100644 services/monitoring/rbac.yaml diff --git a/infrastructure/flux-system/kustomization-monitoring.yaml 
b/infrastructure/flux-system/kustomization-monitoring.yaml new file mode 100644 index 0000000..ffe7dcd --- /dev/null +++ b/infrastructure/flux-system/kustomization-monitoring.yaml @@ -0,0 +1,14 @@ +# infrastructure/flux-system/kustomization-monitoring.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: monitoring + namespace: flux-system +spec: + interval: 10m + path: ./services/monitoring + prune: true + sourceRef: + kind: GitRepository + name: flux-system + wait: true diff --git a/infrastructure/flux-system/kustomization-traefik.yaml b/infrastructure/flux-system/kustomization-traefik.yaml new file mode 100644 index 0000000..db0239f --- /dev/null +++ b/infrastructure/flux-system/kustomization-traefik.yaml @@ -0,0 +1,18 @@ +# infrastructure/flux-system/kustomization-traefik.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: traefik + namespace: flux-system +spec: + interval: 10m + path: ./infrastructure/traefik + targetNamespace: traefik + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + dependsOn: + - name: core + wait: true diff --git a/infrastructure/flux-system/kustomization.yaml b/infrastructure/flux-system/kustomization.yaml index e6e5dd1..3707daf 100644 --- a/infrastructure/flux-system/kustomization.yaml +++ b/infrastructure/flux-system/kustomization.yaml @@ -11,8 +11,10 @@ resources: - kustomization-vault.yaml - kustomization-jitsi.yaml - kustomization-crypto.yaml +- kustomization-traefik.yaml - kustomization-monerod.yaml - kustomization-pegasus.yaml - kustomization-jellyfin.yaml - kustomization-xmr-miner.yaml +- kustomization-monitoring.yaml - kustomization-longhorn-ui.yaml diff --git a/infrastructure/sources/helm/grafana.yaml b/infrastructure/sources/helm/grafana.yaml index 1235012..7102fac 100644 --- a/infrastructure/sources/helm/grafana.yaml +++ b/infrastructure/sources/helm/grafana.yaml @@ -1,3 +1,4 @@ +# 
infrastructure/sources/helm/grafana.yaml apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: diff --git a/infrastructure/sources/helm/hashicorp.yaml b/infrastructure/sources/helm/hashicorp.yaml index edf563e..925c2c9 100644 --- a/infrastructure/sources/helm/hashicorp.yaml +++ b/infrastructure/sources/helm/hashicorp.yaml @@ -1,3 +1,4 @@ +# infrastructure/sources/helm/hashicorp.yaml apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: diff --git a/infrastructure/sources/helm/jetstack.yaml b/infrastructure/sources/helm/jetstack.yaml index e842402..f571b29 100644 --- a/infrastructure/sources/helm/jetstack.yaml +++ b/infrastructure/sources/helm/jetstack.yaml @@ -1,3 +1,4 @@ +# infrastructure/sources/helm/jetstack.yaml apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: diff --git a/infrastructure/sources/helm/prometheus.yaml b/infrastructure/sources/helm/prometheus.yaml index 33109dc..39f700c 100644 --- a/infrastructure/sources/helm/prometheus.yaml +++ b/infrastructure/sources/helm/prometheus.yaml @@ -1,3 +1,4 @@ +# infrastructure/sources/helm/prometheus.yaml apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: diff --git a/infrastructure/sources/helm/victoria-metrics.yaml b/infrastructure/sources/helm/victoria-metrics.yaml new file mode 100644 index 0000000..cf2f3ee --- /dev/null +++ b/infrastructure/sources/helm/victoria-metrics.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/victoria-metrics.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: victoria-metrics + namespace: flux-system +spec: + interval: 1h + url: https://victoriametrics.github.io/helm-charts/ diff --git a/infrastructure/traefik/deployment.yaml b/infrastructure/traefik/deployment.yaml index 35a1922..ba16909 100644 --- a/infrastructure/traefik/deployment.yaml +++ b/infrastructure/traefik/deployment.yaml @@ -35,6 +35,12 @@ items: - --entrypoints.web.address=:80 - 
--entrypoints.websecure.address=:443 - --api.dashboard=true + - --metrics.prometheus=true + - --metrics.prometheus.addEntryPointsLabels=true + - --metrics.prometheus.addRoutersLabels=true + - --metrics.prometheus.addServicesLabels=true + - --entrypoints.metrics.address=:9100 + - --metrics.prometheus.entryPoint=metrics image: traefik:v3.3.3 imagePullPolicy: IfNotPresent name: traefik @@ -48,6 +54,9 @@ items: - containerPort: 8080 name: admin protocol: TCP + - containerPort: 9100 + name: metrics + protocol: TCP terminationMessagePath: /dev/termination-log terminationMessagePolicy: File dnsPolicy: ClusterFirst diff --git a/infrastructure/traefik/kustomization.yaml b/infrastructure/traefik/kustomization.yaml index ac7c74d..1dce445 100644 --- a/infrastructure/traefik/kustomization.yaml +++ b/infrastructure/traefik/kustomization.yaml @@ -9,3 +9,4 @@ resources: - serviceaccount.yaml - clusterrole.yaml - clusterrolebinding.yaml + - service.yaml diff --git a/infrastructure/traefik/service.yaml b/infrastructure/traefik/service.yaml new file mode 100644 index 0000000..8d0f078 --- /dev/null +++ b/infrastructure/traefik/service.yaml @@ -0,0 +1,20 @@ +# infrastructure/traefik/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: traefik-metrics + namespace: traefik + labels: + app: traefik + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app: traefik + ports: + - name: metrics + port: 9100 + targetPort: metrics diff --git a/scripts/longhorn_volume_usage.fish b/scripts/longhorn_volume_usage.fish old mode 100644 new mode 100755 index 309e692..b2f9f7b --- a/scripts/longhorn_volume_usage.fish +++ b/scripts/longhorn_volume_usage.fish @@ -1,3 +1,5 @@ +#!/usr/bin/env fish + function pvc-usage --description "Show Longhorn PVC usage (human-readable) mapped to namespace/name" begin kubectl -n longhorn-system get volumes.longhorn.io -o json \ diff --git 
a/scripts/styx_kioskification.sh b/scripts/styx_kioskification.sh
new file mode 100644
index 0000000..ac0c2db
--- /dev/null
+++ b/scripts/styx_kioskification.sh
@@ -0,0 +1,231 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# 0) Create dedicated user if it doesn't exist
+if ! id -u styx >/dev/null 2>&1; then
+  sudo useradd -m -s /bin/bash styx
+  echo "Created user 'styx'"
+fi
+
+# 1) App directory
+sudo mkdir -p /opt/styx-kiosk/keys
+sudo chown -R styx:styx /opt/styx-kiosk
+
+# 2) Drop the kiosk app (written below) into place
+sudo tee /opt/styx-kiosk/kiosk.py >/dev/null <<'PY'
+#!/usr/bin/env python3
+import base64, json, os, subprocess, threading, tempfile
+from datetime import datetime, timezone
+import tkinter as tk
+from tkinter import ttk, messagebox
+
+APP_TITLE = "STYX Airgap Signer"
+CAMERA_DEV = os.environ.get("ZBAR_DEV", "/dev/video0")
+KEY_PATH = os.environ.get("STYX_KEY", "/vault/keys/signer_ed25519.pem")  # in the LUKS vault
+ALGO = os.environ.get("STYX_ALGO", "ed25519")  # or 'secp256r1'
+QR_TMP = "/tmp/styx_signed.png"
+
+def zbar_scan_oneshot():
+    """Scan one code with zbarcam; return its text, or None on empty output, the 30 s timeout or any other failure."""
+    # --raw -> data only; --nodisplay -> no preview window; --oneshot -> exit after first code
+    # (zbarcam supports --oneshot; prints one code and exits).
+    cmd = ["zbarcam", "--raw", "--nodisplay", "--oneshot", CAMERA_DEV]
+    try:
+        out = subprocess.check_output(cmd, text=True, timeout=30)
+        out = out.strip()
+        return out if out else None
+    except Exception:
+        return None
+
+def openssl_pub_der_b64(key_path):
+    """Return, base64-encoded, the DER public key that `openssl pkey -pubout` derives from key_path."""
+    der = subprocess.check_output(["openssl","pkey","-in",key_path,"-pubout","-outform","DER"])
+    return base64.b64encode(der).decode()
+
+def sign_bytes(msg: bytes, key_path: str, algo: str) -> bytes:
+    """Write msg to a temp file and sign it with the openssl CLI; raises RuntimeError for an unsupported algo."""
+    with tempfile.NamedTemporaryFile(delete=False) as f:
+        f.write(msg)
+        msg_path = f.name
+    try:
+        if algo.lower() == "ed25519":
+            # Ed25519 expects raw message; OpenSSL handles hashing internally.
+            sig = subprocess.check_output(
+                ["openssl","pkeyutl","-sign","-inkey",key_path,"-rawin","-in",msg_path]
+            )
+            return sig
+        elif algo.lower() in ("secp256r1","prime256v1","p256"):
+            # ECDSA over P-256; hash with SHA-256; OpenSSL returns DER-encoded (r,s)
+            sig = subprocess.check_output(
+                ["openssl","dgst","-sha256","-sign",key_path,msg_path]
+            )
+            return sig
+        else:
+            raise RuntimeError(f"Unsupported algo: {algo}")
+    finally:
+        # Best-effort removal of the temp message file; only filesystem errors are ignored.
+        try: os.unlink(msg_path)
+        except OSError: pass
+
+def make_signed_envelope(scanned_text: str, key_path: str, algo: str) -> dict:
+    """Sign the scanned text and return the response dict (signature, public key, payload digest, echo of the scan, host, UTC time)."""
+    # Accept either raw string or JSON with 'tx_bytes' (base64) or 'message'
+    try:
+        obj = json.loads(scanned_text)
+        if "tx_bytes" in obj:
+            msg = base64.b64decode(obj["tx_bytes"])
+        elif "message" in obj:
+            msg = obj["message"].encode()
+        else:
+            # If it's JSON but doesn't carry known fields, sign canonical JSON bytes
+            msg = json.dumps(obj, sort_keys=True, separators=(",",":")).encode()
+        request_id = obj.get("request_id")
+    except Exception:
+        # Non-JSON → treat the scanned text as the message to sign
+        msg = scanned_text.encode()
+        request_id = None
+
+    sig = sign_bytes(msg, key_path, algo)
+    env = {
+        "algo": algo.lower(),
+        "signature_b64": base64.b64encode(sig).decode(),
+        "pubkey_spki_der_b64": openssl_pub_der_b64(key_path),
+        "payload_sha256_b64": base64.b64encode(subprocess.check_output(["openssl","dgst","-sha256","-binary"], input=msg)).decode(),
+        "quote_raw": scanned_text,
+        "request_id": request_id,
+        "device": os.uname().nodename,
+        "ts_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+    }
+    return env
+
+def qrencode_to_file(text: str, path: str):
+    # Use qrencode CLI to render a PNG we can display.
+    subprocess.run(["qrencode","-l","M","-s","16","-t","PNG","-o",path], input=text.encode(), check=True)
+
+class App(tk.Tk):
+    """Fullscreen Tk window: SCAN reads one QR code, signs it, and shows the signed envelope as a QR image."""
+    def __init__(self):
+        super().__init__()
+        self.title(APP_TITLE)
+        self.attributes("-fullscreen", True)
+        self.configure(background="black")
+        # NOTE(review): the event sequence was an empty string here (an angle-bracket token looks stripped); <Escape> is assumed - confirm.
+        self.bind("<Escape>", lambda e: self.quit())  # for maintenance only
+
+        s = ttk.Style(self)
+        s.configure("Big.TButton", font=("DejaVu Sans", 48), padding=24)
+        s.configure("Big.TLabel", font=("DejaVu Sans", 32), foreground="white", background="black")
+
+        self.container = tk.Frame(self, bg="black")
+        self.container.pack(expand=True, fill="both")
+
+        self.status = ttk.Label(self.container, text="Ready", style="Big.TLabel")
+        self.status.pack(pady=20)
+
+        self.scan_btn = ttk.Button(self.container, text="SCAN", style="Big.TButton", command=self.start_scan)
+        self.scan_btn.pack(pady=20)
+
+        self.image_label = tk.Label(self.container, bg="black")
+        self.image_label.pack(pady=10)
+
+        self.new_btn = ttk.Button(self.container, text="NEW SCAN", style="Big.TButton", command=self.reset)
+        self.new_btn.pack_forget()
+
+        self.note = ttk.Label(self.container, text="", style="Big.TLabel")
+        self.note.pack(pady=10)
+
+        if not os.path.exists(KEY_PATH):
+            self.status.config(text=f"Key not found at {KEY_PATH}\nInsert/unlock vault to proceed.")
+
+    def reset(self):
+        self.image_label.configure(image="")
+        self.image_label.image = None
+        self.new_btn.pack_forget()
+        self.note.config(text="")
+        self.status.config(text="Ready")
+        self.scan_btn.config(state="normal")
+
+    def start_scan(self):
+        if not os.path.exists(KEY_PATH):
+            messagebox.showerror("Key missing", f"Signing key not found at:\n{KEY_PATH}\nUnlock your vault.")
+            return
+        self.status.config(text="Scanning…")
+        self.scan_btn.config(state="disabled")
+        threading.Thread(target=self._do_scan_and_sign, daemon=True).start()
+
+    def _do_scan_and_sign(self):
+        scanned = zbar_scan_oneshot()
+        if not scanned:
+            self.after(0, self._scan_failed)
+            return
+        try:
+            envelope = make_signed_envelope(scanned, KEY_PATH, ALGO)
+            payload = json.dumps(envelope, separators=(",",":"))
+            qrencode_to_file(payload, QR_TMP)
+            self.after(0, self._show_qr, envelope)
+        except Exception as e:
+            # 'e' is unbound once this except block ends, so hand the text over as an argument instead of closing over it.
+            self.after(0, self._error, str(e))
+
+    def _error(self, msg):
+        # Scheduled from _do_scan_and_sign when signing or QR encoding fails: show the reason and re-enable SCAN.
+        self.status.config(text=f"Error: {msg}")
+        self.scan_btn.config(state="normal")
+
+    def _scan_failed(self):
+        self.status.config(text="No QR detected. Try again.")
+        self.scan_btn.config(state="normal")
+
+    def _show_qr(self, envelope):
+        # Display the PNG produced by qrencode
+        try:
+            img = tk.PhotoImage(file=QR_TMP)
+            self.image_label.configure(image=img)
+            self.image_label.image = img
+        except Exception as e:
+            self.status.config(text=f"QR render failed: {e}")
+            self.scan_btn.config(state="normal")
+            return
+        self.status.config(text="Signed. Show this QR to your online box.")
+        self.note.config(text=f"Algo: {envelope['algo']} Host: {envelope['device']}")
+        self.new_btn.pack(pady=20)
+
+if __name__ == "__main__":
+    App().mainloop()
+PY
+sudo chmod +x /opt/styx-kiosk/kiosk.py
+sudo chown -R styx:styx /opt/styx-kiosk
+
+# 3) Minimal X session: openbox + kiosk; no mouse pointer
+sudo -u styx tee /home/styx/.xinitrc >/dev/null <<'XRC'
+xset -dpms
+xset s off
+xset s noblank
+# If 'unclutter' is installed, uncomment the next line to hide cursor:
+# unclutter -idle 0 -root &
+openbox-session &
+/opt/styx-kiosk/kiosk.py
+XRC
+sudo chown styx:styx /home/styx/.xinitrc
+sudo chmod 0755 /home/styx/.xinitrc
+
+# 4) Autologin the 'styx' user on tty1, auto-start X
+sudo mkdir -p /etc/systemd/system/getty@tty1.service.d
+sudo tee /etc/systemd/system/getty@tty1.service.d/override.conf >/dev/null <<'OVR'
+[Service]
+ExecStart=
+ExecStart=-/sbin/agetty --autologin styx --noclear %I $TERM
+Type=idle
+OVR
+
+sudo -u styx tee -a /home/styx/.bash_profile >/dev/null <<'BRC'
+# Start X on the first tty automatically, headless
+if [ -z "$DISPLAY" ] && [ "$(tty)" = "/dev/tty1" ]; then
+  exec startx -- -nocursor
+fi
+BRC
+
+sudo systemctl daemon-reload
+sudo systemctl enable getty@tty1.service
+
+echo "Done.
Reboot to try the kiosk."
diff --git a/scripts/styx_prep.sh b/scripts/styx_prep.sh
new file mode 100755
index 0000000..0dfcf7f
--- /dev/null
+++ b/scripts/styx_prep.sh
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+# -E (errtrace) lets the ERR trap installed in main() also fire inside the helper functions.
+set -Eeuo pipefail
+
+# === CONFIG ===
+# Every value can be overridden from the environment (require_root re-executes with `sudo -E`).
+STYX_USER="${STYX_USER:-styx}"
+STYX_PASS="${STYX_PASS:-TempPass#123}"  # change at first login
+STYX_HOSTNAME="${STYX_HOSTNAME:-styx}"
+SSH_PUBKEY="${SSH_PUBKEY:-}"  # e.g., 'ssh-ed25519 AAAA... your@host' (optional)
+
+# === helpers ===
+# Re-execute this script through sudo (keeping the environment) when not already root.
+require_root() {
+  if [[ $EUID -ne 0 ]]; then exec sudo -E "$0" "$@"; fi
+}
+
+ensure_binfmt_arm64() {
+  # If binfmt for arm64 isn't registered, register it via Docker (idempotent).
+  if [[ ! -e /proc/sys/fs/binfmt_misc/qemu-aarch64 ]]; then
+    command -v docker >/dev/null || { echo "Docker required to register binfmt (sudo pacman -S docker)"; exit 1; }
+    sudo systemctl enable --now docker >/dev/null 2>&1 || true
+    sudo docker run --rm --privileged tonistiigi/binfmt --install arm64
+  fi
+}
+
+# Set BOOT and ROOT to the first partitions labelled 'system-boot' and 'writable'; exit 1 if either is missing.
+find_parts() {
+  BOOT=$(lsblk -o LABEL,PATH -nr | awk '$1=="system-boot"{print $2}' | head -n1)
+  ROOT=$(lsblk -o LABEL,PATH -nr | awk '$1=="writable"{print $2}' | head -n1)
+  if [[ -z "${BOOT:-}" || -z "${ROOT:-}" ]]; then
+    echo "Could not find 'system-boot'/'writable' on any device."
+    lsblk -o NAME,SIZE,FSTYPE,LABEL,PATH -nr
+    exit 1
+  fi
+}
+
+# Mount ROOT and BOOT under /mnt/pi-root and /mnt/pi-boot and prepare the rootfs for chroot use.
+mount_parts() {
+  mkdir -p /mnt/pi-boot /mnt/pi-root
+  mount "$ROOT" /mnt/pi-root
+  mount "$BOOT" /mnt/pi-boot
+
+  # Bind only what we need (avoid /run to prevent postinst fights)
+  for d in dev dev/pts proc sys; do mount --bind "/$d" "/mnt/pi-root/$d"; done
+
+  # Ubuntu images use a resolv.conf symlink—replace with a real file
+  if [[ -L /mnt/pi-root/etc/resolv.conf || ! -e /mnt/pi-root/etc/resolv.conf ]]; then
+    rm -f /mnt/pi-root/etc/resolv.conf
+    cat /etc/resolv.conf > /mnt/pi-root/etc/resolv.conf
+  fi
+}
+
+# Run the package, user, SSH and hostname setup inside the mounted rootfs via chroot.
+prep_chroot() {
+  # Block service starts inside chroot (no systemd there)
+  cat >/mnt/pi-root/usr/sbin/policy-rc.d <<'EOF'
+#!/bin/sh
+exit 101
+EOF
+  chmod +x /mnt/pi-root/usr/sbin/policy-rc.d
+
+  # All the work happens inside the ARM64 rootfs
+  CHCMD=$(cat <<'EOS'
+set -euo pipefail
+export DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC
+# Ensure sbin is in PATH so user/group tools work
+export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+apt-get update
+apt-get -y full-upgrade
+
+# Remove snaps and keep them gone (Ubuntu for Pi ships with snaps)
+apt-get -y purge snapd || true
+rm -rf /snap /var/snap /var/lib/snapd /home/*/snap || true
+mkdir -p /etc/apt/preferences.d
+printf 'Package: snapd\nPin: release *\nPin-Priority: -10\n' > /etc/apt/preferences.d/nosnap.pref
+
+# Ensure user/group tools exist
+apt-get install -y passwd adduser || true
+getent group i2c >/dev/null || /usr/sbin/groupadd i2c
+
+# Base packages
+BASE_PKGS="openssh-server git i2c-tools python3-smbus python3-pil zbar-tools qrencode lm-sensors"
+apt-get install -y $BASE_PKGS
+
+# ------- OLED (Luma) -------
+# Prefer distro package; fall back to pip if not present in this release
+if ! dpkg -s python3-luma.oled >/dev/null 2>&1; then
+  apt-get update
+  if ! apt-get install -y python3-luma.oled; then
+    apt-get install -y python3-pip
+    pip3 install --no-input --break-system-packages luma.oled
+  fi
+fi
+
+# ------- Camera apps -------
+# Ubuntu renamed libcamera-apps -> rpicam-apps for Raspberry Pi.
+# Try in order; tolerate absence (the box might be display-only).
+apt-get update
+if ! apt-get install -y rpicam-apps; then
+  apt-get install -y libcamera-apps || apt-get install -y libcamera-tools || true
+fi
+
+# Enable SSH on boot (no systemctl in chroot)
+mkdir -p /etc/systemd/system/multi-user.target.wants
+ln -sf /lib/systemd/system/ssh.service /etc/systemd/system/multi-user.target.wants/ssh.service
+
+# Create user and set password
+if ! id -u STYX_USER >/dev/null 2>&1; then
+  /usr/sbin/useradd -m -s /bin/bash -G sudo,video,i2c STYX_USER
+fi
+echo 'STYX_USER:STYX_PASS' | /usr/sbin/chpasswd
+
+# Optional: preload SSH key
+if [ -n 'SSH_PUBKEY' ] && echo 'SSH_PUBKEY' | grep -q 'ssh-'; then
+  install -d -m700 /home/STYX_USER/.ssh
+  echo 'SSH_PUBKEY' >> /home/STYX_USER/.ssh/authorized_keys
+  chmod 600 /home/STYX_USER/.ssh/authorized_keys
+  chown -R STYX_USER:STYX_USER /home/STYX_USER/.ssh
+fi
+
+# Freenove code
+git clone https://github.com/Freenove/Freenove_Computer_Case_Kit_for_Raspberry_Pi.git /opt/freenove || true
+
+# Hostname
+echo 'STYX_HOSTNAME' > /etc/hostname
+if grep -q '^127\.0\.1\.1' /etc/hosts; then
+  sed -i 's/^127\.0\.1\.1.*/127.0.1.1\tSTYX_HOSTNAME/' /etc/hosts
+else
+  echo -e '127.0.1.1\tSTYX_HOSTNAME' >> /etc/hosts
+fi
+
+apt-get clean
+EOS
+)
+  # Inject config values; they land inside single quotes above, so they must not contain a single quote.
+  CHCMD="${CHCMD//STYX_USER/${STYX_USER}}"
+  CHCMD="${CHCMD//STYX_PASS/${STYX_PASS}}"
+  CHCMD="${CHCMD//STYX_HOSTNAME/${STYX_HOSTNAME}}"
+  CHCMD="${CHCMD//SSH_PUBKEY/${SSH_PUBKEY}}"
+
+  chroot /mnt/pi-root /bin/bash -lc "$CHCMD"
+}
+
+install_service_host() {
+  # Systemd unit for the Freenove example app
+  mkdir -p /mnt/pi-root/etc/systemd/system/multi-user.target.wants
+  cat >/mnt/pi-root/etc/systemd/system/freenove-case.service <<'SERVICE'
+[Unit]
+Description=Freenove Case OLED/Fans/LEDs
+After=multi-user.target
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/python3 /opt/freenove/Code/application.py
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
+SERVICE
+  ln -sf /etc/systemd/system/freenove-case.service \
+    /mnt/pi-root/etc/systemd/system/multi-user.target.wants/freenove-case.service || true
+}
+
+boot_tweaks() {
+  # Enable I2C and set DSI panel on the BOOT partition
+  grep -q 'dtparam=i2c_arm=on' /mnt/pi-boot/config.txt || echo 'dtparam=i2c_arm=on' >> /mnt/pi-boot/config.txt
+  # Append kernel cmdline only once
+  if ! grep -q 'DSI-1:800x480@60D' /mnt/pi-boot/cmdline.txt 2>/dev/null; then
+    sed -i '1 s#$# video=DSI-1:800x480@60D video=HDMI-A-1:off video=HDMI-A-2:off#' /mnt/pi-boot/cmdline.txt || true
+  fi
+}
+
+# Remove the policy-rc.d shim and lazily unmount everything; every step tolerates failure, so it can run twice.
+cleanup() {
+  rm -f /mnt/pi-root/usr/sbin/policy-rc.d || true
+  for d in dev/pts dev proc sys; do umount -lf "/mnt/pi-root/$d" 2>/dev/null || true; done
+  umount -lf /mnt/pi-boot 2>/dev/null || true
+  umount -lf /mnt/pi-root 2>/dev/null || true
+  sync || true
+}
+
+# Entry point: become root, locate and mount the image, provision it, then unmount and print next steps.
+main() {
+  require_root "$@"
+  ensure_binfmt_arm64
+  find_parts
+  # EXIT covers every way out (normal end, a `set -e` abort inside any helper, or the
+  # signal exit below), so the mounts are always released instead of only on some paths.
+  trap cleanup EXIT
+  trap 'echo "ERROR at line $LINENO" >&2' ERR
+  trap 'exit 130' INT TERM
+  mount_parts
+  prep_chroot
+  install_service_host
+  boot_tweaks
+  cleanup
+  echo "✅ Done. Move the NVMe to the Pi and boot."
+  echo " Login: user '${STYX_USER}' pass '${STYX_PASS}' (change with 'passwd')."
+ echo " Quick checks on the Pi:" + echo " sudo i2cdetect -y 1" + echo " rpicam-still -n -o test.jpg # (if rpicam-apps installed)" + echo " libcamera-still -n -o test.jpg # (if legacy libcamera-apps installed)" + echo " systemctl status freenove-case" +} +main "$@" diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index d96f80f..108fbb1 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -75,4 +75,4 @@ spec: claimName: jellyfin-cache-astreae - name: media persistentVolumeClaim: - claimName: jellyfin-media-asteria + claimName: jellyfin-media-asteria-new diff --git a/services/jellyfin/pvc.yaml b/services/jellyfin/pvc.yaml index 18949d5..bce020b 100644 --- a/services/jellyfin/pvc.yaml +++ b/services/jellyfin/pvc.yaml @@ -38,3 +38,17 @@ spec: requests: storage: 4Ti storageClassName: asteria + +--- + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: jellyfin-media-asteria-new + namespace: jellyfin +spec: + accessModes: ["ReadWriteMany"] + resources: + requests: + storage: 4Ti + storageClassName: asteria diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml new file mode 100644 index 0000000..c8d68e1 --- /dev/null +++ b/services/monitoring/helmrelease.yaml @@ -0,0 +1,206 @@ +# services/monitoring/kube-state-metrics-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: kube-state-metrics + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: kube-state-metrics + version: "~6.0.0" + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + values: + prometheusScrape: true # annotates for /metrics auto-scrape. 
+    service:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"  # ksm serves metrics on 8080 by default
+        prometheus.io/path: "/metrics"
+
+---
+
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: node-exporter
+  namespace: monitoring
+spec:
+  interval: 15m
+  chart:
+    spec:
+      chart: prometheus-node-exporter
+      version: "~4.0.0"
+      sourceRef:
+        kind: HelmRepository
+        name: prometheus-community
+        namespace: flux-system
+  values:
+    service:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9100"
+
+---
+
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: victoria-metrics-single
+  namespace: monitoring
+spec:
+  interval: 15m
+  chart:
+    spec:
+      chart: victoria-metrics-single
+      version: "~0.15.0"  # or omit to follow the latest chart version
+      sourceRef:
+        kind: HelmRepository
+        name: victoria-metrics
+        namespace: flux-system
+  values:
+    server:
+      # keep ~3 months; change as you like (supports "d", "y")
+      extraArgs:
+        retentionPeriod: "90d"  # VM flag -retentionPeriod=90d.
+
+      persistentVolume:
+        enabled: true
+        size: 100Gi  # adjust; uses default StorageClass (Longhorn)
+        # storageClassName: ""  # set if you want a specific class
+
+      # Enable built-in Kubernetes scraping
+      scrape:
+        enabled: true  # chart enables promscrape.
+        config:
+          global:
+            scrape_interval: 15s
+
+          scrape_configs:
+            # VM self-metrics
+            - job_name: victoriametrics
+              static_configs:
+                - targets: ["localhost:8428"]
+
+            # --- K8s control-plane & nodes (from VM docs guide) ---
+            - job_name: "kubernetes-apiservers"
+              kubernetes_sd_configs: [{ role: endpoints }]
+              scheme: https
+              tls_config:
+                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+                insecure_skip_verify: true  # NOTE(review): skips server-cert verification although ca_file is set (same in the two node jobs) - confirm intended
+              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_service_name,__meta_kubernetes_endpoint_port_name]
+                  regex: default;kubernetes;https
+
+            - job_name: "kubernetes-nodes"
+              scheme: https
+              tls_config:
+                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+                insecure_skip_verify: true
+              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+              kubernetes_sd_configs: [{ role: node }]
+              relabel_configs:
+                - action: labelmap
+                  regex: __meta_kubernetes_node_label_(.+)
+                - target_label: __address__
+                  replacement: kubernetes.default.svc:443
+                - source_labels: [__meta_kubernetes_node_name]
+                  regex: (.+)
+                  target_label: __metrics_path__
+                  replacement: /api/v1/nodes/$1/proxy/metrics
+
+            - job_name: "kubernetes-nodes-cadvisor"
+              scheme: https
+              tls_config:
+                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+                insecure_skip_verify: true
+              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+              kubernetes_sd_configs: [{ role: node }]
+              relabel_configs:
+                - action: labelmap
+                  regex: __meta_kubernetes_node_label_(.+)
+                - target_label: __address__
+                  replacement: kubernetes.default.svc:443
+                - source_labels: [__meta_kubernetes_node_name]
+                  regex: (.+)
+                  target_label: __metrics_path__
+                  replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
+
+            # --- Annotated Services (generic autodiscovery) ---
+            - job_name: "kubernetes-service-endpoints"
+              kubernetes_sd_configs: [{ role: endpoints }]
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+                  regex: "true"
+                - action: replace
+                  source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+                  regex: (https?)
+                  target_label: __scheme__
+                - action: replace
+                  source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+                  target_label: __metrics_path__
+                - action: replace
+                  regex: '(.+?)(?::\d+)?;(\d+)'  # existing port is optional, so a bare host is rewritten too
+                  replacement: $1:$2
+                  source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+                  target_label: __address__
+
+            # --- Annotated Pods (generic autodiscovery) ---
+            - job_name: "kubernetes-pods"
+              kubernetes_sd_configs: [{ role: pod }]
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+                  regex: "true"
+                - action: replace
+                  source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+                  target_label: __metrics_path__
+                - action: replace
+                  regex: '(.+?)(?::\d+)?;(\d+)'  # pods with no declared port have a port-less address
+                  replacement: $1:$2
+                  source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+                  target_label: __address__
+
+            # --- kube-state-metrics (via its Service) ---
+            - job_name: "kube-state-metrics"
+              kubernetes_sd_configs: [{ role: endpoints }]
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name]
+                  regex: kube-state-metrics
+
+            # --- Longhorn ---
+            - job_name: "longhorn-backend"
+              static_configs:
+                - targets: ["longhorn-backend.longhorn-system.svc:9500"]
+              metrics_path: /metrics
+
+            # --- cert-manager (pods expose on 9402) ---
+            - job_name: "cert-manager"
+              kubernetes_sd_configs: [{ role: pod }]
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_name]
+                  regex: cert-manager;cert-manager
+                - action: replace
+                  source_labels: [__address__]
+                  regex: "(.+):\\d+"
+                  replacement: "$1:9402"
+                  target_label: __address__
+
+            # --- Flux controllers (default :8080/metrics; keep only the http-prom port, not healthz etc.) ---
+            - job_name: "flux"
+              kubernetes_sd_configs: [{ role: pod }]
+              relabel_configs:
+                - action: keep
+                  source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_part_of, __meta_kubernetes_pod_container_port_name]
+                  regex: flux-system;flux;http-prom
+
diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml
new file mode 100644
index 0000000..036afa3
--- /dev/null
+++ b/services/monitoring/kustomization.yaml
@@ -0,0 +1,8 @@
+# services/monitoring/kustomization.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: monitoring
+resources:
+  - namespace.yaml
+  - rbac.yaml
+  - helmrelease.yaml
diff --git a/services/monitoring/namespace.yaml b/services/monitoring/namespace.yaml
new file mode 100644
index 0000000..3335b6a
--- /dev/null
+++ b/services/monitoring/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: monitoring
\ No newline at end of file
diff --git a/services/monitoring/rbac.yaml b/services/monitoring/rbac.yaml
new file mode 100644
index 0000000..b84ed61
--- /dev/null
+++ b/services/monitoring/rbac.yaml
@@ -0,0 +1,37 @@
+# services/monitoring/rbac.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: vmsingle-scrape
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - nodes/metrics
+      - services
+      - endpoints
+      - pods
+    verbs: ["get","list","watch"]
+  - apiGroups: ["networking.k8s.io","extensions"]
+    resources: ["ingresses"]
+    verbs: ["get","list","watch"]
+  # The "kubernetes-apiservers" scrape job reads the API server's own /metrics,
+  # which is a non-resource URL and therefore needs its own grant.
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: vmsingle-scrape
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: vmsingle-scrape
+subjects:
+  - kind: ServiceAccount
+    name: victoria-metrics-single  # NOTE(review): must equal the ServiceAccount name the chart renders - confirm
+    namespace: monitoring
diff --git a/services/pegasus/deployment.yaml b/services/pegasus/deployment.yaml
index 558c508..4bd0c48 100644
---
a/services/pegasus/deployment.yaml +++ b/services/pegasus/deployment.yaml @@ -107,7 +107,7 @@ spec: volumes: - name: media persistentVolumeClaim: - claimName: jellyfin-media-asteria + claimName: jellyfin-media-asteria-new - name: config configMap: { name: pegasus-user-map } - name: tmp