gpu(titan-24): prepare Wolf Nvidia driver volume

This commit is contained in:
jenkins 2026-05-22 04:59:52 -03:00
parent 2d8405d299
commit 2ff55289a8

View File

@ -2,6 +2,7 @@
set -euo pipefail set -euo pipefail
marker="/host/var/lib/atlas-maintenance/titan-24-docker.ready" marker="/host/var/lib/atlas-maintenance/titan-24-docker.ready"
nvidia_volume_marker="/host/var/lib/atlas-maintenance/nvidia-driver-vol.version"
host_sh() { host_sh() {
chroot /host /bin/sh -c "$*" chroot /host /bin/sh -c "$*"
} }
@ -42,11 +43,56 @@ JSON
host_systemctl restart docker host_systemctl restart docker
} }
#######################################
# Persistently enables NVIDIA DRM kernel mode setting (modeset=1) on the host.
#
# Does nothing unless /host/sys/module/nvidia_drm exists. Otherwise it
#   1. writes /host/etc/modprobe.d/nvidia-drm-modeset.conf when the option
#      line is not already present, and
#   2. appends nvidia-drm.modeset=1 to the active GRUB_CMDLINE_LINUX_DEFAULT
#      assignment in the host's /etc/default/grub (a backup is written to
#      /etc/default/grub.atlas-maintenance.bak first) and regenerates the
#      GRUB configuration with update-grub or grub-mkconfig.
#
# Globals:   none
# Arguments: none
# Outputs:   warning on stderr when /etc/default/grub could not be patched
# Returns:   0 when done or when nothing had to be done; the failing status
#            when a host command fails
#######################################
configure_nvidia_modeset() {
  local modprobe_conf="/host/etc/modprobe.d/nvidia-drm-modeset.conf"
  local grub_defaults="/host/etc/default/grub"
  local append_modeset

  if [ ! -d /host/sys/module/nvidia_drm ]; then
    return 0
  fi

  mkdir -p /host/etc/modprobe.d
  # -s already silences "file not found", so no extra stderr redirect needed.
  if ! grep -qs '^options nvidia-drm modeset=1' "${modprobe_conf}"; then
    printf '%s\n' 'options nvidia-drm modeset=1' >"${modprobe_conf}"
  fi

  if [ ! -f "${grub_defaults}" ] \
    || grep -q 'nvidia-drm.modeset=1' "${grub_defaults}"; then
    return 0
  fi

  host_sh "cp /etc/default/grub /etc/default/grub.atlas-maintenance.bak" \
    || return

  # Anchored at the start of the line so a commented-out
  # '#GRUB_CMDLINE_LINUX_DEFAULT="..."' is never edited in place of the
  # active assignment.
  append_modeset='s/^\(GRUB_CMDLINE_LINUX_DEFAULT="[^"]*\)"/\1 nvidia-drm.modeset=1"/'
  host_sh "sed -i '${append_modeset}' /etc/default/grub" || return

  # sed exits 0 even when nothing matched (variable missing or single-quoted).
  # Regenerating GRUB would then be pointless, so report it instead.
  if ! grep -q 'nvidia-drm.modeset=1' "${grub_defaults}"; then
    printf '%s\n' \
      'warning: no double-quoted GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub; nvidia-drm.modeset=1 not added' >&2
    return 0
  fi

  if [ -x /host/usr/sbin/update-grub ]; then
    host_sh "update-grub" || return
  elif [ -x /host/usr/sbin/grub-mkconfig ]; then
    host_sh "grub-mkconfig -o /boot/grub/grub.cfg" || return
  fi
}
#######################################
# Builds the games-on-whales NVIDIA driver image for the driver version that
# is currently loaded on the host and (re)populates the "nvidia-driver-vol"
# Docker volume from it.
#
# The work is skipped when the volume exists and the marker file already
# records the loaded driver version. A volume left over from another driver
# version is removed first: Docker only copies image content into an EMPTY
# named volume, so reusing the old volume would keep the old driver files
# while the marker claimed the new version.
#
# Globals:   nvidia_volume_marker (read; the file is rewritten on success)
# Arguments: $1 - optional path of the file holding the loaded driver
#                 version (default: /host/sys/module/nvidia/version)
# Outputs:   warning on stderr when a stale volume cannot be removed
# Returns:   0 when nothing had to be done, when the volume was refreshed, or
#            when a stale volume could not be removed (marker left untouched
#            so a later run retries); the failing status when building the
#            image, creating or populating the volume fails
#######################################
ensure_nvidia_driver_volume() {
  local version_file="${1:-/host/sys/module/nvidia/version}"
  local dockerfile_url="https://raw.githubusercontent.com/games-on-whales/gow/master/images/nvidia-driver/Dockerfile"
  local driver_version
  local image
  local volume_exists=0

  if [ ! -r "${version_file}" ]; then
    return 0
  fi
  driver_version="$(cat "${version_file}")" || return
  if [ -z "${driver_version}" ]; then
    return 0
  fi
  image="atlas/nvidia-driver:${driver_version}"

  if host_sh "docker volume inspect nvidia-driver-vol >/dev/null 2>&1"; then
    volume_exists=1
  fi

  # -F: the version contains dots, compare it literally rather than as a regex.
  if [ "${volume_exists}" -eq 1 ] && [ -f "${nvidia_volume_marker}" ] \
    && grep -qxF -- "${driver_version}" "${nvidia_volume_marker}"; then
    return 0
  fi

  # Build in a throw-away directory on the host; the temp dir is removed
  # regardless of the build result and the build status is propagated.
  host_sh "tmp_dir=\$(mktemp -d) \
&& curl -fsSL ${dockerfile_url} -o \"\${tmp_dir}/Dockerfile\" \
&& docker build --build-arg NV_VERSION='${driver_version}' -t '${image}' \"\${tmp_dir}\"; \
rc=\$?; rm -rf \"\${tmp_dir}\"; exit \${rc}" || return

  if [ "${volume_exists}" -eq 1 ]; then
    # The existing volume holds files of unknown/other version; drop it so the
    # run below populates a fresh, empty volume.
    if ! host_sh "docker volume rm nvidia-driver-vol >/dev/null"; then
      printf '%s\n' \
        "warning: cannot remove stale nvidia-driver-vol (still in use?); driver ${driver_version} not installed into it" >&2
      return 0
    fi
  fi

  host_sh "docker volume create nvidia-driver-vol >/dev/null" || return
  # Starting any container with the empty volume mounted makes Docker copy
  # the image's /usr/nvidia content into it.
  host_sh "docker run --rm -v nvidia-driver-vol:/usr/nvidia '${image}' true" \
    || return

  mkdir -p "$(dirname "${nvidia_volume_marker}")" || return
  printf '%s\n' "${driver_version}" >"${nvidia_volume_marker}"
}
if [ ! -x /host/usr/bin/docker ] || ! host_sh "docker version >/dev/null 2>&1"; then if [ ! -x /host/usr/bin/docker ] || ! host_sh "docker version >/dev/null 2>&1"; then
install_docker install_docker
fi fi
configure_docker configure_docker
configure_nvidia_modeset
ensure_nvidia_driver_volume
mkdir -p "$(dirname "${marker}")" mkdir -p "$(dirname "${marker}")"
date -Is >"${marker}" date -Is >"${marker}"
@ -54,6 +100,7 @@ date -Is >"${marker}"
while true; do while true; do
if [ ! -S /host/var/run/docker.sock ]; then if [ ! -S /host/var/run/docker.sock ]; then
configure_docker configure_docker
ensure_nvidia_driver_volume
fi fi
sleep 300 sleep 300
done done