longhorn: reconcile astreae and asteria disk tags

This commit is contained in:
Brad Stein 2026-03-31 13:54:58 -03:00
parent 64ba6c32cd
commit e8a580ee57
3 changed files with 140 additions and 0 deletions

View File

@ -8,11 +8,15 @@ resources:
- vault-sync-deployment.yaml
- helmrelease.yaml
- longhorn-settings-ensure-job.yaml
- longhorn-disk-tags-ensure-job.yaml
configMapGenerator:
- name: longhorn-settings-ensure-script
files:
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
- name: longhorn-disk-tags-ensure-script
files:
- longhorn_disk_tags_ensure.py=scripts/longhorn_disk_tags_ensure.py
generatorOptions:
disableNameSuffixHash: true

View File

@ -0,0 +1,36 @@
# infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml
# One-shot Job that runs scripts/longhorn_disk_tags_ensure.py (mounted from the
# longhorn-disk-tags-ensure-script ConfigMap) to re-apply the expected Longhorn
# disk tags.
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-disk-tags-ensure-1
namespace: longhorn-system
spec:
# No retries: a failed run is not re-attempted by the Job controller.
backoffLimit: 0
# The finished Job becomes eligible for automatic cleanup after one hour.
ttlSecondsAfterFinished: 3600
template:
spec:
# The script authenticates to the API server with this account's mounted token.
# NOTE(review): presumably this account may get/patch Longhorn nodes - confirm RBAC.
serviceAccountName: longhorn-service-account
restartPolicy: Never
volumes:
- name: longhorn-disk-tags-ensure-script
configMap:
name: longhorn-disk-tags-ensure-script
# Read + execute for everyone; the script is invoked through the interpreter.
defaultMode: 0555
# Hard scheduling requirement: arm64 nodes that carry the worker role label.
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
- key: node-role.kubernetes.io/worker
operator: Exists
containers:
- name: apply
image: python:3.12.9-alpine3.20
# Runs the script from the ConfigMap volume mounted at /scripts below.
command: ["python", "/scripts/longhorn_disk_tags_ensure.py"]
volumeMounts:
- name: longhorn-disk-tags-ensure-script
mountPath: /scripts
readOnly: true

View File

@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""Reconcile Longhorn disk tags for the Titan Longhorn storage classes.

The astreae/asteria StorageClasses select Longhorn disks by tag. The current
nodes already have the right disk paths, but the tag fields can drift to empty
after node recovery. This job patches the live Longhorn Node CRs back to the
expected tags so PVC provisioning keeps working.
"""
from __future__ import annotations
import json
import os
import ssl
import urllib.request
# Namespace whose Longhorn Node custom resources are listed and patched.
LONGHORN_NS = "longhorn-system"
# API path of the Longhorn Node collection; fill in with `.format(namespace=...)`.
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
# Maps a disk's configured path to the single tag that disk is expected to carry.
# Disks whose path is not listed here are left untouched by main().
DESIRED_TAGS = {
"/mnt/astreae": "astreae",
"/mnt/asteria": "asteria",
}
def api_base() -> str:
    """Return the base URL of the in-cluster Kubernetes API server.

    The host and port are read from the ``KUBERNETES_SERVICE_HOST`` and
    ``KUBERNETES_SERVICE_PORT`` environment variables; the port falls back
    to ``"443"`` when unset.

    Returns:
        A URL of the form ``https://<host>:<port>`` with no trailing slash.

    Raises:
        SystemExit: If ``KUBERNETES_SERVICE_HOST`` is unset or empty.
    """
    host = os.environ.get("KUBERNETES_SERVICE_HOST")
    port = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
    if not host:
        raise SystemExit("missing KUBERNETES_SERVICE_HOST")
    # An IPv6 literal must be bracketed inside a URL, otherwise its colons
    # are indistinguishable from the host/port separator.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return f"https://{host}:{port}"
def token() -> str:
    """Read the pod's mounted service-account bearer token.

    Returns:
        The contents of the token file with surrounding whitespace removed.
    """
    token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
    with open(token_path, "r", encoding="utf-8") as token_file:
        raw = token_file.read()
    return raw.strip()
def ca_context() -> ssl.SSLContext:
    """Build a TLS context that trusts the service-account CA bundle.

    Returns:
        A default ``ssl.SSLContext`` that loads its CA certificates from the
        mounted ``ca.crt`` file.
    """
    ca_bundle = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    context = ssl.create_default_context(cafile=ca_bundle)
    return context
def request_json(method: str, path: str, body: dict | None = None) -> dict:
    """Send an authenticated request to the Kubernetes API and decode the reply.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"PATCH"``.
        path: API path appended to the base URL from ``api_base()``.
        body: Optional JSON-serialisable payload. When given, it is sent as
            ``application/merge-patch+json``.

    Returns:
        The decoded JSON response, or an empty dict when the response body
        is empty.

    Raises:
        urllib.error.HTTPError: On a non-2xx response. The response body is
            printed to stderr first, then the same exception is re-raised.
        urllib.error.URLError: On connection-level failures.
    """
    import sys
    import urllib.error

    headers = {
        "Authorization": f"Bearer {token()}",
        "Accept": "application/json",
    }
    data = None
    if body is not None:
        # Only advertise a content type when a payload is actually sent.
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/merge-patch+json"

    req = urllib.request.Request(
        f"{api_base()}{path}",
        method=method,
        headers=headers,
        data=data,
    )
    try:
        with urllib.request.urlopen(req, context=ca_context(), timeout=20) as resp:
            payload = resp.read()
    except urllib.error.HTTPError as err:
        # The error response body carries the server's explanation of the
        # failure; surface it, since the exception message only has the
        # status line.
        detail = err.read().decode("utf-8", errors="replace").strip()
        print(
            f"{method} {path} failed: HTTP {err.code} {err.reason}: {detail}",
            file=sys.stderr,
        )
        raise
    return json.loads(payload) if payload else {}
def list_nodes() -> list[dict]:
    """Fetch every Longhorn Node custom resource in ``LONGHORN_NS``.

    Returns:
        The ``items`` list of the API response, or an empty list when the
        response has no ``items`` key.
    """
    collection_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    response = request_json("GET", collection_path)
    return response.get("items", [])
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
    """Set one disk's tag list on a Longhorn Node to exactly ``[desired_tag]``.

    Args:
        node_name: Name of the Longhorn Node resource to patch.
        disk_name: Key of the disk under the node's ``spec.disks``.
        desired_tag: The only tag the disk should carry afterwards.
    """
    collection_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    # The patch body names only this disk's "tags" field.
    disk_patch = {disk_name: {"tags": [desired_tag]}}
    request_json(
        "PATCH",
        f"{collection_path}/{node_name}",
        body={"spec": {"disks": disk_patch}},
    )
def main() -> int:
    """Reconcile the tags of every known disk on every Longhorn node.

    For each disk whose path appears in ``DESIRED_TAGS``, the tag list is
    patched to exactly the desired tag unless it already matches. A failure
    to patch one disk is reported and counted, and the remaining disks are
    still processed. A failure to list the nodes is not caught.

    Returns:
        ``0`` when every required patch succeeded, ``1`` when at least one
        patch failed.
    """
    import sys

    changed = 0
    skipped = 0
    failed = 0
    for node in list_nodes():
        name = node.get("metadata", {}).get("name", "")
        spec_disks = node.get("spec", {}).get("disks", {}) or {}
        for disk_name, disk in spec_disks.items():
            disk_path = disk.get("path")
            # A trailing slash names the same directory, so ignore it when
            # matching against the DESIRED_TAGS keys.
            lookup_path = disk_path.rstrip("/") if isinstance(disk_path, str) else disk_path
            desired_tag = DESIRED_TAGS.get(lookup_path)
            if not desired_tag:
                continue
            current_tags = disk.get("tags") or []
            if current_tags == [desired_tag]:
                skipped += 1
                continue
            print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
            try:
                patch_disk_tags(name, disk_name, desired_tag)
            except (OSError, ValueError) as err:
                # OSError covers urllib's URLError/HTTPError and socket
                # timeouts; ValueError covers an undecodable JSON response.
                # Keep going so one bad disk does not leave the rest untagged.
                failed += 1
                print(f"failed to patch {name}:{disk_name}: {err}", file=sys.stderr)
                continue
            changed += 1
    print(f"done: changed={changed} skipped={skipped}")
    if failed:
        print(f"error: {failed} disk(s) could not be patched", file=sys.stderr)
        return 1
    return 0
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())