longhorn: reconcile astreae and asteria disk tags
This commit is contained in:
parent
64ba6c32cd
commit
e8a580ee57
@ -8,11 +8,15 @@ resources:
|
||||
- vault-sync-deployment.yaml
|
||||
- helmrelease.yaml
|
||||
- longhorn-settings-ensure-job.yaml
|
||||
- longhorn-disk-tags-ensure-job.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: longhorn-settings-ensure-script
|
||||
files:
|
||||
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
files:
|
||||
- longhorn_disk_tags_ensure.py=scripts/longhorn_disk_tags_ensure.py
|
||||
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
# infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: longhorn-disk-tags-ensure-1
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
ttlSecondsAfterFinished: 3600
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: longhorn-service-account
|
||||
restartPolicy: Never
|
||||
volumes:
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
configMap:
|
||||
name: longhorn-disk-tags-ensure-script
|
||||
defaultMode: 0555
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
containers:
|
||||
- name: apply
|
||||
image: python:3.12.9-alpine3.20
|
||||
command: ["python", "/scripts/longhorn_disk_tags_ensure.py"]
|
||||
volumeMounts:
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reconcile Longhorn disk tags for the Titan longhorn storage classes.
|
||||
|
||||
The astreae/asteria storageclasses select Longhorn disks by tag. The current
|
||||
nodes already have the right disk paths, but the tag fields can drift to empty
|
||||
after node recovery. This job patches the live Longhorn Node CRs back to the
|
||||
expected tags so PVC provisioning keeps working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import ssl
|
||||
import urllib.request
|
||||
|
||||
|
||||
# Namespace the Longhorn control plane (and its Node CRs) lives in.
LONGHORN_NS = "longhorn-system"
# Longhorn v1beta2 namespaced Node collection endpoint; format() with namespace.
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
# Disk mount path -> the single tag that disk is expected to carry so the
# astreae/asteria storageclasses can select it.
DESIRED_TAGS = {
    "/mnt/astreae": "astreae",
    "/mnt/asteria": "asteria",
}
|
||||
|
||||
|
||||
def api_base() -> str:
    """Build the in-cluster Kubernetes API server base URL.

    Uses the standard in-cluster service environment variables; the port
    defaults to 443 when unset.

    Raises:
        SystemExit: if KUBERNETES_SERVICE_HOST is not set (not in-cluster).
    """
    env = os.environ
    host = env.get("KUBERNETES_SERVICE_HOST")
    if not host:
        raise SystemExit("missing KUBERNETES_SERVICE_HOST")
    port = env.get("KUBERNETES_SERVICE_PORT", "443")
    return f"https://{host}:{port}"
|
||||
|
||||
|
||||
def token() -> str:
    """Return the pod's service-account bearer token, stripped of whitespace."""
    with open(
        "/var/run/secrets/kubernetes.io/serviceaccount/token",
        "r",
        encoding="utf-8",
    ) as token_file:
        return token_file.read().strip()
|
||||
|
||||
|
||||
def ca_context() -> ssl.SSLContext:
    """Return a TLS context that trusts the cluster CA mounted into the pod."""
    return ssl.create_default_context(
        cafile="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    )
|
||||
|
||||
|
||||
def request_json(method: str, path: str, body: dict | None = None) -> dict:
    """Call the Kubernetes API server and decode the JSON response.

    Args:
        method: HTTP verb, e.g. "GET" or "PATCH".
        path: API path appended to the in-cluster base URL.
        body: optional JSON-serializable payload, sent as a JSON
            merge-patch body when provided.

    Returns:
        The decoded JSON response, or {} when the response body is empty.

    Raises:
        urllib.error.HTTPError / urllib.error.URLError on API failures.
    """
    headers = {
        "Authorization": f"Bearer {token()}",
        "Accept": "application/json",
    }
    data = None
    if body is not None:
        # Only declare a Content-Type when a body is actually sent;
        # previously the merge-patch type was advertised even on GET
        # requests, which carry no body.
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/merge-patch+json"
    req = urllib.request.Request(
        f"{api_base()}{path}",
        method=method,
        headers=headers,
        data=data,
    )
    with urllib.request.urlopen(req, context=ca_context(), timeout=20) as resp:
        payload = resp.read()
    return json.loads(payload) if payload else {}
|
||||
|
||||
|
||||
def list_nodes() -> list[dict]:
    """Fetch every Longhorn Node CR in the Longhorn namespace."""
    listing = request_json("GET", LONGHORN_API.format(namespace=LONGHORN_NS))
    return listing.get("items", [])
|
||||
|
||||
|
||||
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
    """Merge-patch one disk on a Longhorn Node CR to carry exactly one tag.

    Note: the merge-patch replaces the disk's whole ``tags`` list, so any
    other tags on the disk are dropped — this is the reconcile intent.
    """
    node_path = f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}"
    patch = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
    request_json("PATCH", node_path, body=patch)
|
||||
|
||||
|
||||
def main() -> int:
    """Reconcile disk tags on every Longhorn node.

    Walks each node's spec.disks, and for disks mounted at a path listed
    in DESIRED_TAGS patches the tag list back to the expected value when
    it has drifted. Prints a per-patch line and a final summary.

    Returns:
        0 always (failures surface as uncaught exceptions).
    """
    changed, skipped = 0, 0
    for node in list_nodes():
        name = node.get("metadata", {}).get("name", "")
        disks = node.get("spec", {}).get("disks", {}) or {}
        for disk_name, disk in disks.items():
            disk_path = disk.get("path")
            desired_tag = DESIRED_TAGS.get(disk_path)
            if not desired_tag:
                # Disk path is not one we manage; leave it alone.
                continue
            current_tags = disk.get("tags") or []
            if current_tags == [desired_tag]:
                skipped += 1
                continue
            print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
            patch_disk_tags(name, disk_name, desired_tag)
            changed += 1
    print(f"done: changed={changed} skipped={skipped}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
x
Reference in New Issue
Block a user