chore(ai): read model/gpu from k8s pod annotations when available

This commit is contained in:
Brad Stein 2025-12-21 00:45:30 -03:00
parent 7a832d6e4c
commit dfe409c475

View File

@@ -39,6 +39,8 @@ AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
 AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat")
 AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
 AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
+AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
+AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")
 _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
@@ -225,15 +227,30 @@ def _discover_ai_meta() -> dict[str, str]:
         resp.raise_for_status()
         data = resp.json()
         items = data.get("items") or []
-        if items:
-            pod = items[0]
+        running = [p for p in items if p.get("status", {}).get("phase") == "Running"] or items
+        if running:
+            pod = running[0]
             node_name = pod.get("spec", {}).get("nodeName") or meta["node"]
             meta["node"] = node_name
+            # If GPU info is annotated on the pod, surface it.
             annotations = pod.get("metadata", {}).get("annotations") or {}
-            gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description")
+            gpu_hint = (
+                annotations.get(AI_GPU_ANNOTATION)
+                or annotations.get("ai.gpu/description")
+                or annotations.get("gpu/description")
+            )
             if gpu_hint:
                 meta["gpu"] = gpu_hint
+            model_hint = annotations.get(AI_MODEL_ANNOTATION)
+            if not model_hint:
+                # Try to infer from container image tag.
+                containers = pod.get("spec", {}).get("containers") or []
+                if containers:
+                    image = containers[0].get("image") or ""
+                    model_hint = image.split(":")[-1] if ":" in image else image
+            if model_hint:
+                meta["model"] = model_hint
     except Exception:
         # swallow errors; keep fallbacks
         pass