From dfe409c47506d9677c77b7450d36d74081223b92 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:45:30 -0300 Subject: [PATCH] chore(ai): read model/gpu from k8s pod annotations when available --- backend/app.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/backend/app.py b/backend/app.py index 9dbc55e..d1544c9 100644 --- a/backend/app.py +++ b/backend/app.py @@ -39,6 +39,8 @@ AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)" AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat") AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama") AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai") +AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model") +AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu") _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None} @@ -225,15 +227,30 @@ def _discover_ai_meta() -> dict[str, str]: resp.raise_for_status() data = resp.json() items = data.get("items") or [] - if items: - pod = items[0] + running = [p for p in items if p.get("status", {}).get("phase") == "Running"] or items + if running: + pod = running[0] node_name = pod.get("spec", {}).get("nodeName") or meta["node"] meta["node"] = node_name - # If GPU info is annotated on the pod, surface it. + annotations = pod.get("metadata", {}).get("annotations") or {} - gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description") + gpu_hint = ( + annotations.get(AI_GPU_ANNOTATION) + or annotations.get("ai.gpu/description") + or annotations.get("gpu/description") + ) if gpu_hint: meta["gpu"] = gpu_hint + + model_hint = annotations.get(AI_MODEL_ANNOTATION) + if not model_hint: + # Try to infer from container image tag. + containers = pod.get("spec", {}).get("containers") or [] + if containers: + image = containers[0].get("image") or "" + model_hint = image.split(":")[-1] if ":" in image else image + if model_hint: + meta["model"] = model_hint except Exception: # swallow errors; keep fallbacks pass