chore(ai): read model/gpu from k8s pod annotations when available
This commit is contained in:
parent
7a832d6e4c
commit
dfe409c475
@ -39,6 +39,8 @@ AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
|
||||
# Public chat endpoint URL. Taken from the AI_PUBLIC_CHAT_ENDPOINT environment
# variable, falling back to the hard-coded URL only when the variable is unset
# (a set-but-empty variable yields "").
AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat")
|
||||
# Read from the AI_K8S_LABEL environment variable, defaulting to "app=ollama".
# NOTE(review): presumably the label selector used when listing pods; the
# call site is not visible here -- confirm.
AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
|
||||
# Read from the AI_K8S_NAMESPACE environment variable, defaulting to "ai".
# NOTE(review): presumably the namespace queried for the AI pod; the call
# site is not visible here -- confirm.
AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
|
||||
# Pod annotation key looked up in _discover_ai_meta; when the annotation is
# present its value becomes meta["model"], otherwise the first container's
# image tag is used instead. Overridable via the AI_MODEL_ANNOTATION
# environment variable (a set-but-empty variable yields "" as the key).
AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
|
||||
# Pod annotation key checked first for the GPU description in
# _discover_ai_meta, ahead of the hard-coded "ai.gpu/description" and
# "gpu/description" keys; the first non-empty value becomes meta["gpu"].
# Overridable via the AI_GPU_ANNOTATION environment variable.
AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")
|
||||
|
||||
# Module-level cache dict; "ts" starts at 0.0 and "value" at None.
# NOTE(review): presumably a timestamp plus the last computed lab status,
# refreshed by code outside this view -- confirm against its readers/writers.
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
||||
|
||||
@ -225,15 +227,30 @@ def _discover_ai_meta() -> dict[str, str]:
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
items = data.get("items") or []
|
||||
if items:
|
||||
pod = items[0]
|
||||
running = [p for p in items if p.get("status", {}).get("phase") == "Running"] or items
|
||||
if running:
|
||||
pod = running[0]
|
||||
node_name = pod.get("spec", {}).get("nodeName") or meta["node"]
|
||||
meta["node"] = node_name
|
||||
# If GPU info is annotated on the pod, surface it.
|
||||
|
||||
annotations = pod.get("metadata", {}).get("annotations") or {}
|
||||
gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description")
|
||||
gpu_hint = (
|
||||
annotations.get(AI_GPU_ANNOTATION)
|
||||
or annotations.get("ai.gpu/description")
|
||||
or annotations.get("gpu/description")
|
||||
)
|
||||
if gpu_hint:
|
||||
meta["gpu"] = gpu_hint
|
||||
|
||||
model_hint = annotations.get(AI_MODEL_ANNOTATION)
|
||||
if not model_hint:
|
||||
# Try to infer from container image tag.
|
||||
containers = pod.get("spec", {}).get("containers") or []
|
||||
if containers:
|
||||
image = containers[0].get("image") or ""
|
||||
model_hint = image.split(":")[-1] if ":" in image else image
|
||||
if model_hint:
|
||||
meta["model"] = model_hint
|
||||
except Exception:
|
||||
# swallow errors; keep fallbacks
|
||||
pass
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user