chore(ai): read model/gpu from k8s pod annotations when available
parent 7a832d6e4c
commit dfe409c475
@@ -39,6 +39,8 @@ AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
 AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat")
 AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
 AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
+AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
+AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")
 
 _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
 
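The two new constants follow the same env-var-with-default pattern as the existing AI_K8S_* settings, so the annotation keys can be overridden per deployment. A minimal sketch, assuming hypothetical override values (the example.com keys below are illustrative only, not part of this change):

import os

# Hypothetical per-cluster overrides; without them the ai.bstein.dev/* defaults apply.
os.environ["AI_MODEL_ANNOTATION"] = "example.com/llm-model"
os.environ["AI_GPU_ANNOTATION"] = "example.com/gpu-class"

AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")

print(AI_MODEL_ANNOTATION, AI_GPU_ANNOTATION)
# example.com/llm-model example.com/gpu-class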
@@ -225,15 +227,30 @@ def _discover_ai_meta() -> dict[str, str]:
         resp.raise_for_status()
         data = resp.json()
         items = data.get("items") or []
-        if items:
-            pod = items[0]
+        running = [p for p in items if p.get("status", {}).get("phase") == "Running"] or items
+        if running:
+            pod = running[0]
             node_name = pod.get("spec", {}).get("nodeName") or meta["node"]
             meta["node"] = node_name
             # If GPU info is annotated on the pod, surface it.
             annotations = pod.get("metadata", {}).get("annotations") or {}
-            gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description")
+            gpu_hint = (
+                annotations.get(AI_GPU_ANNOTATION)
+                or annotations.get("ai.gpu/description")
+                or annotations.get("gpu/description")
+            )
             if gpu_hint:
                 meta["gpu"] = gpu_hint
+
+            model_hint = annotations.get(AI_MODEL_ANNOTATION)
+            if not model_hint:
+                # Try to infer from container image tag.
+                containers = pod.get("spec", {}).get("containers") or []
+                if containers:
+                    image = containers[0].get("image") or ""
+                    model_hint = image.split(":")[-1] if ":" in image else image
+            if model_hint:
+                meta["model"] = model_hint
     except Exception:
         # swallow errors; keep fallbacks
         pass
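The lookup order introduced here is: the configured pod annotation first, then the older ai.gpu/description and gpu/description keys for the GPU, and finally the container image tag as a last resort for the model name. A minimal sketch against hypothetical pod data (the annotation value and image tag below are invented for illustration, not taken from the cluster):

# Hypothetical pod payload as returned by the Kubernetes API; only the fields
# the lookup touches are included.
pod = {
    "metadata": {"annotations": {"ai.bstein.dev/gpu": "RTX A4000 16GB"}},
    "spec": {"containers": [{"image": "ollama/ollama:qwen2.5-7b"}]},
}

annotations = pod.get("metadata", {}).get("annotations") or {}
gpu_hint = (
    annotations.get("ai.bstein.dev/gpu")          # new annotation key
    or annotations.get("ai.gpu/description")      # legacy keys still honored
    or annotations.get("gpu/description")
)

model_hint = annotations.get("ai.bstein.dev/model")
if not model_hint:
    # Fall back to the container image tag when the model annotation is absent.
    containers = pod.get("spec", {}).get("containers") or []
    if containers:
        image = containers[0].get("image") or ""
        model_hint = image.split(":")[-1] if ":" in image else image

print(gpu_hint)    # RTX A4000 16GB  (from the annotation)
print(model_hint)  # qwen2.5-7b      (inferred from the image tag)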