From 7a832d6e4c4c328aa5b91563c3c2d20aea97f489 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:31:48 -0300 Subject: [PATCH] chore(ai): discover AI node via k8s when available --- backend/app.py | 61 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/backend/app.py b/backend/app.py index a79e2de..9dbc55e 100644 --- a/backend/app.py +++ b/backend/app.py @@ -37,6 +37,8 @@ AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20")) AI_NODE_NAME = os.getenv("AI_CHAT_NODE_NAME") or os.getenv("AI_NODE_NAME") or "ai-cluster" AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)" AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat") +AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama") +AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai") _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None} @@ -186,14 +188,57 @@ def ai_chat() -> Any: @app.route("/api/ai/info", methods=["GET"]) def ai_info() -> Any: - return jsonify( - { - "node": AI_NODE_NAME, - "gpu": AI_GPU_DESC, - "model": AI_CHAT_MODEL, - "endpoint": AI_PUBLIC_ENDPOINT or "/api/ai/chat", - } - ) + meta = _discover_ai_meta() + return jsonify(meta) + + +def _discover_ai_meta() -> dict[str, str]: + """ + Best-effort discovery of which node/gpu is hosting the AI service. + Tries the Kubernetes API using the service account if available; falls back to env. + """ + meta = { + "node": AI_NODE_NAME, + "gpu": AI_GPU_DESC, + "model": AI_CHAT_MODEL, + "endpoint": AI_PUBLIC_ENDPOINT or "/api/ai/chat", + } + + # Only attempt k8s if we're in-cluster and credentials exist.
+ sa_path = Path("/var/run/secrets/kubernetes.io/serviceaccount") + token_path = sa_path / "token" + ca_path = sa_path / "ca.crt" + ns_path = sa_path / "namespace" + if not token_path.exists() or not ca_path.exists() or not ns_path.exists(): + return meta + + try: + token = token_path.read_text().strip() + namespace = ns_path.read_text().strip() or AI_K8S_NAMESPACE + api_server = os.getenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc") + api_port = os.getenv("KUBERNETES_SERVICE_PORT", "443") + base_url = f"https://{api_server}:{api_port}" + pod_url = f"{base_url}/api/v1/namespaces/{namespace}/pods?labelSelector={AI_K8S_LABEL}" + + with httpx.Client(verify=str(ca_path), timeout=HTTP_CHECK_TIMEOUT_SEC, headers={"Authorization": f"Bearer {token}"}) as client: + resp = client.get(pod_url) + resp.raise_for_status() + data = resp.json() + items = data.get("items") or [] + if items: + pod = items[0] + node_name = pod.get("spec", {}).get("nodeName") or meta["node"] + meta["node"] = node_name + # If GPU info is annotated on the pod, surface it. + annotations = pod.get("metadata", {}).get("annotations") or {} + gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description") + if gpu_hint: + meta["gpu"] = gpu_hint + except Exception: + # swallow errors; keep fallbacks + pass + + return meta @app.route("/", defaults={"path": ""})