chore(ai): discover AI node via k8s when available

Brad Stein 2025-12-21 00:31:48 -03:00
parent eeaece5bae
commit 7a832d6e4c


@@ -37,6 +37,8 @@ AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
AI_NODE_NAME = os.getenv("AI_CHAT_NODE_NAME") or os.getenv("AI_NODE_NAME") or "ai-cluster"
AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat")
AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
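The two new settings control which pods the backend inspects: AI_K8S_NAMESPACE picks the namespace to query and AI_K8S_LABEL is passed as the label selector. As a reference point, here is a minimal sketch of the pod-list URL the discovery code below ends up requesting, using the defaults from this hunk; the KUBERNETES_SERVICE_* values are the standard in-cluster environment variables and are shown only for illustration.

import os

# Defaults introduced in this change; override the env vars to target another deployment.
label = os.getenv("AI_K8S_LABEL", "app=ollama")
namespace = os.getenv("AI_K8S_NAMESPACE", "ai")

# In-cluster API host/port, resolved the same way the handler does.
host = os.getenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc")
port = os.getenv("KUBERNETES_SERVICE_PORT", "443")

print(f"https://{host}:{port}/api/v1/namespaces/{namespace}/pods?labelSelector={label}")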
@@ -186,14 +188,57 @@ def ai_chat() -> Any:
@app.route("/api/ai/info", methods=["GET"])
def ai_info() -> Any:
    return jsonify(
        {
            "node": AI_NODE_NAME,
            "gpu": AI_GPU_DESC,
            "model": AI_CHAT_MODEL,
            "endpoint": AI_PUBLIC_ENDPOINT or "/api/ai/chat",
        }
    )
    meta = _discover_ai_meta()
    return jsonify(meta)

def _discover_ai_meta() -> dict[str, str]:
    """
    Best-effort discovery of which node/gpu is hosting the AI service.
    Tries the Kubernetes API using the service account if available; falls back to env.
    """
    meta = {
        "node": AI_NODE_NAME,
        "gpu": AI_GPU_DESC,
        "model": AI_CHAT_MODEL,
        "endpoint": AI_PUBLIC_ENDPOINT or "/api/ai/chat",
    }
    # Only attempt k8s if we're in-cluster and credentials exist.
    sa_path = Path("/var/run/secrets/kubernetes.io/serviceaccount")
    token_path = sa_path / "token"
    ca_path = sa_path / "ca.crt"
    ns_path = sa_path / "namespace"
    if not token_path.exists() or not ca_path.exists() or not ns_path.exists():
        return meta
    try:
        token = token_path.read_text().strip()
        namespace = ns_path.read_text().strip() or AI_K8S_NAMESPACE
        api_server = os.getenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc")
        api_port = os.getenv("KUBERNETES_SERVICE_PORT", "443")
        base_url = f"https://{api_server}:{api_port}"
        pod_url = f"{base_url}/api/v1/namespaces/{namespace}/pods?labelSelector={AI_K8S_LABEL}"
        with httpx.Client(verify=str(ca_path), timeout=HTTP_CHECK_TIMEOUT_SEC, headers={"Authorization": f"Bearer {token}"}) as client:
            resp = client.get(pod_url)
            resp.raise_for_status()
            data = resp.json()
            items = data.get("items") or []
            if items:
                pod = items[0]
                node_name = pod.get("spec", {}).get("nodeName") or meta["node"]
                meta["node"] = node_name
                # If GPU info is annotated on the pod, surface it.
                annotations = pod.get("metadata", {}).get("annotations") or {}
                gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description")
                if gpu_hint:
                    meta["gpu"] = gpu_hint
    except Exception:
        # swallow errors; keep fallbacks
        pass
    return meta

@app.route("/", defaults={"path": ""})