Compare commits
2 Commits
3ce4ac3273
...
7a832d6e4c
| Author | SHA1 | Date | |
|---|---|---|---|
| 7a832d6e4c | |||
| eeaece5bae |
@ -34,11 +34,11 @@ AI_CHAT_SYSTEM_PROMPT = os.getenv(
|
|||||||
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
|
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
|
||||||
)
|
)
|
||||||
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
|
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
|
||||||
AI_NODE_NAME = os.getenv("AI_NODE_NAME") or os.getenv("NODE_NAME") or "unknown"
|
AI_NODE_NAME = os.getenv("AI_CHAT_NODE_NAME") or os.getenv("AI_NODE_NAME") or "ai-cluster"
|
||||||
try:
|
AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
|
||||||
AI_NODE_GPU_MAP = json.loads(os.getenv("AI_NODE_GPU_MAP", "{}"))
|
AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/ai/chat")
|
||||||
except json.JSONDecodeError:
|
AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
|
||||||
AI_NODE_GPU_MAP = {}
|
AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
|
||||||
|
|
||||||
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
||||||
|
|
||||||
@ -188,15 +188,57 @@ def ai_chat() -> Any:
|
|||||||
|
|
||||||
@app.route("/api/ai/info", methods=["GET"])
|
@app.route("/api/ai/info", methods=["GET"])
|
||||||
def ai_info() -> Any:
|
def ai_info() -> Any:
|
||||||
gpu_label = AI_NODE_GPU_MAP.get(AI_NODE_NAME, "local GPU (dynamic)")
|
meta = _discover_ai_meta()
|
||||||
return jsonify(
|
return jsonify(meta)
|
||||||
{
|
|
||||||
"node": AI_NODE_NAME,
|
|
||||||
"gpu": gpu_label,
|
def _discover_ai_meta() -> dict[str, str]:
|
||||||
"model": AI_CHAT_MODEL,
|
"""
|
||||||
"endpoint": "/api/ai/chat",
|
Best-effort discovery of which node/gpu is hosting the AI service.
|
||||||
}
|
Tries the Kubernetes API using the service account if available; falls back to env.
|
||||||
)
|
"""
|
||||||
|
meta = {
|
||||||
|
"node": AI_NODE_NAME,
|
||||||
|
"gpu": AI_GPU_DESC,
|
||||||
|
"model": AI_CHAT_MODEL,
|
||||||
|
"endpoint": AI_PUBLIC_ENDPOINT or "/api/ai/chat",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only attempt k8s if we're in-cluster and credentials exist.
|
||||||
|
sa_path = Path("/var/run/secrets/kubernetes.io/serviceaccount")
|
||||||
|
token_path = sa_path / "token"
|
||||||
|
ca_path = sa_path / "ca.crt"
|
||||||
|
ns_path = sa_path / "namespace"
|
||||||
|
if not token_path.exists() or not ca_path.exists() or not ns_path.exists():
|
||||||
|
return meta
|
||||||
|
|
||||||
|
try:
|
||||||
|
token = token_path.read_text().strip()
|
||||||
|
namespace = ns_path.read_text().strip() or AI_K8S_NAMESPACE
|
||||||
|
api_server = os.getenv("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc")
|
||||||
|
api_port = os.getenv("KUBERNETES_SERVICE_PORT", "443")
|
||||||
|
base_url = f"https://{api_server}:{api_port}"
|
||||||
|
pod_url = f"{base_url}/api/v1/namespaces/{namespace}/pods?labelSelector={AI_K8S_LABEL}"
|
||||||
|
|
||||||
|
with httpx.Client(verify=str(ca_path), timeout=HTTP_CHECK_TIMEOUT_SEC, headers={"Authorization": f"Bearer {token}"}) as client:
|
||||||
|
resp = client.get(pod_url)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
items = data.get("items") or []
|
||||||
|
if items:
|
||||||
|
pod = items[0]
|
||||||
|
node_name = pod.get("spec", {}).get("nodeName") or meta["node"]
|
||||||
|
meta["node"] = node_name
|
||||||
|
# If GPU info is annotated on the pod, surface it.
|
||||||
|
annotations = pod.get("metadata", {}).get("annotations") or {}
|
||||||
|
gpu_hint = annotations.get("ai.gpu/description") or annotations.get("gpu/description")
|
||||||
|
if gpu_hint:
|
||||||
|
meta["gpu"] = gpu_hint
|
||||||
|
except Exception:
|
||||||
|
# swallow errors; keep fallbacks
|
||||||
|
pass
|
||||||
|
|
||||||
|
return meta
|
||||||
|
|
||||||
|
|
||||||
@app.route("/", defaults={"path": ""})
|
@app.route("/", defaults={"path": ""})
|
||||||
|
|||||||
@ -20,13 +20,13 @@
|
|||||||
<span class="value mono">{{ meta.gpu }}</span>
|
<span class="value mono">{{ meta.gpu }}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="fact">
|
<div class="fact">
|
||||||
<span class="label mono">Node</span>
|
<span class="label mono">AI Node</span>
|
||||||
<span class="value mono">{{ meta.node }}</span>
|
<span class="value mono">{{ meta.node }}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="fact">
|
<div class="fact">
|
||||||
<span class="label mono">Endpoint</span>
|
<span class="label mono">Endpoint</span>
|
||||||
<button class="endpoint-copy mono" type="button" @click="copyCurl">
|
<button class="endpoint-copy mono" type="button" @click="copyCurl">
|
||||||
{{ apiHost }}
|
{{ meta.endpoint || apiHost }}
|
||||||
<span v-if="copied" class="copied">copied</span>
|
<span v-if="copied" class="copied">copied</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
@ -98,6 +98,7 @@ const meta = ref({
|
|||||||
model: "loading...",
|
model: "loading...",
|
||||||
gpu: "local GPU (dynamic)",
|
gpu: "local GPU (dynamic)",
|
||||||
node: "unknown",
|
node: "unknown",
|
||||||
|
endpoint: "",
|
||||||
});
|
});
|
||||||
const messages = ref([
|
const messages = ref([
|
||||||
{
|
{
|
||||||
@ -128,6 +129,7 @@ async function fetchMeta() {
|
|||||||
model: data.model || meta.value.model,
|
model: data.model || meta.value.model,
|
||||||
gpu: data.gpu || meta.value.gpu,
|
gpu: data.gpu || meta.value.gpu,
|
||||||
node: data.node || meta.value.node,
|
node: data.node || meta.value.node,
|
||||||
|
endpoint: data.endpoint || meta.value.endpoint || apiHost,
|
||||||
};
|
};
|
||||||
} catch {
|
} catch {
|
||||||
// swallow
|
// swallow
|
||||||
@ -211,7 +213,8 @@ function handleKeydown(e) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function copyCurl() {
|
async function copyCurl() {
|
||||||
const curl = `curl -X POST ${new URL(API_URL, window.location.href).toString()} -H 'content-type: application/json' -d '{\"message\":\"hi\"}'`;
|
const target = meta.value.endpoint || new URL(API_URL, window.location.href).toString();
|
||||||
|
const curl = `curl -X POST ${target} -H 'content-type: application/json' -d '{\"message\":\"hi\"}'`;
|
||||||
try {
|
try {
|
||||||
await navigator.clipboard.writeText(curl);
|
await navigator.clipboard.writeText(curl);
|
||||||
copied.value = true;
|
copied.value = true;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user