feat(ai): add keep-warm ping for chat model
This commit is contained in:
parent
5228a68fe2
commit
33e6082b56
@ -41,6 +41,8 @@ AI_K8S_LABEL = os.getenv("AI_K8S_LABEL", "app=ollama")
|
|||||||
AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
|
AI_K8S_NAMESPACE = os.getenv("AI_K8S_NAMESPACE", "ai")
|
||||||
AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
|
AI_MODEL_ANNOTATION = os.getenv("AI_MODEL_ANNOTATION", "ai.bstein.dev/model")
|
||||||
AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")
|
AI_GPU_ANNOTATION = os.getenv("AI_GPU_ANNOTATION", "ai.bstein.dev/gpu")
|
||||||
|
AI_WARM_INTERVAL_SEC = float(os.getenv("AI_WARM_INTERVAL_SEC", "300"))
|
||||||
|
AI_WARM_ENABLED = os.getenv("AI_WARM_ENABLED", "true").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
||||||
|
|
||||||
@ -256,6 +258,35 @@ def _discover_ai_meta() -> dict[str, str]:
|
|||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
|
def _keep_warm() -> None:
    """Start a background thread that periodically pings the chat model.

    Does nothing when ``AI_WARM_ENABLED`` is false or
    ``AI_WARM_INTERVAL_SEC`` is not positive. Otherwise a daemon thread named
    ``ai-keep-warm`` is started; it sleeps ``AI_WARM_INTERVAL_SEC`` seconds and
    then POSTs a tiny non-streaming chat request to
    ``{AI_CHAT_API}/api/chat``, repeating forever. The first ping is sent
    only after one full interval has elapsed.

    The ping is best-effort: any failure (connection error, timeout, non-2xx
    response) is logged as a warning and the loop carries on with the next
    interval, so a failed ping never terminates the thread.
    """
    import logging
    import threading

    if not AI_WARM_ENABLED or AI_WARM_INTERVAL_SEC <= 0:
        return

    log = logging.getLogger(__name__)

    def loop() -> None:
        while True:
            time.sleep(AI_WARM_INTERVAL_SEC)
            try:
                body = {
                    "model": AI_CHAT_MODEL,
                    "messages": [{"role": "user", "content": "ping"}],
                    "stream": False,
                }
                # Cap the timeout at 15s so a stalled backend cannot hold the
                # ping for the full chat timeout.
                with httpx.Client(timeout=min(AI_CHAT_TIMEOUT_SEC, 15)) as client:
                    response = client.post(f"{AI_CHAT_API}/api/chat", json=body)
                    # A 4xx/5xx reply means the ping did not do its job;
                    # surface it through the same failure path as a
                    # transport error instead of discarding the response.
                    response.raise_for_status()
            except Exception as exc:
                # Best-effort: record the failure and try again next interval.
                log.warning("AI keep-warm ping failed: %s", exc)

    # daemon=True so this thread never blocks interpreter shutdown.
    threading.Thread(target=loop, daemon=True, name="ai-keep-warm").start()
|
||||||
|
|
||||||
|
|
||||||
|
# Start keep-warm loop on import.
|
||||||
|
_keep_warm()
|
||||||
|
|
||||||
|
|
||||||
@app.route("/", defaults={"path": ""})
|
@app.route("/", defaults={"path": ""})
|
||||||
@app.route("/<path:path>")
|
@app.route("/<path:path>")
|
||||||
def serve_frontend(path: str) -> Any:
|
def serve_frontend(path: str) -> Any:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user