atlasbot: enforce per-call timeout across retries
This commit is contained in:
parent
2cb601a614
commit
86a249d014
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
@@ -42,9 +43,16 @@ class LLMClient:
|
||||
"stream": False,
|
||||
}
|
||||
timeout = timeout_sec if timeout_sec is not None else self._timeout
|
||||
deadline = (time.monotonic() + timeout) if timeout_sec is not None else None
|
||||
for attempt in range(max(1, self._settings.ollama_retries + 1)):
|
||||
call_timeout = timeout
|
||||
if deadline is not None:
|
||||
remaining = deadline - time.monotonic()
|
||||
if remaining <= 0:
|
||||
raise LLMError("timeout")
|
||||
call_timeout = min(call_timeout, remaining)
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
async with httpx.AsyncClient(timeout=call_timeout) as client:
|
||||
resp = await client.post(self._endpoint(), json=payload, headers=self._headers)
|
||||
if resp.status_code == FALLBACK_STATUS_CODE and self._settings.ollama_fallback_model:
|
||||
payload["model"] = self._settings.ollama_fallback_model
|
||||
@@ -61,6 +69,8 @@ class LLMClient:
|
||||
return str(content)
|
||||
except Exception as exc:
|
||||
log.warning("ollama call failed", extra={"extra": {"attempt": attempt + 1, "error": str(exc)}})
|
||||
if deadline is not None and (deadline - time.monotonic()) <= 0:
|
||||
raise LLMError("timeout") from exc
|
||||
if attempt + 1 >= max(1, self._settings.ollama_retries + 1):
|
||||
raise LLMError(str(exc)) from exc
|
||||
raise LLMError("ollama retries exhausted")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user