atlasbot: enforce per-call timeout across retries
This commit is contained in:
parent
2cb601a614
commit
86a249d014
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -42,9 +43,16 @@ class LLMClient:
|
|||||||
"stream": False,
|
"stream": False,
|
||||||
}
|
}
|
||||||
timeout = timeout_sec if timeout_sec is not None else self._timeout
|
timeout = timeout_sec if timeout_sec is not None else self._timeout
|
||||||
|
deadline = (time.monotonic() + timeout) if timeout_sec is not None else None
|
||||||
for attempt in range(max(1, self._settings.ollama_retries + 1)):
|
for attempt in range(max(1, self._settings.ollama_retries + 1)):
|
||||||
|
call_timeout = timeout
|
||||||
|
if deadline is not None:
|
||||||
|
remaining = deadline - time.monotonic()
|
||||||
|
if remaining <= 0:
|
||||||
|
raise LLMError("timeout")
|
||||||
|
call_timeout = min(call_timeout, remaining)
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
async with httpx.AsyncClient(timeout=call_timeout) as client:
|
||||||
resp = await client.post(self._endpoint(), json=payload, headers=self._headers)
|
resp = await client.post(self._endpoint(), json=payload, headers=self._headers)
|
||||||
if resp.status_code == FALLBACK_STATUS_CODE and self._settings.ollama_fallback_model:
|
if resp.status_code == FALLBACK_STATUS_CODE and self._settings.ollama_fallback_model:
|
||||||
payload["model"] = self._settings.ollama_fallback_model
|
payload["model"] = self._settings.ollama_fallback_model
|
||||||
@ -61,6 +69,8 @@ class LLMClient:
|
|||||||
return str(content)
|
return str(content)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
log.warning("ollama call failed", extra={"extra": {"attempt": attempt + 1, "error": str(exc)}})
|
log.warning("ollama call failed", extra={"extra": {"attempt": attempt + 1, "error": str(exc)}})
|
||||||
|
if deadline is not None and (deadline - time.monotonic()) <= 0:
|
||||||
|
raise LLMError("timeout") from exc
|
||||||
if attempt + 1 >= max(1, self._settings.ollama_retries + 1):
|
if attempt + 1 >= max(1, self._settings.ollama_retries + 1):
|
||||||
raise LLMError(str(exc)) from exc
|
raise LLMError(str(exc)) from exc
|
||||||
raise LLMError("ollama retries exhausted")
|
raise LLMError("ollama retries exhausted")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user