atlasbot: enforce caller-supplied timeout as a total deadline across retries

This commit is contained in:
Brad Stein 2026-03-30 03:44:35 -03:00
parent 2cb601a614
commit 86a249d014

View File

@ -1,5 +1,6 @@
import json
import logging
import time
from typing import Any
import httpx
@ -42,9 +43,16 @@ class LLMClient:
"stream": False,
}
timeout = timeout_sec if timeout_sec is not None else self._timeout
deadline = (time.monotonic() + timeout) if timeout_sec is not None else None
for attempt in range(max(1, self._settings.ollama_retries + 1)):
call_timeout = timeout
if deadline is not None:
remaining = deadline - time.monotonic()
if remaining <= 0:
raise LLMError("timeout")
call_timeout = min(call_timeout, remaining)
try:
async with httpx.AsyncClient(timeout=timeout) as client:
async with httpx.AsyncClient(timeout=call_timeout) as client:
resp = await client.post(self._endpoint(), json=payload, headers=self._headers)
if resp.status_code == FALLBACK_STATUS_CODE and self._settings.ollama_fallback_model:
payload["model"] = self._settings.ollama_fallback_model
@ -61,6 +69,8 @@ class LLMClient:
return str(content)
except Exception as exc:
log.warning("ollama call failed", extra={"extra": {"attempt": attempt + 1, "error": str(exc)}})
if deadline is not None and (deadline - time.monotonic()) <= 0:
raise LLMError("timeout") from exc
if attempt + 1 >= max(1, self._settings.ollama_retries + 1):
raise LLMError(str(exc)) from exc
raise LLMError("ollama retries exhausted")