atlasbot: enforce per-call timeout across retries

2026-03-30 03:44:35 -03:00 · 2026-03-30 03:44:35 -03:00 · 86a249d014
commit 86a249d014
parent 2cb601a614
1 changed file with 11 additions and 1 deletion
--- a/atlasbot/llm/client.py
+++ b/atlasbot/llm/client.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import time
 from typing import Any

 import httpx
@@ -42,9 +43,16 @@ class LLMClient:
            "stream": False,
        }
        timeout = timeout_sec if timeout_sec is not None else self._timeout
+        deadline = (time.monotonic() + timeout) if timeout_sec is not None else None
        for attempt in range(max(1, self._settings.ollama_retries + 1)):
+            call_timeout = timeout
+            if deadline is not None:
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    raise LLMError("timeout")
+                call_timeout = min(call_timeout, remaining)
            try:
-                async with httpx.AsyncClient(timeout=timeout) as client:
+                async with httpx.AsyncClient(timeout=call_timeout) as client:
                    resp = await client.post(self._endpoint(), json=payload, headers=self._headers)
                if resp.status_code == FALLBACK_STATUS_CODE and self._settings.ollama_fallback_model:
                    payload["model"] = self._settings.ollama_fallback_model
@@ -61,6 +69,8 @@ class LLMClient:
                return str(content)
            except Exception as exc:
                log.warning("ollama call failed", extra={"extra": {"attempt": attempt + 1, "error": str(exc)}})
+                if deadline is not None and (deadline - time.monotonic()) <= 0:
+                    raise LLMError("timeout") from exc
                if attempt + 1 >= max(1, self._settings.ollama_retries + 1):
                    raise LLMError(str(exc)) from exc
        raise LLMError("ollama retries exhausted")