atlasbot: keep retrying MAS login during transient Synapse outages
This commit is contained in:
parent
fa160f5f9b
commit
cfdd5a377d
@ -34,6 +34,9 @@ OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480"))
|
||||
ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090"))
# Falls back to CHAT_API_HOMEPAGE when ATLASBOT_INTERNAL_TOKEN is unset or empty.
ATLASBOT_INTERNAL_TOKEN = os.environ.get("ATLASBOT_INTERNAL_TOKEN") or os.environ.get("CHAT_API_HOMEPAGE", "")
SNAPSHOT_TTL_SEC = int(os.environ.get("ATLASBOT_SNAPSHOT_TTL_SEC", "30"))
# Upper bound, in seconds, on the backoff delay between login retries.
LOGIN_RETRY_CAP_SEC = int(os.environ.get("ATLASBOT_LOGIN_RETRY_CAP_SEC", "60"))
# 0 means retry forever (default); useful during startup when MAS/Synapse ordering is still converging.
LOGIN_MAX_ATTEMPTS = int(os.environ.get("ATLASBOT_LOGIN_MAX_ATTEMPTS", "0"))

KB_DIR = os.environ.get("KB_DIR", "")
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428")
|
||||
@ -5182,14 +5185,21 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
|
||||
history[hist_key] = history[hist_key][-80:]
|
||||
|
||||
def login_with_retry(user: str, password: str):
    """Log in via ``login``, retrying with capped exponential backoff.

    Each failed attempt is reported on stdout, then the function sleeps
    before trying again. The delay doubles with every attempt (the exponent
    is clamped to 8) and never exceeds ``LOGIN_RETRY_CAP_SEC`` seconds.

    Args:
        user: Account identifier passed through to ``login``.
        password: Credential passed through to ``login``.

    Returns:
        Whatever ``login(user, password)`` returns on the first success.

    Raises:
        Exception: The error from the most recent ``login`` call, re-raised
            once ``LOGIN_MAX_ATTEMPTS`` failures have occurred. When
            ``LOGIN_MAX_ATTEMPTS`` is 0 the function retries forever.
    """
    attempts = 0
    while True:
        try:
            return login(user, password)
        except Exception as exc:  # noqa: BLE001
            attempts += 1
            # A positive limit bounds the retries; bare `raise` keeps the
            # original traceback of the failing login call.
            if LOGIN_MAX_ATTEMPTS > 0 and attempts >= LOGIN_MAX_ATTEMPTS:
                raise
            # Clamp the exponent so 2 ** attempts cannot grow without bound
            # when retrying forever.
            delay = min(LOGIN_RETRY_CAP_SEC, 2 ** min(attempts, 8))
            print(
                f"atlasbot login retry for {normalize_user_id(user)} "
                f"(attempt={attempts}, delay={delay}s): {exc}",
                flush=True,
            )
            time.sleep(delay)
|
||||
|
||||
def _bot_accounts() -> list[dict[str, str]]:
|
||||
accounts: list[dict[str, str]] = []
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user