atlasbot: keep retrying MAS login during transient Synapse outages
This commit is contained in:
parent
fa160f5f9b
commit
cfdd5a377d
@ -34,6 +34,9 @@ OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480"))
|
|||||||
ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090"))
|
ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090"))
|
||||||
ATLASBOT_INTERNAL_TOKEN = os.environ.get("ATLASBOT_INTERNAL_TOKEN") or os.environ.get("CHAT_API_HOMEPAGE", "")
|
ATLASBOT_INTERNAL_TOKEN = os.environ.get("ATLASBOT_INTERNAL_TOKEN") or os.environ.get("CHAT_API_HOMEPAGE", "")
|
||||||
SNAPSHOT_TTL_SEC = int(os.environ.get("ATLASBOT_SNAPSHOT_TTL_SEC", "30"))
|
SNAPSHOT_TTL_SEC = int(os.environ.get("ATLASBOT_SNAPSHOT_TTL_SEC", "30"))
|
||||||
|
LOGIN_RETRY_CAP_SEC = int(os.environ.get("ATLASBOT_LOGIN_RETRY_CAP_SEC", "60"))
|
||||||
|
# 0 means retry forever (default); useful during startup when MAS/Synapse ordering is still converging.
|
||||||
|
LOGIN_MAX_ATTEMPTS = int(os.environ.get("ATLASBOT_LOGIN_MAX_ATTEMPTS", "0"))
|
||||||
|
|
||||||
KB_DIR = os.environ.get("KB_DIR", "")
|
KB_DIR = os.environ.get("KB_DIR", "")
|
||||||
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428")
|
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428")
|
||||||
@ -5182,14 +5185,21 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
|
|||||||
history[hist_key] = history[hist_key][-80:]
|
history[hist_key] = history[hist_key][-80:]
|
||||||
|
|
||||||
def login_with_retry(user: str, password: str):
|
def login_with_retry(user: str, password: str):
|
||||||
last_err = None
|
attempts = 0
|
||||||
for attempt in range(10):
|
while True:
|
||||||
try:
|
try:
|
||||||
return login(user, password)
|
return login(user, password)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
last_err = exc
|
attempts += 1
|
||||||
time.sleep(min(30, 2 ** attempt))
|
if LOGIN_MAX_ATTEMPTS > 0 and attempts >= LOGIN_MAX_ATTEMPTS:
|
||||||
raise last_err
|
raise
|
||||||
|
delay = min(LOGIN_RETRY_CAP_SEC, 2 ** min(attempts, 8))
|
||||||
|
print(
|
||||||
|
f"atlasbot login retry for {normalize_user_id(user)} "
|
||||||
|
f"(attempt={attempts}, delay={delay}s): {exc}",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
def _bot_accounts() -> list[dict[str, str]]:
|
def _bot_accounts() -> list[dict[str, str]]:
|
||||||
accounts: list[dict[str, str]] = []
|
accounts: list[dict[str, str]] = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user