From 27e8a770448033a5e03227eee68206911b17eed3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 27 Jan 2026 21:16:47 -0300 Subject: [PATCH] atlasbot: add model fallback and rollout --- services/comms/atlasbot-deployment.yaml | 4 +++- services/comms/scripts/atlasbot/bot.py | 24 +++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index 97567eb..7414f1e 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: manual-atlasbot-71 + checksum/atlasbot-configmap: manual-atlasbot-72 vault.hashicorp.com/agent-inject: "true" vault.hashicorp.com/role: "comms" vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" @@ -83,6 +83,8 @@ spec: value: http://ollama.ai.svc.cluster.local:11434 - name: OLLAMA_MODEL value: qwen2.5:14b-instruct + - name: OLLAMA_FALLBACK_MODEL + value: qwen2.5:14b-instruct-q4_0 - name: OLLAMA_TIMEOUT_SEC value: "600" - name: ATLASBOT_THINKING_INTERVAL_SEC diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py index 47458ea..2c93b75 100644 --- a/services/comms/scripts/atlasbot/bot.py +++ b/services/comms/scripts/atlasbot/bot.py @@ -17,6 +17,7 @@ ROOM_ALIAS = "#othrys:live.bstein.dev" OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") +FALLBACK_MODEL = os.environ.get("OLLAMA_FALLBACK_MODEL", "") API_KEY = os.environ.get("CHAT_API_KEY", "") OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480")) ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090")) @@ -3133,14 +3134,23 @@ def _ollama_call( if lock: lock.acquire() try: - with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp: - data = json.loads(resp.read().decode()) - msg = data.get("message") if isinstance(data, dict) else None - if isinstance(msg, dict): - raw_reply = msg.get("content") + try: + with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp: + data = json.loads(resp.read().decode()) + except error.HTTPError as exc: + if exc.code == 404 and FALLBACK_MODEL and FALLBACK_MODEL != payload["model"]: + payload["model"] = FALLBACK_MODEL + r = request.Request(endpoint, data=json.dumps(payload).encode(), headers=headers) + with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp: + data = json.loads(resp.read().decode()) else: - raw_reply = data.get("response") or data.get("reply") or data - reply = _normalize_reply(raw_reply) or "I'm here to help." + raise + msg = data.get("message") if isinstance(data, dict) else None + if isinstance(msg, dict): + raw_reply = msg.get("content") + else: + raw_reply = data.get("response") or data.get("reply") or data + reply = _normalize_reply(raw_reply) or "I'm here to help." if use_history: history[hist_key].append(f"Atlas: {reply}") return reply