atlasbot: add model fallback and rollout
This commit is contained in:
parent
65e50d1923
commit
27e8a77044
@@ -16,7 +16,7 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: atlasbot
|
app: atlasbot
|
||||||
annotations:
|
annotations:
|
||||||
checksum/atlasbot-configmap: manual-atlasbot-71
|
checksum/atlasbot-configmap: manual-atlasbot-72
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "comms"
|
vault.hashicorp.com/role: "comms"
|
||||||
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
||||||
@@ -83,6 +83,8 @@ spec:
|
|||||||
value: http://ollama.ai.svc.cluster.local:11434
|
value: http://ollama.ai.svc.cluster.local:11434
|
||||||
- name: OLLAMA_MODEL
|
- name: OLLAMA_MODEL
|
||||||
value: qwen2.5:14b-instruct
|
value: qwen2.5:14b-instruct
|
||||||
|
- name: OLLAMA_FALLBACK_MODEL
|
||||||
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
- name: OLLAMA_TIMEOUT_SEC
|
- name: OLLAMA_TIMEOUT_SEC
|
||||||
value: "600"
|
value: "600"
|
||||||
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ ROOM_ALIAS = "#othrys:live.bstein.dev"
|
|||||||
|
|
||||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
|
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
|
||||||
MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
|
MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
|
||||||
|
FALLBACK_MODEL = os.environ.get("OLLAMA_FALLBACK_MODEL", "")
|
||||||
API_KEY = os.environ.get("CHAT_API_KEY", "")
|
API_KEY = os.environ.get("CHAT_API_KEY", "")
|
||||||
OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480"))
|
OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480"))
|
||||||
ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090"))
|
ATLASBOT_HTTP_PORT = int(os.environ.get("ATLASBOT_HTTP_PORT", "8090"))
|
||||||
@@ -3132,9 +3133,18 @@ def _ollama_call(
|
|||||||
lock = _OLLAMA_LOCK if OLLAMA_SERIALIZE else None
|
lock = _OLLAMA_LOCK if OLLAMA_SERIALIZE else None
|
||||||
if lock:
|
if lock:
|
||||||
lock.acquire()
|
lock.acquire()
|
||||||
|
try:
|
||||||
try:
|
try:
|
||||||
with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
|
with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
|
||||||
data = json.loads(resp.read().decode())
|
data = json.loads(resp.read().decode())
|
||||||
|
except error.HTTPError as exc:
|
||||||
|
if exc.code == 404 and FALLBACK_MODEL and FALLBACK_MODEL != payload["model"]:
|
||||||
|
payload["model"] = FALLBACK_MODEL
|
||||||
|
r = request.Request(endpoint, data=json.dumps(payload).encode(), headers=headers)
|
||||||
|
with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
|
||||||
|
data = json.loads(resp.read().decode())
|
||||||
|
else:
|
||||||
|
raise
|
||||||
msg = data.get("message") if isinstance(data, dict) else None
|
msg = data.get("message") if isinstance(data, dict) else None
|
||||||
if isinstance(msg, dict):
|
if isinstance(msg, dict):
|
||||||
raw_reply = msg.get("content")
|
raw_reply = msg.get("content")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user