atlasbot: call ollama chat directly

parent d8ae9c5901
commit 3b1e74d278
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-33
+        checksum/atlasbot-configmap: manual-atlasbot-34
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
@@ -80,7 +80,7 @@ spec:
         - name: BOT_MENTIONS
           value: atlasbot,aatlasbot
         - name: OLLAMA_URL
-          value: http://chat-ai-gateway.bstein-dev-home.svc.cluster.local/
+          value: http://ollama.ai.svc.cluster.local:11434
         - name: OLLAMA_MODEL
           value: qwen2.5:14b-instruct-q4_0
         - name: OLLAMA_TIMEOUT_SEC
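With OLLAMA_URL now pointing at the Ollama service itself rather than the chat-ai-gateway, the bot has to speak Ollama's native chat API. A minimal smoke test for the new value, assuming Ollama's standard /api/chat contract; the timeout is arbitrary, and it must run from a pod that resolves cluster DNS:

import json
from urllib import request

# Endpoint and model taken from the manifest above.
url = "http://ollama.ai.svc.cluster.local:11434/api/chat"
payload = {
    "model": "qwen2.5:14b-instruct-q4_0",
    "messages": [{"role": "user", "content": "ping"}],
    "stream": False,  # ask for one JSON body instead of streamed chunks
}
req = request.Request(url, data=json.dumps(payload).encode(),
                      headers={"Content-Type": "application/json"})
with request.urlopen(req, timeout=60) as resp:
    print(json.loads(resp.read().decode())["message"]["content"])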
@@ -155,6 +155,37 @@ def _ensure_confidence(text: str) -> str:
     return "\n".join(lines)
 
 
+def _ollama_endpoint() -> str:
+    url = (OLLAMA_URL or "").strip()
+    if not url:
+        return ""
+    if url.endswith("/api/chat"):
+        return url
+    return url.rstrip("/") + "/api/chat"
+
+
+def _history_to_messages(lines: list[str]) -> list[dict[str, str]]:
+    messages: list[dict[str, str]] = []
+    for line in lines:
+        raw = (line or "").strip()
+        if not raw:
+            continue
+        role = "user"
+        content = raw
+        lowered = raw.lower()
+        if lowered.startswith("atlas:"):
+            role = "assistant"
+            content = raw.split(":", 1)[1].strip()
+        elif lowered.startswith("user:"):
+            role = "user"
+            content = raw.split(":", 1)[1].strip()
+        elif ":" in raw:
+            content = raw.split(":", 1)[1].strip()
+        if content:
+            messages.append({"role": role, "content": content})
+    return messages
+
+
 # Mention detection (Matrix rich mentions + plain @atlas).
 MENTION_TOKENS = [m.strip() for m in BOT_MENTIONS.split(",") if m.strip()]
 MENTION_LOCALPARTS = [m.lstrip("@").split(":", 1)[0] for m in MENTION_TOKENS]
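For reference, how the new helpers behave on representative inputs (a sketch; the "Atlas:"/"User:" prefixes are the ones _ollama_call writes into history, and the OLLAMA_URL value is the one set in the manifest above):

# With OLLAMA_URL = "http://ollama.ai.svc.cluster.local:11434":
_ollama_endpoint()
# -> "http://ollama.ai.svc.cluster.local:11434/api/chat"

_history_to_messages([
    "User: what pods are failing?",
    "Atlas: none right now. Confidence: high",
    "bare line without a prefix",
])
# -> [{"role": "user", "content": "what pods are failing?"},
#     {"role": "assistant", "content": "none right now. Confidence: high"},
#     {"role": "user", "content": "bare line without a prefix"}]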
@@ -1837,25 +1868,33 @@ def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
         "If the answer is not grounded in the provided context or tool data, say you do not know. "
         "End every response with a line: 'Confidence: high|medium|low'."
     )
-    transcript_parts = [system]
+    endpoint = _ollama_endpoint()
+    if not endpoint:
+        raise RuntimeError("ollama endpoint missing")
+    system_content = system
     if context:
-        transcript_parts.append("Context (grounded):\n" + context[:MAX_CONTEXT_CHARS])
-    transcript_parts.extend(history[hist_key][-24:])
-    transcript_parts.append(f"User: {prompt}")
-    transcript = "\n".join(transcript_parts)
+        system_content += "\n\nContext (grounded):\n" + context[:MAX_CONTEXT_CHARS]
+    messages: list[dict[str, str]] = [{"role": "system", "content": system_content}]
+    messages.extend(_history_to_messages(history[hist_key][-24:]))
+    messages.append({"role": "user", "content": prompt})
+
 
-    payload = {"model": MODEL, "message": transcript}
+    payload = {"model": MODEL, "messages": messages, "stream": False}
     headers = {"Content-Type": "application/json"}
     if API_KEY:
         headers["x-api-key"] = API_KEY
-    r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers)
+    r = request.Request(endpoint, data=json.dumps(payload).encode(), headers=headers)
     lock = _OLLAMA_LOCK if OLLAMA_SERIALIZE else None
     if lock:
         lock.acquire()
     try:
         with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
             data = json.loads(resp.read().decode())
-        raw_reply = data.get("message") or data.get("response") or data.get("reply") or data
+        msg = data.get("message") if isinstance(data, dict) else None
+        if isinstance(msg, dict):
+            raw_reply = msg.get("content")
+        else:
+            raw_reply = data.get("response") or data.get("reply") or data
         reply = _normalize_reply(raw_reply) or "I'm here to help."
         history[hist_key].append(f"Atlas: {reply}")
         return reply
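Note on the response handling: Ollama's /api/chat puts the reply under {"message": {"role": "assistant", "content": ...}}, while the previous gateway-style body exposed a flat "response"/"reply" string; the new branch prefers the chat shape and keeps the flat shapes as a fallback. A self-contained sketch of that logic (the _extract_reply name is hypothetical, introduced only for illustration):

def _extract_reply(data):
    # Hypothetical mirror of the parsing in the hunk above: prefer the
    # chat-style {"message": {"content": ...}} shape, fall back to flat bodies.
    msg = data.get("message") if isinstance(data, dict) else None
    if isinstance(msg, dict):
        return msg.get("content")
    return data.get("response") or data.get("reply") or data

print(_extract_reply({"message": {"role": "assistant", "content": "pong"}}))  # pong
print(_extract_reply({"response": "pong"}))                                   # pong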