diff --git a/backend/atlas_portal/routes/ai.py b/backend/atlas_portal/routes/ai.py
index 1c0055a..3d5e705 100644
--- a/backend/atlas_portal/routes/ai.py
+++ b/backend/atlas_portal/routes/ai.py
@@ -19,34 +19,42 @@ def register(app) -> None:
         payload = request.get_json(silent=True) or {}
         user_message = (payload.get("message") or "").strip()
         history = payload.get("history") or []
+        profile = (payload.get("profile") or payload.get("mode") or "atlas-quick").strip().lower()
         if not user_message:
            return jsonify({"error": "message required"}), 400
 
         started = time.time()
-        atlasbot_reply = _atlasbot_answer(user_message)
-        if atlasbot_reply:
+        if profile in {"stock", "stock-ai", "stock_ai"}:
+            reply = _stock_answer(user_message, history)
+            source = "stock"
+        else:
+            mode = "smart" if profile in {"atlas-smart", "smart"} else "quick"
+            reply = _atlasbot_answer(user_message, mode)
+            source = f"atlas-{mode}"
+        if reply:
             elapsed_ms = int((time.time() - started) * 1000)
-            return jsonify({"reply": atlasbot_reply, "latency_ms": elapsed_ms, "source": "atlasbot"})
+            return jsonify({"reply": reply, "latency_ms": elapsed_ms, "source": source})
 
         elapsed_ms = int((time.time() - started) * 1000)
         return jsonify(
             {
                 "reply": "Atlasbot is busy. Please try again in a moment.",
                 "latency_ms": elapsed_ms,
-                "source": "atlasbot",
+                "source": source,
             }
         )
 
     @app.route("/api/chat/info", methods=["GET"])
     @app.route("/api/ai/info", methods=["GET"])
     def ai_info() -> Any:
-        meta = _discover_ai_meta()
+        profile = (request.args.get("profile") or "atlas-quick").strip().lower()
+        meta = _discover_ai_meta(profile)
         return jsonify(meta)
 
     _start_keep_warm()
 
 
-def _atlasbot_answer(message: str) -> str:
+def _atlasbot_answer(message: str, mode: str) -> str:
     endpoint = settings.AI_ATLASBOT_ENDPOINT
     if not endpoint:
         return ""
@@ -55,7 +63,7 @@ def _atlasbot_answer(message: str) -> str:
         headers["X-Internal-Token"] = settings.AI_ATLASBOT_TOKEN
     try:
         with httpx.Client(timeout=settings.AI_ATLASBOT_TIMEOUT_SEC) as client:
-            resp = client.post(endpoint, json={"prompt": message}, headers=headers)
+            resp = client.post(endpoint, json={"prompt": message, "mode": mode}, headers=headers)
             if resp.status_code != 200:
                 return ""
             data = resp.json()
@@ -64,13 +72,55 @@
     except (httpx.RequestError, ValueError):
         return ""
 
 
-def _discover_ai_meta() -> dict[str, str]:
+def _stock_answer(message: str, history: list[dict[str, Any]]) -> str:
+    body = {
+        "model": settings.AI_CHAT_MODEL,
+        "messages": _build_messages(message, history),
+        "stream": False,
+    }
+    try:
+        with httpx.Client(timeout=settings.AI_CHAT_TIMEOUT_SEC) as client:
+            resp = client.post(f"{settings.AI_CHAT_API}/api/chat", json=body)
+            resp.raise_for_status()
+            data = resp.json()
+    except (httpx.RequestError, ValueError):
+        return ""
+    message_data = data.get("message") if isinstance(data, dict) else None
+    if isinstance(message_data, dict) and message_data.get("content"):
+        return str(message_data["content"]).strip()
+    if isinstance(data, dict) and data.get("response"):
+        return str(data["response"]).strip()
+    return ""
+
+
+def _build_messages(message: str, history: list[dict[str, Any]]) -> list[dict[str, str]]:
+    messages = [{"role": "system", "content": settings.AI_CHAT_SYSTEM_PROMPT}]
+    for entry in history:
+        role = entry.get("role")
+        content = entry.get("content")
+        if role in {"user", "assistant"} and isinstance(content, str) and content.strip():
+            messages.append({"role": role, "content": content})
+    messages.append({"role": "user", "content": message})
+    return messages
+
+
+def _discover_ai_meta(profile: str) -> dict[str, str]:
     meta = {
         "node": settings.AI_NODE_NAME,
         "gpu": settings.AI_GPU_DESC,
         "model": settings.AI_CHAT_MODEL,
         "endpoint": settings.AI_PUBLIC_ENDPOINT or "/api/chat",
+        "profile": profile,
     }
+    if profile in {"atlas-smart", "smart"}:
+        meta["model"] = settings.AI_ATLASBOT_MODEL_SMART or settings.AI_CHAT_MODEL
+        meta["endpoint"] = "/api/ai/chat"
+    elif profile in {"atlas-quick", "quick"}:
+        meta["model"] = settings.AI_ATLASBOT_MODEL_FAST or settings.AI_CHAT_MODEL
+        meta["endpoint"] = "/api/ai/chat"
+    elif profile in {"stock", "stock-ai", "stock_ai"}:
+        meta["model"] = settings.AI_CHAT_MODEL
+        meta["endpoint"] = "/api/ai/chat"
     sa_path = Path("/var/run/secrets/kubernetes.io/serviceaccount")
     token_path = sa_path / "token"
diff --git a/backend/atlas_portal/settings.py b/backend/atlas_portal/settings.py
index 35c2d78..a35a922 100644
--- a/backend/atlas_portal/settings.py
+++ b/backend/atlas_portal/settings.py
@@ -29,6 +29,8 @@ AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
 AI_ATLASBOT_ENDPOINT = os.getenv("AI_ATLASBOT_ENDPOINT", "").strip()
 AI_ATLASBOT_TOKEN = os.getenv("AI_ATLASBOT_TOKEN", "").strip()
 AI_ATLASBOT_TIMEOUT_SEC = float(os.getenv("AI_ATLASBOT_TIMEOUT_SEC", "5"))
+AI_ATLASBOT_MODEL_FAST = os.getenv("AI_ATLASBOT_MODEL_FAST", "").strip()
+AI_ATLASBOT_MODEL_SMART = os.getenv("AI_ATLASBOT_MODEL_SMART", "").strip()
 AI_NODE_NAME = os.getenv("AI_CHAT_NODE_NAME") or os.getenv("AI_NODE_NAME") or "ai-cluster"
 AI_GPU_DESC = os.getenv("AI_CHAT_GPU_DESC") or "local GPU (dynamic)"
 AI_PUBLIC_ENDPOINT = os.getenv("AI_PUBLIC_CHAT_ENDPOINT", "https://chat.ai.bstein.dev/api/chat")
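A minimal request sketch for the new profile routing, illustrative only: it assumes the chat handler above is mounted at /api/ai/chat (the path _discover_ai_meta now reports) and that the portal is reachable at http://localhost:8000, which is a made-up local base URL. The request fields (message, history, profile) and the response keys (reply, latency_ms, source) come from the handler in this diff.

# Illustrative client sketch, not part of the change.
# Assumed: base URL http://localhost:8000 and chat route /api/ai/chat.
import httpx

payload = {
    "message": "How many GPU nodes are online?",
    "history": [],              # optional prior turns: [{"role": "user" or "assistant", "content": "..."}]
    "profile": "atlas-smart",   # "atlas-quick" (default), "atlas-smart", or "stock"
}
resp = httpx.post("http://localhost:8000/api/ai/chat", json=payload, timeout=30.0)
data = resp.json()
print(data["source"], data["latency_ms"], data["reply"])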
diff --git a/frontend/src/views/AiView.vue b/frontend/src/views/AiView.vue
index 7863a24..9bfa28a 100644
--- a/frontend/src/views/AiView.vue
+++ b/frontend/src/views/AiView.vue
@@ -12,16 +12,16 @@
             Model
-            {{ meta.model }}
+            {{ current.meta.model }}
             GPU
-            {{ meta.gpu }}
+            {{ current.meta.gpu }}
             Endpoint
@@ -29,8 +29,20 @@
+
+
+
-
+
             {{ msg.role === 'assistant' ? 'Atlas AI' : 'you' }}
             {{ msg.content }}
@@ -38,10 +50,10 @@
             {{ msg.latency_ms }} ms
-
+
             error
-
-            {{ error }}
+
+            {{ current.error }}
@@ -65,32 +77,49 @@
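The Model/GPU/Endpoint rows above render whatever /api/ai/info returns for the selected profile. A small sketch of that lookup, under the same assumed local base URL as the earlier example; the profile query parameter and the model/endpoint keys are defined by ai_info and _discover_ai_meta in this diff.

# Illustrative only: shows which model and endpoint the UI would display per profile.
import httpx

for profile in ("atlas-quick", "atlas-smart", "stock"):
    meta = httpx.get(
        "http://localhost:8000/api/ai/info",
        params={"profile": profile},
        timeout=10.0,
    ).json()
    print(f"{profile}: model={meta['model']} endpoint={meta['endpoint']}")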