feat(ai): add ollama chat proxy and UI
This commit is contained in:
parent
4f9211be32
commit
7aa8b8fce8
@ -9,8 +9,9 @@ from urllib.error import URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
|
||||
from flask import Flask, jsonify, send_from_directory
|
||||
from flask import Flask, jsonify, request, send_from_directory
|
||||
from flask_cors import CORS
|
||||
import httpx
|
||||
|
||||
|
||||
app = Flask(__name__, static_folder="../frontend/dist", static_url_path="")
|
||||
@ -26,6 +27,13 @@ HTTP_CHECK_TIMEOUT_SEC = float(os.getenv("HTTP_CHECK_TIMEOUT_SEC", "2"))
|
||||
LAB_STATUS_CACHE_SEC = float(os.getenv("LAB_STATUS_CACHE_SEC", "30"))
|
||||
GRAFANA_HEALTH_URL = os.getenv("GRAFANA_HEALTH_URL", "https://metrics.bstein.dev/api/health")
|
||||
OCEANUS_NODE_EXPORTER_URL = os.getenv("OCEANUS_NODE_EXPORTER_URL", "http://192.168.22.24:9100/metrics")
|
||||
# Base URL of the Ollama HTTP API; trailing slashes are stripped so that
# appending "/api/chat" never produces a double slash.
AI_CHAT_API = os.getenv("AI_CHAT_API", "http://ollama.ai.svc.cluster.local:11434").rstrip("/")
|
||||
# Model name sent in the "model" field of every upstream chat request.
AI_CHAT_MODEL = os.getenv("AI_CHAT_MODEL", "phi3:mini")
|
||||
# System prompt placed first in each conversation; an empty value means no
# system message is sent.
AI_CHAT_SYSTEM_PROMPT = os.getenv(
|
||||
"AI_CHAT_SYSTEM_PROMPT",
|
||||
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
|
||||
)
|
||||
# Timeout, in seconds, handed to the httpx client used for the upstream call.
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
|
||||
|
||||
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
||||
|
||||
@ -137,6 +145,42 @@ def lab_status() -> Any:
|
||||
return jsonify(payload)
|
||||
|
||||
|
||||
@app.route("/api/ai/chat", methods=["POST"])
def ai_chat() -> Any:
    """Proxy one chat turn to the Ollama ``/api/chat`` endpoint.

    Request body (JSON object):
        message: required, non-blank string with the new user turn.
        history: optional list of ``{"role": ..., "content": ...}`` objects.
            Only ``user`` / ``assistant`` entries with non-blank string
            content are forwarded; anything else is ignored.

    Returns:
        200 with ``{"reply": str, "latency_ms": int}`` on success.
        400 with ``{"error": "message required"}`` when ``message`` is
        missing, blank, or not a string.
        502 with a generic ``{"error": ...}`` when the upstream call fails
        or returns a non-JSON body.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (list, string, number) has no fields.
        payload = {}

    raw_message = payload.get("message")
    user_message = raw_message.strip() if isinstance(raw_message, str) else ""
    if not user_message:
        return jsonify({"error": "message required"}), 400

    raw_history = payload.get("history")
    history = raw_history if isinstance(raw_history, list) else []

    messages: list[dict[str, str]] = []
    if AI_CHAT_SYSTEM_PROMPT:
        messages.append({"role": "system", "content": AI_CHAT_SYSTEM_PROMPT})

    for item in history:
        # The endpoint is public: tolerate malformed entries instead of raising.
        if not isinstance(item, dict):
            continue
        role = item.get("role")
        content = item.get("content")
        if role in ("user", "assistant") and isinstance(content, str) and content.strip():
            messages.append({"role": role, "content": content.strip()})

    # Clients may already have appended the current turn to their history;
    # drop that trailing echo so the model does not see the message twice.
    if messages and messages[-1] == {"role": "user", "content": user_message}:
        messages.pop()
    messages.append({"role": "user", "content": user_message})

    body = {"model": AI_CHAT_MODEL, "messages": messages, "stream": False}
    # Monotonic clock: the latency figure must not jump if the wall clock is adjusted.
    started = time.monotonic()

    try:
        with httpx.Client(timeout=AI_CHAT_TIMEOUT_SEC) as client:
            resp = client.post(f"{AI_CHAT_API}/api/chat", json=body)
            resp.raise_for_status()
            data = resp.json()
    except (httpx.RequestError, httpx.HTTPStatusError, ValueError) as exc:
        # Exception text can contain the in-cluster upstream URL; keep it in
        # the server log and return a generic message to anonymous callers.
        app.logger.warning("AI chat upstream request failed: %s", exc)
        return jsonify({"error": "AI backend unavailable"}), 502

    # Guard against an upstream body that is valid JSON but not the expected shape.
    upstream_message = data.get("message") if isinstance(data, dict) else None
    reply = upstream_message.get("content") if isinstance(upstream_message, dict) else None
    elapsed_ms = int((time.monotonic() - started) * 1000)
    return jsonify({"reply": reply or "", "latency_ms": elapsed_ms})
|
||||
|
||||
|
||||
@app.route("/", defaults={"path": ""})
|
||||
@app.route("/<path:path>")
|
||||
def serve_frontend(path: str) -> Any:
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
flask==3.0.3
|
||||
flask-cors==4.0.0
|
||||
gunicorn==21.2.0
|
||||
httpx==0.27.2
|
||||
|
||||
@ -122,14 +122,14 @@ export function fallbackServices() {
|
||||
link: "https://meet.bstein.dev",
|
||||
status: "degraded",
|
||||
},
|
||||
{
|
||||
name: "AI Chat",
|
||||
category: "ai",
|
||||
summary: "LLM Chat - Planned",
|
||||
link: "/ai",
|
||||
host: "chat.ai.bstein.dev",
|
||||
status: "planned",
|
||||
},
|
||||
{
|
||||
name: "AI Chat",
|
||||
category: "ai",
|
||||
summary: "LLM chat (public beta)",
|
||||
link: "/ai",
|
||||
host: "bstein.dev/ai",
|
||||
status: "beta",
|
||||
},
|
||||
{
|
||||
name: "AI Image",
|
||||
category: "ai",
|
||||
|
||||
@ -1,25 +1,271 @@
|
||||
<template>
|
||||
<div class="page">
|
||||
<section class="card">
|
||||
<h1>AI services (planned)</h1>
|
||||
<p>Targets for chat.ai.bstein.dev, draw.ai.bstein.dev, and talk.ai.bstein.dev. These will land behind Keycloak once the pipelines are ready.</p>
|
||||
<section class="card hero glass">
|
||||
<div>
|
||||
<p class="eyebrow">Atlas AI</p>
|
||||
<h1>Chat</h1>
|
||||
<p class="lede">
|
||||
Lightweight LLM running on titan-24 (RTX 3080, 8GB). Anyone can chat without auth. Responses are single-turn per
|
||||
send; the client sends the on-page history with every request.
|
||||
</p>
|
||||
<div class="pill mono pill-live">Online</div>
|
||||
</div>
|
||||
<div class="hero-facts">
|
||||
<div class="fact">
|
||||
<span class="label mono">Model</span>
|
||||
<span class="value mono">phi3:mini (4k)</span>
|
||||
</div>
|
||||
<div class="fact">
|
||||
<span class="label mono">GPU</span>
|
||||
<span class="value mono">titan-24 · 3080 (8GB)</span>
|
||||
</div>
|
||||
<div class="fact">
|
||||
<span class="label mono">Endpoint</span>
|
||||
<span class="value mono">/api/ai/chat</span>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="card chat-card">
|
||||
<div class="chat-window" ref="chatWindow">
|
||||
<div v-for="(msg, idx) in messages" :key="idx" :class="['chat-row', msg.role]">
|
||||
<div class="bubble">
|
||||
<div class="role mono">{{ msg.role === 'assistant' ? 'ai' : 'you' }}</div>
|
||||
<p>{{ msg.content }}</p>
|
||||
<div v-if="msg.latency_ms" class="meta mono">{{ msg.latency_ms }} ms</div>
|
||||
</div>
|
||||
</div>
|
||||
<div v-if="error" class="chat-row error">
|
||||
<div class="bubble">
|
||||
<div class="role mono">error</div>
|
||||
<p>{{ error }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<form class="chat-form" @submit.prevent="sendMessage">
|
||||
<textarea
|
||||
v-model="draft"
|
||||
placeholder="Ask anything about the lab or general topics..."
|
||||
rows="3"
|
||||
:disabled="sending"
|
||||
/>
|
||||
<div class="actions">
|
||||
<span class="hint mono">Shift+Enter for newline</span>
|
||||
<button class="primary" type="submit" :disabled="sending || !draft.trim()">
|
||||
{{ sending ? "Sending..." : "Send" }}
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="card info-card">
|
||||
<h2>Notes</h2>
|
||||
<ul>
|
||||
<li>Chat: conversational agent with SSO.</li>
|
||||
<li>Image: text-to-image workflows for user media.</li>
|
||||
<li>Speech: voice-to-voice translation and dubbing.</li>
|
||||
<li>Backend proxies requests to Ollama inside the cluster; no external calls are made.</li>
|
||||
<li>Short-term context: the chat history in this page is sent each turn. Refresh clears it.</li>
|
||||
<li>Future: swap in larger models on the Jetsons, add streaming and rate limits.</li>
|
||||
</ul>
|
||||
</section>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { onUpdated, ref } from "vue";
|
||||
|
||||
// Conversation shown in the chat window, seeded with an assistant greeting.
const messages = ref([
|
||||
{
|
||||
role: "assistant",
|
||||
content: "Hi! I'm the Titan Lab assistant running on titan-24. How can I help?",
|
||||
},
|
||||
]);
|
||||
// Current contents of the message textarea (bound with v-model).
const draft = ref("");
|
||||
// True while a request is in flight; disables the textarea and submit button.
const sending = ref(false);
|
||||
// Last error message; rendered as an error row when non-empty.
const error = ref("");
|
||||
// Template ref to the scrollable chat container element.
const chatWindow = ref(null);
|
||||
|
||||
// After each re-render, pin the chat window to its newest message.
onUpdated(() => {
  const el = chatWindow.value;
  if (!el) return;
  el.scrollTop = el.scrollHeight;
});
|
||||
|
||||
/**
 * Send the current draft to the chat backend and append the reply.
 *
 * Does nothing when the draft is blank or a request is already in flight.
 * On failure the message is stored in `error` and shown as an error row.
 */
async function sendMessage() {
  const text = draft.value.trim();
  if (!text || sending.value) return;

  // Snapshot the prior turns BEFORE appending the new one: the backend adds
  // `message` to the conversation itself, so including it in `history` as
  // well would send the same user turn to the model twice.
  const history = messages.value.map(({ role, content }) => ({ role, content }));

  draft.value = "";
  error.value = "";
  messages.value.push({ role: "user", content: text });
  sending.value = true;

  try {
    const resp = await fetch("/api/ai/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message: text, history }),
    });
    // A proxy or gateway failure may return a non-JSON body; treat that as a
    // failed request rather than surfacing a JSON parse error to the user.
    const data = await resp.json().catch(() => null);
    if (!resp.ok || !data || data.error) {
      throw new Error(data?.error || "Request failed");
    }
    messages.value.push({
      role: "assistant",
      content: data.reply || "(empty response)",
      latency_ms: data.latency_ms,
    });
  } catch (err) {
    error.value = err.message || "Unexpected error";
  } finally {
    sending.value = false;
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.page {
|
||||
max-width: 900px;
|
||||
max-width: 1100px;
|
||||
margin: 0 auto;
|
||||
padding: 32px 22px 72px;
|
||||
}
|
||||
|
||||
ul {
|
||||
.hero {
|
||||
display: grid;
|
||||
grid-template-columns: 2fr 1fr;
|
||||
gap: 18px;
|
||||
}
|
||||
|
||||
.hero-facts {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
align-content: start;
|
||||
}
|
||||
|
||||
.fact {
|
||||
border: 1px solid var(--card-border);
|
||||
border-radius: 10px;
|
||||
padding: 10px 12px;
|
||||
background: rgba(255, 255, 255, 0.02);
|
||||
}
|
||||
|
||||
.label {
|
||||
color: var(--text-muted);
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.value {
|
||||
display: block;
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.pill-live {
|
||||
display: inline-block;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.chat-card {
|
||||
margin-top: 18px;
|
||||
}
|
||||
|
||||
.chat-window {
|
||||
background: rgba(255, 255, 255, 0.02);
|
||||
border: 1px solid var(--card-border);
|
||||
border-radius: 12px;
|
||||
padding: 14px;
|
||||
min-height: 320px;
|
||||
max-height: 520px;
|
||||
overflow-y: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.chat-row {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.chat-row.user {
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.bubble {
|
||||
max-width: 85%;
|
||||
padding: 10px 12px;
|
||||
border-radius: 12px;
|
||||
border: 1px solid var(--card-border);
|
||||
background: rgba(255, 255, 255, 0.04);
|
||||
}
|
||||
|
||||
.chat-row.assistant .bubble {
|
||||
background: rgba(80, 163, 255, 0.08);
|
||||
}
|
||||
|
||||
.chat-row.user .bubble {
|
||||
background: rgba(255, 255, 255, 0.06);
|
||||
}
|
||||
|
||||
.chat-row.error .bubble {
|
||||
background: rgba(255, 87, 87, 0.1);
|
||||
border-color: rgba(255, 87, 87, 0.5);
|
||||
}
|
||||
|
||||
.role {
|
||||
font-size: 12px;
|
||||
color: var(--text-muted);
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.meta {
|
||||
color: var(--text-muted);
|
||||
font-size: 12px;
|
||||
margin-top: 6px;
|
||||
}
|
||||
|
||||
.chat-form {
|
||||
margin-top: 12px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
textarea {
|
||||
width: 100%;
|
||||
border-radius: 12px;
|
||||
border: 1px solid var(--card-border);
|
||||
background: rgba(255, 255, 255, 0.03);
|
||||
color: var(--text-primary);
|
||||
padding: 10px 12px;
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
.actions {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.hint {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
button.primary {
|
||||
background: linear-gradient(90deg, #4f8bff, #7dd0ff);
|
||||
color: #0b1222;
|
||||
padding: 10px 16px;
|
||||
border: none;
|
||||
border-radius: 10px;
|
||||
cursor: pointer;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.info-card ul {
|
||||
color: var(--text-muted);
|
||||
padding-left: 18px;
|
||||
}
|
||||
</style>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user