feat(ai): add ollama chat proxy and UI

Brad Stein 2025-12-20 14:25:55 -03:00
parent 4f9211be32
commit 7aa8b8fce8
4 changed files with 308 additions and 17 deletions

View File

@@ -9,8 +9,9 @@ from urllib.error import URLError
 from urllib.parse import urlencode
 from urllib.request import urlopen
 
-from flask import Flask, jsonify, send_from_directory
+from flask import Flask, jsonify, request, send_from_directory
 from flask_cors import CORS
+import httpx
 
 app = Flask(__name__, static_folder="../frontend/dist", static_url_path="")
@@ -26,6 +27,13 @@ HTTP_CHECK_TIMEOUT_SEC = float(os.getenv("HTTP_CHECK_TIMEOUT_SEC", "2"))
 LAB_STATUS_CACHE_SEC = float(os.getenv("LAB_STATUS_CACHE_SEC", "30"))
 GRAFANA_HEALTH_URL = os.getenv("GRAFANA_HEALTH_URL", "https://metrics.bstein.dev/api/health")
 OCEANUS_NODE_EXPORTER_URL = os.getenv("OCEANUS_NODE_EXPORTER_URL", "http://192.168.22.24:9100/metrics")
+AI_CHAT_API = os.getenv("AI_CHAT_API", "http://ollama.ai.svc.cluster.local:11434").rstrip("/")
+AI_CHAT_MODEL = os.getenv("AI_CHAT_MODEL", "phi3:mini")
+AI_CHAT_SYSTEM_PROMPT = os.getenv(
+    "AI_CHAT_SYSTEM_PROMPT",
+    "You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
+)
+AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
 
 _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
@@ -137,6 +145,42 @@ def lab_status() -> Any:
     return jsonify(payload)
 
+
+@app.route("/api/ai/chat", methods=["POST"])
+def ai_chat() -> Any:
+    payload = request.get_json(silent=True) or {}
+    user_message = (payload.get("message") or "").strip()
+    history = payload.get("history") or []
+    if not user_message:
+        return jsonify({"error": "message required"}), 400
+
+    messages: list[dict[str, str]] = []
+    if AI_CHAT_SYSTEM_PROMPT:
+        messages.append({"role": "system", "content": AI_CHAT_SYSTEM_PROMPT})
+    for item in history:
+        role = item.get("role")
+        content = (item.get("content") or "").strip()
+        if role in ("user", "assistant") and content:
+            messages.append({"role": role, "content": content})
+    messages.append({"role": "user", "content": user_message})
+
+    body = {"model": AI_CHAT_MODEL, "messages": messages, "stream": False}
+    started = time.time()
+    try:
+        with httpx.Client(timeout=AI_CHAT_TIMEOUT_SEC) as client:
+            resp = client.post(f"{AI_CHAT_API}/api/chat", json=body)
+            resp.raise_for_status()
+            data = resp.json()
+        reply = (data.get("message") or {}).get("content") or ""
+        elapsed_ms = int((time.time() - started) * 1000)
+        return jsonify({"reply": reply, "latency_ms": elapsed_ms})
+    except (httpx.RequestError, httpx.HTTPStatusError, ValueError) as exc:
+        return jsonify({"error": str(exc)}), 502
+
+
 @app.route("/", defaults={"path": ""})
 @app.route("/<path:path>")
 def serve_frontend(path: str) -> Any:
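
A quick way to exercise the new route once the backend is up — a minimal sketch in Python; the http://localhost:8080 base URL is an assumption for local testing, not something this commit configures:

# Hypothetical smoke test for POST /api/ai/chat.
import httpx

resp = httpx.post(
    "http://localhost:8080/api/ai/chat",  # assumed local address of the Flask app
    json={
        "message": "What model are you running?",
        # Optional prior turns; the route keeps only user/assistant roles.
        "history": [{"role": "assistant", "content": "Hi! How can I help?"}],
    },
    timeout=30.0,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"reply": "...", "latency_ms": 2100}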

View File

@@ -1,3 +1,4 @@
 flask==3.0.3
 flask-cors==4.0.0
 gunicorn==21.2.0
+httpx==0.27.2
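
httpx is pinned here for the upstream call above. For reference, the request the proxy builds maps directly onto Ollama's /api/chat API; a sketch of the equivalent direct call, using the backend's in-cluster defaults:

# Direct equivalent of the proxy's upstream request to Ollama.
import httpx

body = {
    "model": "phi3:mini",  # backend default (AI_CHAT_MODEL)
    "messages": [
        {"role": "system", "content": "You are the Titan Lab assistant for bstein.dev. Be concise and helpful."},
        {"role": "user", "content": "Hello"},
    ],
    "stream": False,  # one complete JSON object rather than streamed chunks
}
resp = httpx.post("http://ollama.ai.svc.cluster.local:11434/api/chat", json=body, timeout=20.0)
resp.raise_for_status()
# Ollama returns the reply under {"message": {"role": "assistant", "content": ...}}.
print(resp.json()["message"]["content"])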

View File

@@ -122,14 +122,14 @@ export function fallbackServices() {
     link: "https://meet.bstein.dev",
     status: "degraded",
   },
   {
     name: "AI Chat",
     category: "ai",
-    summary: "LLM Chat - Planned",
+    summary: "LLM chat (public beta)",
     link: "/ai",
-    host: "chat.ai.bstein.dev",
-    status: "planned",
+    host: "bstein.dev/ai",
+    status: "beta",
   },
   {
     name: "AI Image",
     category: "ai",

View File

@@ -1,25 +1,271 @@
 <template>
   <div class="page">
-    <section class="card">
-      <h1>AI services (planned)</h1>
-      <p>Targets for chat.ai.bstein.dev, draw.ai.bstein.dev, and talk.ai.bstein.dev. These will land behind Keycloak once the pipelines are ready.</p>
+    <section class="card hero glass">
+      <div>
+        <p class="eyebrow">Atlas AI</p>
+        <h1>Chat</h1>
+        <p class="lede">
+          Lightweight LLM running on titan-24 (RTX 3080, 8GB). Anyone can chat without auth. Each send is a single
+          stateless turn; the client includes the on-page history with every request.
+        </p>
+        <div class="pill mono pill-live">Online</div>
+      </div>
+      <div class="hero-facts">
+        <div class="fact">
+          <span class="label mono">Model</span>
+          <span class="value mono">phi3:mini (4k)</span>
+        </div>
+        <div class="fact">
+          <span class="label mono">GPU</span>
+          <span class="value mono">titan-24 · 3080 (8GB)</span>
+        </div>
+        <div class="fact">
+          <span class="label mono">Endpoint</span>
+          <span class="value mono">/api/ai/chat</span>
+        </div>
+      </div>
+    </section>
+    <section class="card chat-card">
+      <div class="chat-window" ref="chatWindow">
+        <div v-for="(msg, idx) in messages" :key="idx" :class="['chat-row', msg.role]">
+          <div class="bubble">
+            <div class="role mono">{{ msg.role === 'assistant' ? 'ai' : 'you' }}</div>
+            <p>{{ msg.content }}</p>
+            <div v-if="msg.latency_ms" class="meta mono">{{ msg.latency_ms }} ms</div>
+          </div>
+        </div>
+        <div v-if="error" class="chat-row error">
+          <div class="bubble">
+            <div class="role mono">error</div>
+            <p>{{ error }}</p>
+          </div>
+        </div>
+      </div>
+      <form class="chat-form" @submit.prevent="sendMessage">
+        <textarea
+          v-model="draft"
+          placeholder="Ask anything about the lab or general topics..."
+          rows="3"
+          :disabled="sending"
+          @keydown.enter.exact.prevent="sendMessage"
+        />
+        <div class="actions">
+          <span class="hint mono">Shift+Enter for newline</span>
+          <button class="primary" type="submit" :disabled="sending || !draft.trim()">
+            {{ sending ? "Sending..." : "Send" }}
+          </button>
+        </div>
+      </form>
+    </section>
+    <section class="card info-card">
+      <h2>Notes</h2>
       <ul>
-        <li>Chat: conversational agent with SSO.</li>
-        <li>Image: text-to-image workflows for user media.</li>
-        <li>Speech: voice-to-voice translation and dubbing.</li>
+        <li>Backend proxies requests to Ollama inside the cluster; no external calls are made.</li>
+        <li>Short-term context: the chat history on this page is sent with each turn; refreshing clears it.</li>
+        <li>Future: swap in larger models on the Jetsons, add streaming and rate limits.</li>
       </ul>
     </section>
   </div>
 </template>
+
+<script setup>
+import { onUpdated, ref } from "vue";
+
+const messages = ref([
+  {
+    role: "assistant",
+    content: "Hi! I'm the Titan Lab assistant running on titan-24. How can I help?",
+  },
+]);
+const draft = ref("");
+const sending = ref(false);
+const error = ref("");
+const chatWindow = ref(null);
+
+// Keep the newest message in view as the list grows.
+onUpdated(() => {
+  if (chatWindow.value) {
+    chatWindow.value.scrollTop = chatWindow.value.scrollHeight;
+  }
+});
+
+async function sendMessage() {
+  if (!draft.value.trim() || sending.value) return;
+  const text = draft.value.trim();
+  draft.value = "";
+  error.value = "";
+  messages.value.push({ role: "user", content: text });
+  sending.value = true;
+  try {
+    // Exclude the just-pushed user turn: the server re-appends it from
+    // "message", so sending it in history too would duplicate it in the prompt.
+    const history = messages.value
+      .slice(0, -1)
+      .map((m) => ({ role: m.role, content: m.content }));
+    const resp = await fetch("/api/ai/chat", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ message: text, history }),
+    });
+    const data = await resp.json();
+    if (!resp.ok || data.error) {
+      throw new Error(data.error || "Request failed");
+    }
+    messages.value.push({
+      role: "assistant",
+      content: data.reply || "(empty response)",
+      latency_ms: data.latency_ms,
+    });
+  } catch (err) {
+    error.value = err.message || "Unexpected error";
+  } finally {
+    sending.value = false;
+  }
+}
+</script>
 <style scoped>
 .page {
-  max-width: 900px;
+  max-width: 1100px;
   margin: 0 auto;
   padding: 32px 22px 72px;
 }
-ul {
+.hero {
+  display: grid;
+  grid-template-columns: 2fr 1fr;
+  gap: 18px;
+}
+.hero-facts {
+  display: grid;
+  gap: 10px;
+  align-content: start;
+}
+.fact {
+  border: 1px solid var(--card-border);
+  border-radius: 10px;
+  padding: 10px 12px;
+  background: rgba(255, 255, 255, 0.02);
+}
+.label {
+  color: var(--text-muted);
+  font-size: 12px;
+}
+.value {
+  display: block;
+  margin-top: 4px;
+}
+.pill-live {
+  display: inline-block;
+  margin-top: 8px;
+}
+.chat-card {
+  margin-top: 18px;
+}
+.chat-window {
+  background: rgba(255, 255, 255, 0.02);
+  border: 1px solid var(--card-border);
+  border-radius: 12px;
+  padding: 14px;
+  min-height: 320px;
+  max-height: 520px;
+  overflow-y: auto;
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+}
+.chat-row {
+  display: flex;
+}
+.chat-row.user {
+  justify-content: flex-end;
+}
+.bubble {
+  max-width: 85%;
+  padding: 10px 12px;
+  border-radius: 12px;
+  border: 1px solid var(--card-border);
+  background: rgba(255, 255, 255, 0.04);
+}
+.chat-row.assistant .bubble {
+  background: rgba(80, 163, 255, 0.08);
+}
+.chat-row.user .bubble {
+  background: rgba(255, 255, 255, 0.06);
+}
+.chat-row.error .bubble {
+  background: rgba(255, 87, 87, 0.1);
+  border-color: rgba(255, 87, 87, 0.5);
+}
+.role {
+  font-size: 12px;
+  color: var(--text-muted);
+  margin-bottom: 4px;
+}
+.meta {
+  color: var(--text-muted);
+  font-size: 12px;
+  margin-top: 6px;
+}
+.chat-form {
+  margin-top: 12px;
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+textarea {
+  width: 100%;
+  border-radius: 12px;
+  border: 1px solid var(--card-border);
+  background: rgba(255, 255, 255, 0.03);
+  color: var(--text-primary);
+  padding: 10px 12px;
+  resize: vertical;
+}
+.actions {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+.hint {
   color: var(--text-muted);
 }
+button.primary {
+  background: linear-gradient(90deg, #4f8bff, #7dd0ff);
+  color: #0b1222;
+  padding: 10px 16px;
+  border: none;
+  border-radius: 10px;
+  cursor: pointer;
+  font-weight: 700;
+}
+button:disabled {
+  opacity: 0.6;
+  cursor: not-allowed;
+}
+.info-card ul {
+  color: var(--text-muted);
+  padding-left: 18px;
+}
 </style>