feat(ai): add ollama chat proxy and UI

2025-12-20 14:25:55 -03:00 · 2025-12-20 14:25:55 -03:00 · 7aa8b8fce8
commit 7aa8b8fce8
parent 4f9211be32
4 changed files with 308 additions and 17 deletions
--- a/backend/app.py
+++ b/backend/app.py
@ -9,8 +9,9 @@ from urllib.error import URLError
 from urllib.parse import urlencode
 from urllib.request import urlopen

-from flask import Flask, jsonify, send_from_directory
+from flask import Flask, jsonify, request, send_from_directory
 from flask_cors import CORS
+import httpx


 app = Flask(__name__, static_folder="../frontend/dist", static_url_path="")
@ -26,6 +27,13 @@ HTTP_CHECK_TIMEOUT_SEC = float(os.getenv("HTTP_CHECK_TIMEOUT_SEC", "2"))
 LAB_STATUS_CACHE_SEC = float(os.getenv("LAB_STATUS_CACHE_SEC", "30"))
 GRAFANA_HEALTH_URL = os.getenv("GRAFANA_HEALTH_URL", "https://metrics.bstein.dev/api/health")
 OCEANUS_NODE_EXPORTER_URL = os.getenv("OCEANUS_NODE_EXPORTER_URL", "http://192.168.22.24:9100/metrics")
+# Base URL of the chat backend; the trailing slash is stripped so that
+# f"{AI_CHAT_API}/api/chat" never produces a double slash.
+AI_CHAT_API = os.getenv("AI_CHAT_API", "http://ollama.ai.svc.cluster.local:11434").rstrip("/")
+# Model name sent as the "model" field of every chat request.
+AI_CHAT_MODEL = os.getenv("AI_CHAT_MODEL", "phi3:mini")
+# System prompt prepended to each conversation; set it to an empty string to
+# send no system message at all.
+AI_CHAT_SYSTEM_PROMPT = os.getenv(
+    "AI_CHAT_SYSTEM_PROMPT",
+    "You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
+)
+# Timeout (seconds) handed to the httpx client used for the upstream chat call.
+AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))

 _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}

@ -137,6 +145,42 @@ def lab_status() -> Any:
    return jsonify(payload)


+@app.route("/api/ai/chat", methods=["POST"])
+def ai_chat() -> Any:
+    payload = request.get_json(silent=True) or {}
+    user_message = (payload.get("message") or "").strip()
+    history = payload.get("history") or []
+
+    if not user_message:
+        return jsonify({"error": "message required"}), 400
+
+    messages: list[dict[str, str]] = []
+    if AI_CHAT_SYSTEM_PROMPT:
+        messages.append({"role": "system", "content": AI_CHAT_SYSTEM_PROMPT})
+
+    for item in history:
+        role = item.get("role")
+        content = (item.get("content") or "").strip()
+        if role in ("user", "assistant") and content:
+            messages.append({"role": role, "content": content})
+
+    messages.append({"role": "user", "content": user_message})
+
+    body = {"model": AI_CHAT_MODEL, "messages": messages, "stream": False}
+    started = time.time()
+
+    try:
+        with httpx.Client(timeout=AI_CHAT_TIMEOUT_SEC) as client:
+            resp = client.post(f"{AI_CHAT_API}/api/chat", json=body)
+            resp.raise_for_status()
+            data = resp.json()
+            reply = (data.get("message") or {}).get("content") or ""
+            elapsed_ms = int((time.time() - started) * 1000)
+            return jsonify({"reply": reply, "latency_ms": elapsed_ms})
+    except (httpx.RequestError, httpx.HTTPStatusError, ValueError) as exc:
+        return jsonify({"error": str(exc)}), 502
+
+
@app.route("/", defaults={"path": ""})
@app.route("/<path:path>")
 def serve_frontend(path: str) -> Any:
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -1,3 +1,4 @@
 flask==3.0.3
 flask-cors==4.0.0
 gunicorn==21.2.0
+httpx==0.27.2
--- a/frontend/src/data/sample.js
+++ b/frontend/src/data/sample.js
@ -122,14 +122,14 @@ export function fallbackServices() {
        link: "https://meet.bstein.dev",
        status: "degraded",
      },
-      {
-        name: "AI Chat",
-        category: "ai",
-        summary: "LLM Chat - Planned",
-        link: "/ai",
-        host: "chat.ai.bstein.dev",
-        status: "planned",
-      },
+    {
+      name: "AI Chat",
+      category: "ai",
+      summary: "LLM chat (public beta)",
+      link: "/ai",
+      host: "bstein.dev/ai",
+      status: "beta",
+    },
      {
        name: "AI Image",
        category: "ai",
--- a/frontend/src/views/AiView.vue
+++ b/frontend/src/views/AiView.vue
@ -1,25 +1,271 @@
 <template>
  <div class="page">
-    <section class="card">
-      <h1>AI services (planned)</h1>
-      <p>Targets for chat.ai.bstein.dev, draw.ai.bstein.dev, and talk.ai.bstein.dev. These will land behind Keycloak once the pipelines are ready.</p>
+    <section class="card hero glass">
+      <div>
+        <p class="eyebrow">Atlas AI</p>
+        <h1>Chat</h1>
+        <p class="lede">
+          Lightweight LLM running on titan-24 (RTX 3080, 8GB). Anyone can chat without auth. Responses are single-turn per
+          send; the client sends the on-page history with every request.
+        </p>
+        <div class="pill mono pill-live">Online</div>
+      </div>
+      <div class="hero-facts">
+        <div class="fact">
+          <span class="label mono">Model</span>
+          <span class="value mono">phi3:mini (4k)</span>
+        </div>
+        <div class="fact">
+          <span class="label mono">GPU</span>
+          <span class="value mono">titan-24 · 3080 (8GB)</span>
+        </div>
+        <div class="fact">
+          <span class="label mono">Endpoint</span>
+          <span class="value mono">/api/ai/chat</span>
+        </div>
+      </div>
+    </section>
+
+    <section class="card chat-card">
+      <div class="chat-window" ref="chatWindow">
+        <div v-for="(msg, idx) in messages" :key="idx" :class="['chat-row', msg.role]">
+          <div class="bubble">
+            <div class="role mono">{{ msg.role === 'assistant' ? 'ai' : 'you' }}</div>
+            <p>{{ msg.content }}</p>
+            <div v-if="msg.latency_ms" class="meta mono">{{ msg.latency_ms }} ms</div>
+          </div>
+        </div>
+        <div v-if="error" class="chat-row error">
+          <div class="bubble">
+            <div class="role mono">error</div>
+            <p>{{ error }}</p>
+          </div>
+        </div>
+      </div>
+      <form class="chat-form" @submit.prevent="sendMessage">
+        <!--
+          Plain Enter sends the message. The .exact modifier leaves Shift+Enter
+          (and any other modified Enter) to the textarea so it inserts a newline,
+          which is what the hint below promises.
+        -->
+        <textarea
+          v-model="draft"
+          placeholder="Ask anything about the lab or general topics..."
+          rows="3"
+          :disabled="sending"
+          @keydown.enter.exact.prevent="sendMessage"
+        />
+        <div class="actions">
+          <span class="hint mono">Shift+Enter for newline</span>
+          <button class="primary" type="submit" :disabled="sending || !draft.trim()">
+            {{ sending ? "Sending..." : "Send" }}
+          </button>
+        </div>
+      </form>
+    </section>
+
+    <section class="card info-card">
+      <h2>Notes</h2>
      <ul>
-        <li>Chat: conversational agent with SSO.</li>
-        <li>Image: text-to-image workflows for user media.</li>
-        <li>Speech: voice-to-voice translation and dubbing.</li>
+        <li>Backend proxies requests to Ollama inside the cluster; no external calls are made.</li>
+        <li>Short-term context: the chat history in this page is sent each turn. Refresh clears it.</li>
+        <li>Future: swap in larger models on the Jetsons, add streaming and rate limits.</li>
      </ul>
    </section>
  </div>
 </template>

+<script setup>
+import { onUpdated, ref } from "vue";
+
+// Transcript rendered in the chat window, seeded with a greeting from the
+// assistant. Entries carry `role` and `content`; assistant replies may also
+// carry `latency_ms`.
+const messages = ref([
+  {
+    role: "assistant",
+    content: "Hi! I'm the Titan Lab assistant running on titan-24. How can I help?",
+  },
+]);
+// Text currently typed into the textarea (bound with v-model).
+const draft = ref("");
+// True while a request is in flight; disables the textarea and the send button.
+const sending = ref(false);
+// Message of the last failed request, shown as an error bubble; "" when there is none.
+const error = ref("");
+// Template ref to the scrollable chat window element.
+const chatWindow = ref(null);
+
+onUpdated(() => {
+  if (chatWindow.value) {
+    chatWindow.value.scrollTop = chatWindow.value.scrollHeight;
+  }
+});
+
+async function sendMessage() {
+  if (!draft.value.trim() || sending.value) return;
+  const text = draft.value.trim();
+  draft.value = "";
+  error.value = "";
+  const userEntry = { role: "user", content: text };
+  messages.value.push(userEntry);
+  sending.value = true;
+
+  try {
+    const history = messages.value.map((m) => ({ role: m.role, content: m.content }));
+    const resp = await fetch("/api/ai/chat", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ message: text, history }),
+    });
+    const data = await resp.json();
+    if (!resp.ok || data.error) {
+      throw new Error(data.error || "Request failed");
+    }
+    messages.value.push({
+      role: "assistant",
+      content: data.reply || "(empty response)",
+      latency_ms: data.latency_ms,
+    });
+  } catch (err) {
+    error.value = err.message || "Unexpected error";
+  } finally {
+    sending.value = false;
+  }
+}
+</script>
+
 <style scoped>
 .page {
-  max-width: 900px;
+  max-width: 1100px;
  margin: 0 auto;
  padding: 32px 22px 72px;
 }

-ul {
+.hero {
+  display: grid;
+  grid-template-columns: 2fr 1fr;
+  gap: 18px;
+}
+
+.hero-facts {
+  display: grid;
+  gap: 10px;
+  align-content: start;
+}
+
+.fact {
+  border: 1px solid var(--card-border);
+  border-radius: 10px;
+  padding: 10px 12px;
+  background: rgba(255, 255, 255, 0.02);
+}
+
+.label {
+  color: var(--text-muted);
+  font-size: 12px;
+}
+
+.value {
+  display: block;
+  margin-top: 4px;
+}
+
+.pill-live {
+  display: inline-block;
+  margin-top: 8px;
+}
+
+.chat-card {
+  margin-top: 18px;
+}
+
+.chat-window {
+  background: rgba(255, 255, 255, 0.02);
+  border: 1px solid var(--card-border);
+  border-radius: 12px;
+  padding: 14px;
+  min-height: 320px;
+  max-height: 520px;
+  overflow-y: auto;
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+}
+
+.chat-row {
+  display: flex;
+}
+
+.chat-row.user {
+  justify-content: flex-end;
+}
+
+.bubble {
+  max-width: 85%;
+  padding: 10px 12px;
+  border-radius: 12px;
+  border: 1px solid var(--card-border);
+  background: rgba(255, 255, 255, 0.04);
+}
+
+.chat-row.assistant .bubble {
+  background: rgba(80, 163, 255, 0.08);
+}
+
+.chat-row.user .bubble {
+  background: rgba(255, 255, 255, 0.06);
+}
+
+.chat-row.error .bubble {
+  background: rgba(255, 87, 87, 0.1);
+  border-color: rgba(255, 87, 87, 0.5);
+}
+
+.role {
+  font-size: 12px;
+  color: var(--text-muted);
+  margin-bottom: 4px;
+}
+
+.meta {
+  color: var(--text-muted);
+  font-size: 12px;
+  margin-top: 6px;
+}
+
+.chat-form {
+  margin-top: 12px;
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+
+textarea {
+  width: 100%;
+  border-radius: 12px;
+  border: 1px solid var(--card-border);
+  background: rgba(255, 255, 255, 0.03);
+  color: var(--text-primary);
+  padding: 10px 12px;
+  resize: vertical;
+}
+
+.actions {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+.hint {
  color: var(--text-muted);
 }
+
+button.primary {
+  background: linear-gradient(90deg, #4f8bff, #7dd0ff);
+  color: #0b1222;
+  padding: 10px 16px;
+  border: none;
+  border-radius: 10px;
+  cursor: pointer;
+  font-weight: 700;
+}
+
+button:disabled {
+  opacity: 0.6;
+  cursor: not-allowed;
+}
+
+.info-card ul {
+  color: var(--text-muted);
+  padding-left: 18px;
+}
 </style>