feat(ai): add ollama chat proxy and UI

2025-12-20 14:25:55 -03:00 · 2025-12-20 14:25:55 -03:00 · 7aa8b8fce8
commit 7aa8b8fce8
parent 4f9211be32
4 changed files with 308 additions and 17 deletions
--- a/backend/app.py
+++ b/backend/app.py
@ -9,8 +9,9 @@ from urllib.error import URLError
 from urllib.parse import urlencode
 from urllib.request import urlopen
-from flask import Flask, jsonify, send_from_directory
+from flask import Flask, jsonify, request, send_from_directory
 from flask_cors import CORS
 import httpx
 app = Flask(__name__, static_folder="../frontend/dist", static_url_path="")
@ -26,6 +27,13 @@ HTTP_CHECK_TIMEOUT_SEC = float(os.getenv("HTTP_CHECK_TIMEOUT_SEC", "2"))
 LAB_STATUS_CACHE_SEC = float(os.getenv("LAB_STATUS_CACHE_SEC", "30"))
 GRAFANA_HEALTH_URL = os.getenv("GRAFANA_HEALTH_URL", "https://metrics.bstein.dev/api/health")
 OCEANUS_NODE_EXPORTER_URL = os.getenv("OCEANUS_NODE_EXPORTER_URL", "http://192.168.22.24:9100/metrics")
 # Base URL of the Ollama server the chat endpoint proxies to. The trailing
 # slash is stripped so that appending "/api/chat" never yields "//".
 AI_CHAT_API = os.getenv("AI_CHAT_API", "http://ollama.ai.svc.cluster.local:11434").rstrip("/")
 # Model name sent as the "model" field of every upstream chat request.
 AI_CHAT_MODEL = os.getenv("AI_CHAT_MODEL", "phi3:mini")
 # System message placed first in each conversation; an empty value makes
 # the chat handler omit the system message entirely.
 AI_CHAT_SYSTEM_PROMPT = os.getenv(
    "AI_CHAT_SYSTEM_PROMPT",
    "You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
 )
 # Timeout, in seconds, handed to the httpx client used for the upstream call.
 AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
 _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
@ -137,6 +145,42 @@ def lab_status() -> Any:
    return jsonify(payload)
@app.route("/api/ai/chat", methods=["POST"])
def ai_chat() -> Any:
    """Proxy one chat turn to the Ollama ``/api/chat`` endpoint.

    Request body (JSON object):
        message: required, non-blank string with the new user turn.
        history: optional list of ``{"role", "content"}`` objects; only
            ``user`` / ``assistant`` entries with non-blank string content
            are forwarded, everything else is skipped.

    Returns:
        200 with ``{"reply": str, "latency_ms": int}`` on success.
        400 with ``{"error": "message required"}`` when ``message`` is
            missing, blank, or not a string.
        502 with a generic ``{"error": ...}`` when the upstream call fails
            or returns something that is not JSON.
    """
    # The endpoint is unauthenticated, so every field is treated as untrusted:
    # wrong-typed input must produce a 400 / be skipped, never a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    raw_message = payload.get("message")
    user_message = raw_message.strip() if isinstance(raw_message, str) else ""
    if not user_message:
        return jsonify({"error": "message required"}), 400

    raw_history = payload.get("history")
    history = raw_history if isinstance(raw_history, list) else []

    messages: list[dict[str, str]] = []
    if AI_CHAT_SYSTEM_PROMPT:
        messages.append({"role": "system", "content": AI_CHAT_SYSTEM_PROMPT})
    for item in history:
        if not isinstance(item, dict):
            continue
        role = item.get("role")
        content = item.get("content")
        if role not in ("user", "assistant") or not isinstance(content, str):
            continue
        content = content.strip()
        if content:
            messages.append({"role": role, "content": content})

    # Clients may already include the new turn as the last history entry;
    # appending it again would show the model the same user message twice.
    current_turn = {"role": "user", "content": user_message}
    if not messages or messages[-1] != current_turn:
        messages.append(current_turn)

    body = {"model": AI_CHAT_MODEL, "messages": messages, "stream": False}
    # Monotonic clock: the latency must not jump if the wall clock is adjusted.
    started = time.monotonic()
    try:
        with httpx.Client(timeout=AI_CHAT_TIMEOUT_SEC) as client:
            resp = client.post(f"{AI_CHAT_API}/api/chat", json=body)
            resp.raise_for_status()
            data = resp.json()
    except (httpx.RequestError, httpx.HTTPStatusError, ValueError) as exc:
        # The exception text contains the internal upstream URL; keep it in
        # the server log and hand anonymous callers a generic message.
        app.logger.warning("AI chat upstream request failed: %s", exc)
        return jsonify({"error": "AI backend unavailable"}), 502
    elapsed_ms = int((time.monotonic() - started) * 1000)

    # Tolerate an unexpected upstream shape instead of raising AttributeError.
    upstream_message = data.get("message") if isinstance(data, dict) else None
    reply = upstream_message.get("content") if isinstance(upstream_message, dict) else None
    if not isinstance(reply, str):
        reply = ""
    return jsonify({"reply": reply, "latency_ms": elapsed_ms})
@app.route("/", defaults={"path": ""})
@app.route("/<path:path>")
 def serve_frontend(path: str) -> Any:
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -1,3 +1,4 @@
 flask==3.0.3
 flask-cors==4.0.0
 gunicorn==21.2.0
 httpx==0.27.2
--- a/frontend/src/data/sample.js
+++ b/frontend/src/data/sample.js
@ -122,14 +122,14 @@ export function fallbackServices() {
        link: "https://meet.bstein.dev",
        status: "degraded",
      },
-      {
+    {
-        name: "AI Chat",
+      name: "AI Chat",
-        category: "ai",
+      category: "ai",
-        summary: "LLM Chat - Planned",
+      summary: "LLM chat (public beta)",
-        link: "/ai",
+      link: "/ai",
-        host: "chat.ai.bstein.dev",
+      host: "bstein.dev/ai",
-        status: "planned",
+      status: "beta",
-      },
+    },
      {
        name: "AI Image",
        category: "ai",
--- a/frontend/src/views/AiView.vue
+++ b/frontend/src/views/AiView.vue
@ -1,25 +1,271 @@
 <template>
  <div class="page">
-    <section class="card">
+    <section class="card hero glass">
-      <h1>AI services (planned)</h1>
+      <div>
-      <p>Targets for chat.ai.bstein.dev, draw.ai.bstein.dev, and talk.ai.bstein.dev. These will land behind Keycloak once the pipelines are ready.</p>
+        <p class="eyebrow">Atlas AI</p>
        <h1>Chat</h1>
        <p class="lede">
          Lightweight LLM running on titan-24 (RTX 3080, 8GB). Anyone can chat without auth. Responses are single-turn per
          send; the client sends the on-page history with every request.
        </p>
        <div class="pill mono pill-live">Online</div>
      </div>
      <div class="hero-facts">
        <div class="fact">
          <span class="label mono">Model</span>
          <span class="value mono">phi3:mini (4k)</span>
        </div>
        <div class="fact">
          <span class="label mono">GPU</span>
          <span class="value mono">titan-24 · 3080 (8GB)</span>
        </div>
        <div class="fact">
          <span class="label mono">Endpoint</span>
          <span class="value mono">/api/ai/chat</span>
        </div>
      </div>
    </section>
    <section class="card chat-card">
      <div class="chat-window" ref="chatWindow">
        <div v-for="(msg, idx) in messages" :key="idx" :class="['chat-row', msg.role]">
          <div class="bubble">
            <div class="role mono">{{ msg.role === 'assistant' ? 'ai' : 'you' }}</div>
            <p>{{ msg.content }}</p>
            <div v-if="msg.latency_ms" class="meta mono">{{ msg.latency_ms }} ms</div>
          </div>
        </div>
        <div v-if="error" class="chat-row error">
          <div class="bubble">
            <div class="role mono">error</div>
            <p>{{ error }}</p>
          </div>
        </div>
      </div>
      <form class="chat-form" @submit.prevent="sendMessage">
        <!-- Plain Enter sends, matching the hint below; `.exact` leaves Shift+Enter
             (and other modifier combos) to the textarea so it inserts a newline. -->
        <textarea
          v-model="draft"
          placeholder="Ask anything about the lab or general topics..."
          rows="3"
          :disabled="sending"
          @keydown.enter.exact.prevent="sendMessage"
        />
        <div class="actions">
          <span class="hint mono">Shift+Enter for newline</span>
          <button class="primary" type="submit" :disabled="sending || !draft.trim()">
            {{ sending ? "Sending..." : "Send" }}
          </button>
        </div>
      </form>
    </section>
    <section class="card info-card">
      <h2>Notes</h2>
      <ul>
-        <li>Chat: conversational agent with SSO.</li>
+        <li>Backend proxies requests to Ollama inside the cluster; no external calls are made.</li>
-        <li>Image: text-to-image workflows for user media.</li>
+        <li>Short-term context: the chat history in this page is sent each turn. Refresh clears it.</li>
-        <li>Speech: voice-to-voice translation and dubbing.</li>
+        <li>Future: swap in larger models on the Jetsons, add streaming and rate limits.</li>
      </ul>
    </section>
  </div>
 </template>
 <script setup>
 import { onUpdated, ref } from "vue";
 // Conversation shown in the chat window, seeded with a greeting from the
 // assistant. The same entries are mapped into the `history` sent to the backend.
 const messages = ref([
  {
    role: "assistant",
    content: "Hi! I'm the Titan Lab assistant running on titan-24. How can I help?",
  },
 ]);
 // Text currently typed into the textarea (bound with v-model).
 const draft = ref("");
 // True while a request is in flight; disables the textarea and the send button.
 const sending = ref(false);
 // Message of the most recent failure, rendered as an error row; empty when none.
 const error = ref("");
 // Template ref to the scrollable chat container, used for auto-scrolling.
 const chatWindow = ref(null);
 // After each re-render, pin the chat window to its bottom edge so the
 // newest row stays in view.
 onUpdated(() => {
  const pane = chatWindow.value;
  if (!pane) return;
  pane.scrollTop = pane.scrollHeight;
 });
 /**
  * Send the current draft to /api/ai/chat and append the reply to the chat.
  *
  * Does nothing when the draft is blank or a request is already in flight.
  * On failure the message is stored in `error` and shown as an error row;
  * `sending` is always reset when the request settles.
  */
 async function sendMessage() {
  const text = draft.value.trim();
  if (!text || sending.value) return;
  draft.value = "";
  error.value = "";
  // Snapshot the earlier turns BEFORE adding the new one: the backend appends
  // `message` itself, so including it in `history` would send it twice.
  const history = messages.value.map((m) => ({ role: m.role, content: m.content }));
  messages.value.push({ role: "user", content: text });
  sending.value = true;
  try {
    const resp = await fetch("/api/ai/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message: text, history }),
    });
    // A proxy or gateway may answer with a non-JSON body (e.g. an HTML error
    // page); treat that as a failed request rather than surfacing a parse error.
    const data = await resp.json().catch(() => null);
    if (!resp.ok || !data || data.error) {
      throw new Error((data && data.error) || "Request failed");
    }
    messages.value.push({
      role: "assistant",
      content: data.reply || "(empty response)",
      latency_ms: data.latency_ms,
    });
  } catch (err) {
    error.value = err.message || "Unexpected error";
  } finally {
    sending.value = false;
  }
 }
 </script>
 <style scoped>
 .page {
-  max-width: 900px;
+  max-width: 1100px;
  margin: 0 auto;
  padding: 32px 22px 72px;
 }
-ul {
+.hero {
  display: grid;
  grid-template-columns: 2fr 1fr;
  gap: 18px;
 }
 .hero-facts {
  display: grid;
  gap: 10px;
  align-content: start;
 }
 .fact {
  border: 1px solid var(--card-border);
  border-radius: 10px;
  padding: 10px 12px;
  background: rgba(255, 255, 255, 0.02);
 }
 .label {
  color: var(--text-muted);
  font-size: 12px;
 }
 .value {
  display: block;
  margin-top: 4px;
 }
 .pill-live {
  display: inline-block;
  margin-top: 8px;
 }
 .chat-card {
  margin-top: 18px;
 }
 .chat-window {
  background: rgba(255, 255, 255, 0.02);
  border: 1px solid var(--card-border);
  border-radius: 12px;
  padding: 14px;
  min-height: 320px;
  max-height: 520px;
  overflow-y: auto;
  display: flex;
  flex-direction: column;
  gap: 12px;
 }
 .chat-row {
  display: flex;
 }
 .chat-row.user {
  justify-content: flex-end;
 }
 .bubble {
  max-width: 85%;
  padding: 10px 12px;
  border-radius: 12px;
  border: 1px solid var(--card-border);
  background: rgba(255, 255, 255, 0.04);
 }
 .chat-row.assistant .bubble {
  background: rgba(80, 163, 255, 0.08);
 }
 .chat-row.user .bubble {
  background: rgba(255, 255, 255, 0.06);
 }
 .chat-row.error .bubble {
  background: rgba(255, 87, 87, 0.1);
  border-color: rgba(255, 87, 87, 0.5);
 }
 .role {
  font-size: 12px;
  color: var(--text-muted);
  margin-bottom: 4px;
 }
 .meta {
  color: var(--text-muted);
  font-size: 12px;
  margin-top: 6px;
 }
 .chat-form {
  margin-top: 12px;
  display: flex;
  flex-direction: column;
  gap: 8px;
 }
 textarea {
  width: 100%;
  border-radius: 12px;
  border: 1px solid var(--card-border);
  background: rgba(255, 255, 255, 0.03);
  color: var(--text-primary);
  padding: 10px 12px;
  resize: vertical;
 }
 .actions {
  display: flex;
  align-items: center;
  justify-content: space-between;
  gap: 12px;
 }
 .hint {
  color: var(--text-muted);
 }
 button.primary {
  background: linear-gradient(90deg, #4f8bff, #7dd0ff);
  color: #0b1222;
  padding: 10px 16px;
  border: none;
  border-radius: 10px;
  cursor: pointer;
  font-weight: 700;
 }
 button:disabled {
  opacity: 0.6;
  cursor: not-allowed;
 }
 .info-card ul {
  color: var(--text-muted);
  padding-left: 18px;
 }
 </style>