feat(ai): expose node/gpu meta and improve chat UX

This commit is contained in:
Brad Stein 2025-12-21 00:16:43 -03:00
parent e81658d1d2
commit bef08fb1fb
2 changed files with 77 additions and 6 deletions

View File

@ -34,6 +34,11 @@ AI_CHAT_SYSTEM_PROMPT = os.getenv(
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.", "You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
) )
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20")) AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
# Name of the node serving AI chat; AI_NODE_NAME wins over NODE_NAME, and an
# unset or empty value falls back to "unknown".
AI_NODE_NAME = os.getenv("AI_NODE_NAME") or os.getenv("NODE_NAME") or "unknown"


def parse_node_gpu_map(raw: str) -> dict:
    """Decode the AI_NODE_GPU_MAP setting into a node-name -> GPU-label dict.

    Args:
        raw: JSON text, expected to encode an object keyed by node name.

    Returns:
        The decoded object, or an empty dict when ``raw`` is not valid JSON or
        decodes to something other than an object (list, string, number, null).
        Callers can therefore always call ``.get()`` on the result.
    """
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Valid JSON is not necessarily an object; anything else would break
    # the ``.get()`` lookup done when serving /api/ai/info.
    return parsed if isinstance(parsed, dict) else {}


AI_NODE_GPU_MAP = parse_node_gpu_map(os.getenv("AI_NODE_GPU_MAP", "{}"))
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None} _LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
@ -181,6 +186,19 @@ def ai_chat() -> Any:
return jsonify({"error": str(exc)}), 502 return jsonify({"error": str(exc)}), 502
@app.route("/api/ai/info", methods=["GET"])
def ai_info() -> Any:
    """Return JSON describing the node, GPU, model and endpoint behind AI chat.

    The GPU label is looked up in AI_NODE_GPU_MAP by AI_NODE_NAME; a node that
    is missing from the map gets the generic "local GPU (dynamic)" label.
    """
    payload = {
        "node": AI_NODE_NAME,
        "gpu": AI_NODE_GPU_MAP.get(AI_NODE_NAME, "local GPU (dynamic)"),
        "model": AI_CHAT_MODEL,
        "endpoint": "/api/ai/chat",
    }
    return jsonify(payload)
@app.route("/", defaults={"path": ""}) @app.route("/", defaults={"path": ""})
@app.route("/<path:path>") @app.route("/<path:path>")
def serve_frontend(path: str) -> Any: def serve_frontend(path: str) -> Any:

View File

@ -5,23 +5,26 @@
<p class="eyebrow">Atlas AI</p> <p class="eyebrow">Atlas AI</p>
<h1>Chat</h1> <h1>Chat</h1>
<p class="lede"> <p class="lede">
Lightweight LLM running on local GPU accelerated hardware. Anyone can chat without auth. The client streams responses Lightweight LLM running on local GPU accelerated hardware. Anyone can chat without auth. The client streams responses,
and shows round-trip latency for each turn. shows round-trip latency for each turn, and we're training an Atlas-aware model steeped in Titan Lab context.
</p> </p>
<div class="pill mono pill-live">Online</div> <div class="pill mono pill-live">Online</div>
</div> </div>
<div class="hero-facts"> <div class="hero-facts">
<div class="fact"> <div class="fact">
<span class="label mono">Model</span> <span class="label mono">Model</span>
<span class="value mono">qwen2.5-coder:7b-instruct-q4_0</span> <span class="value mono">{{ meta.model }}</span>
</div> </div>
<div class="fact"> <div class="fact">
<span class="label mono">GPU</span> <span class="label mono">GPU</span>
<span class="value mono">local GPU (dynamic)</span> <span class="value mono">{{ meta.gpu }}</span>
</div> </div>
<div class="fact"> <div class="fact">
<span class="label mono">Endpoint</span> <span class="label mono">Endpoint</span>
<span class="value mono">{{ apiHost }}</span> <button class="endpoint-copy mono" type="button" @click="copyCurl">
{{ apiHost }}
<span v-if="copied" class="copied">copied</span>
</button>
</div> </div>
</div> </div>
</section> </section>
@ -81,12 +84,17 @@ curl -X POST https://chat.ai.bstein.dev/api/ai/chat \
</template> </template>
<script setup> <script setup>
import { onUpdated, ref } from "vue"; import { onMounted, onUpdated, ref } from "vue";
const API_URL = (import.meta.env.VITE_AI_ENDPOINT || "/api/ai/chat").trim(); const API_URL = (import.meta.env.VITE_AI_ENDPOINT || "/api/ai/chat").trim();
const apiHost = new URL(API_URL, window.location.href).host + new URL(API_URL, window.location.href).pathname; const apiHost = new URL(API_URL, window.location.href).host + new URL(API_URL, window.location.href).pathname;
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
// Backend metadata; the template renders meta.model and meta.gpu in the hero
// facts. These placeholder values are shown until fetchMeta() replaces them
// with whatever /api/ai/info returns.
const meta = ref({
model: "loading...",
gpu: "local GPU (dynamic)",
node: "unknown",
});
const messages = ref([ const messages = ref([
{ {
role: "assistant", role: "assistant",
@ -97,6 +105,9 @@ const draft = ref("");
const sending = ref(false); const sending = ref(false);
const error = ref(""); const error = ref("");
const chatWindow = ref(null); const chatWindow = ref(null);
// Controls the "copied" badge rendered inside the endpoint button (v-if="copied").
const copied = ref(false);
// Request the backend metadata once the component has mounted.
onMounted(fetchMeta);
onUpdated(() => { onUpdated(() => {
if (chatWindow.value) { if (chatWindow.value) {
@ -104,6 +115,21 @@ onUpdated(() => {
} }
}); });
/**
 * Fetch node/GPU/model metadata from /api/ai/info and merge it into `meta`.
 *
 * Best-effort: on a non-OK response, an unparsable body, or a network error
 * the current placeholder values stay in place. Failures are logged as
 * warnings and never thrown, so the page still renders.
 *
 * @returns {Promise<void>}
 */
async function fetchMeta() {
  try {
    const resp = await fetch("/api/ai/info");
    if (!resp.ok) {
      console.warn(`AI info request failed with HTTP ${resp.status}`);
      return;
    }
    const data = await resp.json();
    const current = meta.value;
    // Missing or empty fields keep the value that is already displayed.
    meta.value = {
      model: data.model || current.model,
      gpu: data.gpu || current.gpu,
      node: data.node || current.node,
    };
  } catch (err) {
    // Keep the placeholders, but leave a trace for debugging.
    console.warn("AI info request failed", err);
  }
}
async function sendMessage() { async function sendMessage() {
if (!draft.value.trim() || sending.value) return; if (!draft.value.trim() || sending.value) return;
const text = draft.value.trim(); const text = draft.value.trim();
@ -179,6 +205,17 @@ function handleKeydown(e) {
sendMessage(); sendMessage();
} }
} }
// Handle of the pending timer that hides the "copied" badge (null when none).
let copiedResetTimer = null;

/**
 * Copy a curl command for the chat endpoint to the clipboard and show the
 * "copied" badge for 1400 ms.
 *
 * Any hide timer left over from an earlier click is cancelled first, so
 * clicking again restarts the full display period instead of having the
 * badge hidden early by the stale timer. If the clipboard write fails, the
 * badge is hidden and no error is thrown.
 *
 * @returns {Promise<void>}
 */
async function copyCurl() {
  const endpoint = new URL(API_URL, window.location.href).toString();
  const curl = `curl -X POST ${endpoint} -H 'content-type: application/json' -d '{\"message\":\"hi\"}'`;
  try {
    await navigator.clipboard.writeText(curl);
    copied.value = true;
    // Clear and re-arm in the same synchronous step so overlapping clicks
    // cannot leave an untracked timer behind.
    clearTimeout(copiedResetTimer);
    copiedResetTimer = setTimeout(() => {
      copied.value = false;
      copiedResetTimer = null;
    }, 1400);
  } catch {
    clearTimeout(copiedResetTimer);
    copiedResetTimer = null;
    copied.value = false;
  }
}
</script> </script>
<style scoped> <style scoped>
@ -217,6 +254,22 @@ function handleKeydown(e) {
margin-top: 4px; margin-top: 4px;
} }
/* Endpoint button: full-width, left-aligned, bordered, with no background fill. */
.endpoint-copy {
  width: 100%;
  padding: 6px 8px;
  border: 1px solid var(--card-border);
  border-radius: 8px;
  background: none;
  color: inherit;
  text-align: left;
  cursor: pointer;
}

/* "copied" badge, floated to the right edge of the endpoint button. */
.endpoint-copy .copied {
  float: right;
  font-size: 11px;
  color: var(--accent-cyan);
}
.pill-live { .pill-live {
display: inline-block; display: inline-block;
margin-top: 8px; margin-top: 8px;