feat(ai): expose node/gpu meta and improve chat UX
This commit is contained in:
parent
e81658d1d2
commit
bef08fb1fb
@ -34,6 +34,11 @@ AI_CHAT_SYSTEM_PROMPT = os.getenv(
|
|||||||
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
|
"You are the Titan Lab assistant for bstein.dev. Be concise and helpful.",
|
||||||
)
|
)
|
||||||
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
|
AI_CHAT_TIMEOUT_SEC = float(os.getenv("AI_CHAT_TIMEOUT_SEC", "20"))
|
||||||
|
# Name of the node this backend runs on. AI_NODE_NAME wins over NODE_NAME, and
# an unset or empty value in both falls back to "unknown".
AI_NODE_NAME = os.getenv("AI_NODE_NAME") or os.getenv("NODE_NAME") or "unknown"

# Mapping decoded from the AI_NODE_GPU_MAP environment variable (JSON text).
# Malformed JSON degrades to an empty mapping rather than failing at import.
try:
    AI_NODE_GPU_MAP = json.loads(os.getenv("AI_NODE_GPU_MAP", "{}"))
except json.JSONDecodeError:
    AI_NODE_GPU_MAP = {}

# Valid JSON that is not an object (e.g. "[]", "null", "3") has no .get(), so
# it would only blow up later when the map is queried by node name. Treat it
# the same way as malformed input.
if not isinstance(AI_NODE_GPU_MAP, dict):
    AI_NODE_GPU_MAP = {}
|
||||||
|
|
||||||
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
_LAB_STATUS_CACHE: dict[str, Any] = {"ts": 0.0, "value": None}
|
||||||
|
|
||||||
@ -181,6 +186,19 @@ def ai_chat() -> Any:
|
|||||||
return jsonify({"error": str(exc)}), 502
|
return jsonify({"error": str(exc)}), 502
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ai/info", methods=["GET"])
def ai_info() -> Any:
    """Report which node, GPU and model back the chat endpoint.

    The GPU label is looked up in AI_NODE_GPU_MAP under AI_NODE_NAME; a node
    without an entry gets the generic "local GPU (dynamic)" label.

    Returns:
        The jsonify() response carrying the keys "node", "gpu", "model" and
        "endpoint".
    """
    payload = {
        "node": AI_NODE_NAME,
        "gpu": AI_NODE_GPU_MAP.get(AI_NODE_NAME, "local GPU (dynamic)"),
        "model": AI_CHAT_MODEL,
        "endpoint": "/api/ai/chat",
    }
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/", defaults={"path": ""})
|
@app.route("/", defaults={"path": ""})
|
||||||
@app.route("/<path:path>")
|
@app.route("/<path:path>")
|
||||||
def serve_frontend(path: str) -> Any:
|
def serve_frontend(path: str) -> Any:
|
||||||
|
|||||||
@ -5,23 +5,26 @@
|
|||||||
<p class="eyebrow">Atlas AI</p>
|
<p class="eyebrow">Atlas AI</p>
|
||||||
<h1>Chat</h1>
|
<h1>Chat</h1>
|
||||||
<p class="lede">
|
<p class="lede">
|
||||||
Lightweight LLM running on local GPU accelerated hardware. Anyone can chat without auth. The client streams responses
|
Lightweight LLM running on local GPU accelerated hardware. Anyone can chat without auth. The client streams responses,
|
||||||
and shows round-trip latency for each turn.
|
shows round-trip latency for each turn, and we're training an Atlas-aware model steeped in Titan Lab context.
|
||||||
</p>
|
</p>
|
||||||
<div class="pill mono pill-live">Online</div>
|
<div class="pill mono pill-live">Online</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="hero-facts">
|
<div class="hero-facts">
|
||||||
<div class="fact">
|
<div class="fact">
|
||||||
<span class="label mono">Model</span>
|
<span class="label mono">Model</span>
|
||||||
<span class="value mono">qwen2.5-coder:7b-instruct-q4_0</span>
|
<span class="value mono">{{ meta.model }}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="fact">
|
<div class="fact">
|
||||||
<span class="label mono">GPU</span>
|
<span class="label mono">GPU</span>
|
||||||
<span class="value mono">local GPU (dynamic)</span>
|
<span class="value mono">{{ meta.gpu }}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="fact">
|
<div class="fact">
|
||||||
<span class="label mono">Endpoint</span>
|
<span class="label mono">Endpoint</span>
|
||||||
<span class="value mono">{{ apiHost }}</span>
|
<button class="endpoint-copy mono" type="button" @click="copyCurl">
|
||||||
|
{{ apiHost }}
|
||||||
|
<span v-if="copied" class="copied">copied</span>
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
@ -81,12 +84,17 @@ curl -X POST https://chat.ai.bstein.dev/api/ai/chat \
|
|||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { onUpdated, ref } from "vue";
|
import { onMounted, onUpdated, ref } from "vue";
|
||||||
|
|
||||||
const API_URL = (import.meta.env.VITE_AI_ENDPOINT || "/api/ai/chat").trim();
|
const API_URL = (import.meta.env.VITE_AI_ENDPOINT || "/api/ai/chat").trim();
|
||||||
const apiHost = new URL(API_URL, window.location.href).host + new URL(API_URL, window.location.href).pathname;
|
const apiHost = new URL(API_URL, window.location.href).host + new URL(API_URL, window.location.href).pathname;
|
||||||
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
|
|
||||||
|
const meta = ref({
|
||||||
|
model: "loading...",
|
||||||
|
gpu: "local GPU (dynamic)",
|
||||||
|
node: "unknown",
|
||||||
|
});
|
||||||
const messages = ref([
|
const messages = ref([
|
||||||
{
|
{
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
@ -97,6 +105,9 @@ const draft = ref("");
|
|||||||
const sending = ref(false);
|
const sending = ref(false);
|
||||||
const error = ref("");
|
const error = ref("");
|
||||||
const chatWindow = ref(null);
|
const chatWindow = ref(null);
|
||||||
|
const copied = ref(false);
|
||||||
|
|
||||||
|
onMounted(fetchMeta);
|
||||||
|
|
||||||
onUpdated(() => {
|
onUpdated(() => {
|
||||||
if (chatWindow.value) {
|
if (chatWindow.value) {
|
||||||
@ -104,6 +115,21 @@ onUpdated(() => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Load node/GPU/model details from the backend and merge them into `meta`.
 *
 * Best-effort: `meta` keeps its current values when the request fails,
 * returns a non-2xx status, or yields a payload that is not a JSON object.
 * Failures are reported with console.warn instead of being dropped silently.
 */
async function fetchMeta() {
  // Only a non-empty string may replace a displayed value; a missing field,
  // number or object keeps whatever is currently shown.
  const pick = (candidate, fallback) =>
    typeof candidate === "string" && candidate !== "" ? candidate : fallback;

  try {
    const resp = await fetch("/api/ai/info");
    if (!resp.ok) {
      console.warn(`AI info request failed with HTTP ${resp.status}`);
      return;
    }

    const data = await resp.json();
    if (data === null || typeof data !== "object") {
      console.warn("AI info response was not a JSON object");
      return;
    }

    const current = meta.value;
    meta.value = {
      model: pick(data.model, current.model),
      gpu: pick(data.gpu, current.gpu),
      node: pick(data.node, current.node),
    };
  } catch (err) {
    // Network error or unparsable body: keep the placeholders, but say why.
    console.warn("AI info request failed", err);
  }
}
|
||||||
|
|
||||||
async function sendMessage() {
|
async function sendMessage() {
|
||||||
if (!draft.value.trim() || sending.value) return;
|
if (!draft.value.trim() || sending.value) return;
|
||||||
const text = draft.value.trim();
|
const text = draft.value.trim();
|
||||||
@ -179,6 +205,17 @@ function handleKeydown(e) {
|
|||||||
sendMessage();
|
sendMessage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle of the pending "copied" badge reset, kept so a repeat click can
// cancel it instead of having the older timer hide the badge early.
let copiedResetTimer = null;

/**
 * Copy a ready-to-run curl command for the chat endpoint to the clipboard and
 * show the "copied" badge for 1.4 seconds.
 *
 * The endpoint is API_URL resolved against the current page location. If the
 * clipboard write fails for any reason, the badge is simply left hidden.
 */
async function copyCurl() {
  const endpoint = new URL(API_URL, window.location.href).toString();
  const curl = `curl -X POST ${endpoint} -H 'content-type: application/json' -d '{"message":"hi"}'`;

  try {
    await navigator.clipboard.writeText(curl);
  } catch {
    clearTimeout(copiedResetTimer);
    copiedResetTimer = null;
    copied.value = false;
    return;
  }

  // Restart the countdown on every successful copy so each click gets the
  // full display time.
  clearTimeout(copiedResetTimer);
  copied.value = true;
  copiedResetTimer = setTimeout(() => {
    copied.value = false;
    copiedResetTimer = null;
  }, 1400);
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
@ -217,6 +254,22 @@ function handleKeydown(e) {
|
|||||||
margin-top: 4px;
|
margin-top: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Click-to-copy endpoint button: no fill, inherited text color, full width. */
.endpoint-copy {
  width: 100%;
  padding: 6px 8px;
  border: 1px solid var(--card-border);
  border-radius: 8px;
  background: none;
  color: inherit;
  text-align: left;
  cursor: pointer;
}

/* "copied" confirmation shown inside the button, floated to its right edge. */
.endpoint-copy .copied {
  float: right;
  font-size: 11px;
  color: var(--accent-cyan);
}
|
||||||
|
|
||||||
.pill-live {
|
.pill-live {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
margin-top: 8px;
|
margin-top: 8px;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user