From 63e8017fb94df9df2ee9af5fe072a36067f746a6 Mon Sep 17 00:00:00 2001
From: Tony Tran <thanhtan.tran@gmail.com>
Date: Sun, 19 Apr 2026 06:35:22 +0000
Subject: [PATCH] Add LiteRT-LM FastAPI server and chat web UI

---
 app.py               |  37 ++++
 default.profraw      | Bin 0 -> 144 bytes
 requirements.txt     |   3 +
 server.py            | 132 ++++++++++++
 templates/index.html | 502 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 674 insertions(+)
 create mode 100644 app.py
 create mode 100644 default.profraw
 create mode 100644 requirements.txt
 create mode 100644 server.py
 create mode 100644 templates/index.html

diff --git a/app.py b/app.py
new file mode 100644
index 0000000..53c5c6f
--- /dev/null
+++ b/app.py
@@ -0,0 +1,37 @@
+import litert_lm
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from contextlib import asynccontextmanager
+
+MODEL_PATH = "gemma-4-E2B-it.litertlm"
+
+class PromptRequest(BaseModel):  # JSON body accepted by POST /generate
+    prompt: str  # text handed unchanged to conversation.send_message()
+
+ml_models = {}
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Put a CPU-backed engine into ``ml_models`` before serving; remove it on shutdown."""
+    ml_models["engine"] = litert_lm.Engine(MODEL_PATH, backend=litert_lm.Backend.CPU)
+    yield
+    ml_models.pop("engine")
+
+app = FastAPI(lifespan=lifespan)
+
+@app.post("/generate")
+async def generate_text(request: PromptRequest):
+    """Send ``request.prompt`` to a new conversation and return the first text part of the reply."""
+    engine = ml_models.get("engine")
+    if not engine:
+        raise HTTPException(status_code=503, detail="Model engine not initialized")
+    try:
+        conversation = engine.create_conversation()  # created directly, not via "with"
+        reply = conversation.send_message(request.prompt)
+        return {"response": reply["content"][0]["text"]}
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # 0.0.0.0: listen on every interface, port 8000
\ No newline at end of file
diff --git a/default.profraw b/default.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..6e624b056b19209b6cb99fc35efa3fc4979783cb
GIT binary patch
literal 144
lcmZoHO3N=Q$obF3009C}n${E()B+AD{kb%W^Mpl9H~?Jb2j~C*

literal 0
HcmV?d00001

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f2ad05e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+fastapi
+uvicorn
+litert-lm-api-nightly
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..a5da892
--- /dev/null
+++ b/server.py
@@ -0,0 +1,132 @@
+import os
+import uuid
+import time
+from pathlib import Path
+
+# Suppress verbose logs
+os.environ["GRPC_VERBOSITY"] = "ERROR"
+os.environ["GLOG_minloglevel"] = "3"
+
+import litert_lm
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import HTMLResponse
+from pydantic import BaseModel
+from contextlib import asynccontextmanager
+
+# ── Config ────────────────────────────────────────────────────────────────────
+
+MODEL_PATH   = "gemma-4-E2B-it.litertlm"
+TEMPLATE_DIR = Path(__file__).parent / "templates"
+
+# ── Models ───────────────────────────────────────────────────────────────────
+
+class PromptRequest(BaseModel):  # JSON body of POST /generate and POST /chat/{session_id}
+    prompt: str  # text handed unchanged to send_message()
+
+# ── State ────────────────────────────────────────────────────────────────────
+
+ml_models = {}
+sessions: dict = {}  # session_id -> conversation object
+
+# ── Lifespan ─────────────────────────────────────────────────────────────────
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Create the CPU engine before serving; on shutdown forget all sessions, then the engine."""
+    ml_models["engine"] = litert_lm.Engine(MODEL_PATH, backend=litert_lm.Backend.CPU)
+    yield
+    sessions.clear()
+    ml_models.pop("engine")
+
+# ── App ───────────────────────────────────────────────────────────────────────
+
+app = FastAPI(title="LiteRT-LM API", lifespan=lifespan)
+
+# ── REST: stateless single-turn ───────────────────────────────────────────────
+
+@app.post("/generate")
+async def generate_text(request: PromptRequest):
+    """Answer one prompt in a throwaway conversation and report token/timing stats."""
+    engine = ml_models.get("engine")
+    if not engine:
+        raise HTTPException(status_code=503, detail="Model engine not initialized")
+    try:
+        conversation = engine.create_conversation()
+        started = time.perf_counter()
+        # NOTE(review): synchronous call inside an async handler; confirm it cannot stall other requests.
+        reply = conversation.send_message(request.prompt)
+        seconds = time.perf_counter() - started
+        answer = reply["content"][0]["text"]
+        token_count = len(engine.tokenize(answer))  # counted by re-tokenizing the reply text
+        return dict(
+            response=answer,
+            tokens=token_count,
+            elapsed_s=round(seconds, 2),
+            tokens_per_sec=round(token_count / seconds, 2) if seconds > 0 else 0,
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+# ── REST: multi-turn chat sessions ────────────────────────────────────────────
+
+@app.post("/chat/new")
+async def new_session():
+    """Start a conversation on the engine and register it under a new UUID4 ``session_id``."""
+    engine = ml_models.get("engine")
+    if not engine:
+        raise HTTPException(status_code=503, detail="Model engine not initialized")
+    new_id = str(uuid.uuid4())
+    sessions[new_id] = engine.create_conversation()
+    return {"session_id": new_id}
+
+@app.post("/chat/{session_id}")
+async def chat(session_id: str, request: PromptRequest):
+    """Send one message through the stored conversation of ``session_id`` and report stats."""
+    if session_id not in sessions:
+        missing = "Session not found. Create one via POST /chat/new"
+        raise HTTPException(status_code=404, detail=missing)
+    conversation = sessions[session_id]
+    try:
+        engine = ml_models.get("engine")
+        started = time.perf_counter()
+        # NOTE(review): synchronous call inside an async handler; confirm it cannot stall other requests.
+        reply = conversation.send_message(request.prompt)
+        seconds = time.perf_counter() - started
+        answer = reply["content"][0]["text"]
+        token_count = len(engine.tokenize(answer)) if engine else 0  # 0 when no engine is loaded
+        rate = round(token_count / seconds, 2) if seconds > 0 else 0
+        return dict(
+            session_id=session_id,
+            response=answer,
+            tokens=token_count,
+            elapsed_s=round(seconds, 2),
+            tokens_per_sec=rate,
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.delete("/chat/{session_id}")
+async def clear_session(session_id: str):
+    """Remove ``session_id`` from the session table; 404 when it is not registered."""
+    if session_id not in sessions:
+        raise HTTPException(status_code=404, detail="Session not found")
+    sessions.pop(session_id)
+    return {"status": "cleared", "session_id": session_id}
+
+@app.get("/chat/sessions/list")
+async def list_sessions():
+    """Report the ids of all registered sessions and how many there are."""
+    return {"sessions": list(sessions), "count": len(sessions)}
+
+# ── WebUI ─────────────────────────────────────────────────────────────────────
+
+@app.get("/", response_class=HTMLResponse)
+async def web_ui():
+    """Serve ``templates/index.html``, read from disk as UTF-8 on every request."""
+    return HTMLResponse(content=TEMPLATE_DIR.joinpath("index.html").read_text(encoding="utf-8"))
+
+# ── Run ───────────────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # 0.0.0.0: listen on every interface, port 8000
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..2d7c950
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,502 @@
+<!DOCTYPE html>
+<html lang="vi">
+<head>
+<meta charset="UTF-8"/>
+<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+<title>Gemma 4 Chat</title>
+<link href="https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;0,400;0,500;1,300&family=Space+Grotesk:wght@300;400;500;600&display=swap" rel="stylesheet"/>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.0/marked.min.js"></script>
+<style>
+  :root {
+    --bg: #0d0f12;
+    --surface: #13161b;
+    --surface2: #1a1e25;
+    --border: #252a33;
+    --accent: #7effd4;
+    --accent2: #5bc8f5;
+    --text: #e2e8f0;
+    --muted: #5a6478;
+    --user-bubble: #1a2a3a;
+    --model-bubble: #141a20;
+    --radius: 14px;
+    --font-ui: 'Space Grotesk', sans-serif;
+    --font-mono: 'DM Mono', monospace;
+  }
+  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+
+  body {
+    background: var(--bg);
+    color: var(--text);
+    font-family: var(--font-ui);
+    height: 100dvh;
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+  }
+
+  /* ── Header ── */
+  header {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    padding: 14px 20px;
+    border-bottom: 1px solid var(--border);
+    background: var(--surface);
+    flex-shrink: 0;
+  }
+  .logo {
+    width: 32px; height: 32px;
+    background: linear-gradient(135deg, var(--accent), var(--accent2));
+    border-radius: 8px;
+    display: flex; align-items: center; justify-content: center;
+    font-size: 16px;
+  }
+  header h1 { font-size: 15px; font-weight: 600; letter-spacing: .02em; }
+  header .sub { font-size: 11px; color: var(--muted); font-family: var(--font-mono); }
+
+  .header-right { margin-left: auto; display: flex; align-items: center; gap: 8px; }
+  #status-dot {
+    width: 7px; height: 7px; border-radius: 50%;
+    background: #3a3f4a;
+    transition: background .4s;
+  }
+  #status-dot.ready { background: var(--accent); box-shadow: 0 0 6px var(--accent); }
+  #status-dot.thinking { background: #f5c842; animation: pulse 1s infinite; }
+  #status-text { font-size: 11px; color: var(--muted); font-family: var(--font-mono); }
+
+  @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.4} }
+
+  /* ── Session bar ── */
+  #session-bar {
+    display: flex; align-items: center; gap: 8px;
+    padding: 8px 20px;
+    background: var(--surface2);
+    border-bottom: 1px solid var(--border);
+    font-size: 12px;
+    flex-shrink: 0;
+  }
+  #session-bar label { color: var(--muted); font-family: var(--font-mono); }
+  #session-id-display {
+    font-family: var(--font-mono);
+    font-size: 11px;
+    color: var(--accent2);
+    background: #0d1620;
+    padding: 3px 8px;
+    border-radius: 6px;
+    border: 1px solid #1e2e3e;
+    max-width: 280px;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+  }
+  .btn-sm {
+    padding: 4px 10px;
+    font-size: 11px;
+    font-family: var(--font-ui);
+    font-weight: 500;
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    background: var(--surface);
+    color: var(--text);
+    cursor: pointer;
+    transition: all .15s;
+  }
+  .btn-sm:hover { border-color: var(--accent); color: var(--accent); }
+  .btn-sm.danger:hover { border-color: #f87171; color: #f87171; }
+
+  /* ── Chat area ── */
+  #chat {
+    flex: 1;
+    overflow-y: auto;
+    padding: 20px;
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+    scroll-behavior: smooth;
+  }
+  #chat::-webkit-scrollbar { width: 4px; }
+  #chat::-webkit-scrollbar-track { background: transparent; }
+  #chat::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
+
+  .msg {
+    display: flex;
+    gap: 10px;
+    animation: fadeUp .25s ease both;
+  }
+  @keyframes fadeUp {
+    from { opacity: 0; transform: translateY(8px); }
+    to   { opacity: 1; transform: translateY(0); }
+  }
+  .msg.user { flex-direction: row-reverse; }
+
+  .avatar {
+    width: 30px; height: 30px; border-radius: 8px;
+    display: flex; align-items: center; justify-content: center;
+    font-size: 13px; flex-shrink: 0; margin-top: 2px;
+  }
+  .msg.user .avatar { background: linear-gradient(135deg,#2a4a6a,#1a3050); }
+  .msg.model .avatar { background: linear-gradient(135deg,#1a3a2a,#0d2018); color: var(--accent); }
+
+  .bubble {
+    padding: 11px 15px;
+    border-radius: var(--radius);
+    font-size: 14px;
+    line-height: 1.65;
+    white-space: pre-wrap;
+    word-break: break-word;
+    border: 1px solid transparent;
+  }
+  .msg.user .bubble {
+    background: var(--user-bubble);
+    border-color: #253a50;
+    border-radius: var(--radius) 4px var(--radius) var(--radius);
+  }
+  .msg.model .bubble {
+    background: var(--model-bubble);
+    border-color: var(--border);
+    border-radius: 4px var(--radius) var(--radius) var(--radius);
+  }
+
+  /* ── Markdown rendering ── */
+  .bubble.md-body { white-space: normal; }
+  .bubble.md-body p { margin-bottom: .6em; }
+  .bubble.md-body p:last-child { margin-bottom: 0; }
+  .bubble.md-body h1,.bubble.md-body h2,.bubble.md-body h3 {
+    font-weight: 600; margin: .8em 0 .3em; line-height: 1.3;
+  }
+  .bubble.md-body h1 { font-size: 1.15em; }
+  .bubble.md-body h2 { font-size: 1.05em; }
+  .bubble.md-body h3 { font-size: .95em; color: var(--accent2); }
+  .bubble.md-body ul,.bubble.md-body ol { padding-left: 1.4em; margin: .4em 0; }
+  .bubble.md-body li { margin-bottom: .25em; }
+  .bubble.md-body code {
+    font-family: var(--font-mono); font-size: .82em;
+    background: #0a1018; border: 1px solid var(--border);
+    border-radius: 4px; padding: 1px 5px; color: var(--accent);
+  }
+  .bubble.md-body pre {
+    background: #080c10; border: 1px solid var(--border);
+    border-radius: 8px; padding: 12px 14px;
+    overflow-x: auto; margin: .6em 0;
+  }
+  .bubble.md-body pre code {
+    background: none; border: none; padding: 0;
+    font-size: .82em; color: #c9d8e8;
+  }
+  .bubble.md-body blockquote {
+    border-left: 3px solid var(--accent2); padding-left: 10px;
+    color: var(--muted); margin: .5em 0; font-style: italic;
+  }
+  .bubble.md-body strong { color: #fff; font-weight: 600; }
+  .bubble.md-body em { color: #b0c8e0; }
+  .bubble.md-body a { color: var(--accent2); text-decoration: none; }
+  .bubble.md-body a:hover { text-decoration: underline; }
+  .bubble.md-body hr { border: none; border-top: 1px solid var(--border); margin: .8em 0; }
+  .bubble.md-body table {
+    border-collapse: collapse; width: 100%; margin: .6em 0; font-size: .88em;
+  }
+  .bubble.md-body th,.bubble.md-body td {
+    border: 1px solid var(--border); padding: 5px 10px; text-align: left;
+  }
+  .bubble.md-body th { background: var(--surface2); color: var(--accent2); }
+
+  /* ── Token/s badge ── */
+  .msg-inner { display: flex; flex-direction: column; max-width: 72%; }
+  .msg-meta {
+    font-family: var(--font-mono); font-size: 10px;
+    color: var(--muted); margin-top: 5px; padding: 0 4px;
+    display: flex; gap: 8px; align-items: center;
+  }
+  .tps-badge {
+    background: #0d1a12; border: 1px solid #1a3a22;
+    color: var(--accent); border-radius: 4px;
+    padding: 1px 6px; font-size: 10px;
+  }
+
+  /* ── Thinking dots ── */
+  .thinking-dots span {
+    display: inline-block;
+    width: 6px; height: 6px; border-radius: 50%;
+    background: var(--muted); margin: 0 2px;
+    animation: bounce .9s infinite;
+  }
+  .thinking-dots span:nth-child(2) { animation-delay: .2s; }
+  .thinking-dots span:nth-child(3) { animation-delay: .4s; }
+  @keyframes bounce { 0%,80%,100%{transform:translateY(0)} 40%{transform:translateY(-6px)} }
+
+  /* ── Empty state ── */
+  #empty {
+    flex: 1; display: flex; flex-direction: column;
+    align-items: center; justify-content: center;
+    color: var(--muted); text-align: center; gap: 12px;
+    pointer-events: none;
+  }
+  #empty .icon { font-size: 40px; opacity: .3; }
+  #empty p { font-size: 13px; line-height: 1.6; max-width: 240px; }
+
+  /* ── Input bar ── */
+  #input-bar {
+    display: flex; gap: 10px; align-items: flex-end;
+    padding: 14px 20px;
+    background: var(--surface);
+    border-top: 1px solid var(--border);
+    flex-shrink: 0;
+  }
+  #prompt {
+    flex: 1;
+    background: var(--surface2);
+    border: 1px solid var(--border);
+    border-radius: 10px;
+    color: var(--text);
+    font-family: var(--font-ui);
+    font-size: 14px;
+    padding: 10px 14px;
+    resize: none;
+    min-height: 42px;
+    max-height: 160px;
+    line-height: 1.5;
+    outline: none;
+    transition: border-color .2s;
+  }
+  #prompt:focus { border-color: var(--accent2); }
+  #prompt::placeholder { color: var(--muted); }
+
+  #send-btn {
+    width: 42px; height: 42px;
+    background: var(--accent);
+    border: none; border-radius: 10px;
+    cursor: pointer;
+    display: flex; align-items: center; justify-content: center;
+    transition: all .15s;
+    flex-shrink: 0;
+    color: #000;
+    font-size: 17px;
+  }
+  #send-btn:hover { background: #5ef0c0; transform: scale(1.05); }
+  #send-btn:disabled { background: var(--border); color: var(--muted); cursor: not-allowed; transform: none; }
+
+  .mode-label {
+    font-size: 10px; font-family: var(--font-mono);
+    color: var(--muted); align-self: center;
+  }
+</style>
+</head>
+<body>
+
+<header>
+  <div class="logo">&#10022;</div>
+  <div>
+    <h1>Gemma 4 Chat</h1>
+    <div class="sub">LiteRT-LM &middot; Orange Pi 5</div>
+  </div>
+  <div class="header-right">
+    <div id="status-dot"></div>
+    <span id="status-text">initializing&hellip;</span>
+  </div>
+</header>
+
+<div id="session-bar">
+  <label>session:</label>
+  <span id="session-id-display">&mdash;</span>
+  <button class="btn-sm" onclick="newSession()">&#xFF0B; New</button>
+  <button class="btn-sm danger" onclick="clearSession()">&#x2715; Clear</button>
+  <span id="mode-badge" class="mode-label">multi-turn</span>
+</div>
+
+<div id="chat">
+  <div id="empty">
+    <div class="icon">&#9672;</div>
+    <p>Start a conversation.<br/>Gemma 4 remembers context within a session.</p>
+  </div>
+</div>
+
+<div id="input-bar">
+  <textarea id="prompt" rows="1" placeholder="Nh&#x1EAF;n tin v&#x1EDB;i Gemma 4&hellip;"></textarea>
+  <button id="send-btn" onclick="sendMessage()" title="Send (Enter)">&#x27A4;</button>
+</div>
+
+<script>
+// ── Marked.js config ─────────────────────────────────────────────────────────
+marked.setOptions({ breaks: true, gfm: true });
+
+let sessionId = null;
+let busy = false;
+
+const chatEl         = document.getElementById('chat');
+const emptyEl        = document.getElementById('empty');
+const promptEl       = document.getElementById('prompt');
+const sendBtn        = document.getElementById('send-btn');
+const statusDot      = document.getElementById('status-dot');
+const statusTxt      = document.getElementById('status-text');
+const sessionDisplay = document.getElementById('session-id-display');
+
+// ── Auto-resize textarea ──────────────────────────────────────────────────────
+promptEl.addEventListener('input', () => {
+  promptEl.style.height = 'auto';
+  promptEl.style.height = Math.min(promptEl.scrollHeight, 160) + 'px';
+});
+promptEl.addEventListener('keydown', e => {
+  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); sendMessage(); }
+});
+
+// ── Session management ────────────────────────────────────────────────────────
+async function newSession() {  // Create a server-side session, show its id and reset the chat view.
+  try {
+    setStatus('thinking', 'creating session...');
+    const res = await fetch('/chat/new', { method: 'POST' });
+    const data = await res.json();
+    if (!res.ok || !data.session_id) throw new Error(data.detail || res.statusText || 'no session_id');  // error reply
+    sessionDisplay.textContent = sessionId = data.session_id;
+    clearChat();
+    setStatus('ready', 'ready');
+  } catch(e) {  // network failure, non-JSON body, or the error reply rejected above
+    setStatus('', 'error');
+    alert('Khong tao duoc session: ' + e.message);
+  }
+}
+
+async function clearSession() {  // Delete the current session on the server, then open a new one.
+  if (!sessionId) return;
+  try { await fetch('/chat/' + sessionId, { method: 'DELETE' }); } catch(_) {}  // a failed DELETE is ignored
+  await newSession();
+}
+
+function clearChat() {
+  // Drop every rendered message and put the empty-state placeholder back.
+  chatEl.replaceChildren(emptyEl);
+  emptyEl.style.display = 'flex';
+}
+
+// ── Status ────────────────────────────────────────────────────────────────────
+function setStatus(state, text) {  // state: '', 'ready' or 'thinking'; text: label shown next to the dot
+  statusDot.className = state;  // the class alone selects the dot's colour/animation in the stylesheet
+  statusTxt.textContent = text;
+}
+
+// ── Message rendering ─────────────────────────────────────────────────────────
+function appendMsg(role, text, stats) {  // Append one chat row ('user' or 'model') and return its wrapper.
+  emptyEl.style.display = 'none';  // hide the empty-state placeholder once there is a message
+
+  const wrapper = document.createElement('div');
+  wrapper.className = 'msg ' + role;
+
+  const avatar = document.createElement('div');
+  avatar.className = 'avatar';
+  avatar.textContent = role === 'user' ? '\uD83D\uDC64' : '\u2726';
+
+  const inner = document.createElement('div');
+  inner.className = 'msg-inner';
+  // Model text is rendered as Markdown; anything else is inserted as plain text.
+  const bubble = document.createElement('div');
+  if (role === 'model') {
+    bubble.className = 'bubble md-body';
+    bubble.innerHTML = marked.parse(text);  // NOTE(review): unsanitized HTML in `text` reaches the DOM; confirm or sanitize
+    bubble.querySelectorAll('a').forEach(a => {  // rendered links open in a new tab
+      a.target = '_blank';
+      a.rel = 'noopener noreferrer';
+    });
+  } else {
+    bubble.className = 'bubble';
+    bubble.textContent = text;
+  }
+  inner.appendChild(bubble);
+  // Stats footer (tok/s badge, token count, seconds) only for model rows that were given stats.
+  if (role === 'model' && stats) {
+    const meta = document.createElement('div');
+    meta.className = 'msg-meta';
+    const badge = document.createElement('span');
+    badge.className = 'tps-badge';
+    badge.textContent = '\u26A1 ' + stats.tokens_per_sec + ' tok/s';
+    const detail = document.createElement('span');
+    detail.textContent = stats.tokens + ' tokens \u00B7 ' + stats.elapsed_s + 's';
+    meta.appendChild(badge);
+    meta.appendChild(detail);
+    inner.appendChild(meta);
+  }
+  // DOM order: user rows append the avatar after the bubble column, model rows before it.
+  if (role === 'user') {
+    wrapper.appendChild(inner);
+    wrapper.appendChild(avatar);
+  } else {
+    wrapper.appendChild(avatar);
+    wrapper.appendChild(inner);
+  }
+
+  chatEl.appendChild(wrapper);
+  chatEl.scrollTop = chatEl.scrollHeight;
+  return wrapper;
+}
+
+function appendThinking() {  // Append a model row holding three animated dots; returns it so the caller can remove it.
+  emptyEl.style.display = 'none';
+  const div = document.createElement('div');
+  div.className = 'msg model';
+
+  const avatar = document.createElement('div');
+  avatar.className = 'avatar';
+  avatar.textContent = '\u2726';
+
+  const bubble = document.createElement('div');
+  bubble.className = 'bubble thinking-dots';
+  bubble.innerHTML = '<span></span><span></span><span></span>';  // fixed markup, no external data
+
+  div.appendChild(avatar);
+  div.appendChild(bubble);
+  chatEl.appendChild(div);
+  chatEl.scrollTop = chatEl.scrollHeight;
+  return div;
+}
+
+// ── Send ──────────────────────────────────────────────────────────────────────
+async function sendMessage() {  // Post the prompt to the current session and render the reply or the error.
+  if (busy) return;
+  const text = promptEl.value.trim();
+  if (!text) return;
+  // No session yet (e.g. the initial creation failed): try once more before posting.
+  if (!sessionId) await newSession();
+  if (!sessionId) return;  // still none: keep the typed text instead of posting to /chat/null
+  appendMsg('user', text);
+  promptEl.value = '';
+  promptEl.style.height = 'auto';
+
+  const thinkingEl = appendThinking();
+  setBusy(true);
+
+  try {
+    const res = await fetch('/chat/' + sessionId, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ prompt: text }),
+    });
+    if (!res.ok) {
+      const err = await res.json();
+      throw new Error(err.detail || res.statusText);
+    }
+    const data = await res.json();
+    thinkingEl.remove();
+    appendMsg('model', data.response, {
+      tokens_per_sec: data.tokens_per_sec,
+      tokens: data.tokens,
+      elapsed_s: data.elapsed_s,
+    });
+    setStatus('ready', 'ready \u00B7 last: ' + data.tokens_per_sec + ' tok/s');
+  } catch(e) {
+    thinkingEl.remove();
+    appendMsg('model', 'Loi: ' + e.message);
+    setStatus('', 'error');
+  } finally {
+    setBusy(false);
+  }
+}
+
+function setBusy(val) {  // Toggle the in-flight flag that sendMessage() checks, and the send button.
+  busy = val;
+  sendBtn.disabled = val;
+  if (val) setStatus('thinking', 'thinking...');  // the caller sets the status again when it finishes
+}
+
+// ── Init ──────────────────────────────────────────────────────────────────────
+(async () => { await newSession(); })();
+</script>
+</body>
+</html>