import litert_lm
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from contextlib import asynccontextmanager

# Path to the LiteRT-LM model bundle loaded by the engine.
MODEL_PATH = "gemma-4-E2B-it.litertlm"


class PromptRequest(BaseModel):
    prompt: str


ml_models = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Load the model engine once at startup and release it on shutdown.
    engine = litert_lm.Engine(MODEL_PATH, backend=litert_lm.Backend.CPU)
    ml_models["engine"] = engine
    yield
    del ml_models["engine"]


app = FastAPI(lifespan=lifespan)


@app.post("/generate")
async def generate_text(request: PromptRequest):
    engine = ml_models.get("engine")
    if not engine:
        raise HTTPException(status_code=503, detail="Model engine not initialized")
    try:
        # Create the conversation directly instead of using "with".
        conversation = engine.create_conversation()
        result = conversation.send_message(request.prompt)
        return {"response": result["content"][0]["text"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
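
For a quick smoke test of the /generate endpoint, a minimal client sketch can be run in a separate process once the server is up. It only assumes the default host and port passed to uvicorn.run above and the PromptRequest schema; the prompt text is a placeholder.

import json
import urllib.request

# Build a JSON body matching the PromptRequest schema ({"prompt": "..."}).
payload = json.dumps(
    {"prompt": "Summarize what this server does in one sentence."}
).encode("utf-8")

req = urllib.request.Request(
    "http://localhost:8000/generate",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(req) as resp:
    # On success the server returns {"response": "..."}.
    print(json.loads(resp.read())["response"])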