"""FastAPI service exposing a LiteRT-LM text-generation endpoint."""

from contextlib import asynccontextmanager

import litert_lm
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Path to the on-disk LiteRT-LM model bundle loaded at startup.
MODEL_PATH = "gemma-4-E2B-it.litertlm"


class PromptRequest(BaseModel):
    """Request body for POST /generate: a single user prompt string."""

    prompt: str


# Holds long-lived ML objects (the inference engine) across requests.
ml_models = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model engine once at startup and release it on shutdown."""
    engine = litert_lm.Engine(MODEL_PATH, backend=litert_lm.Backend.CPU)
    ml_models["engine"] = engine
    yield
    # Drop the reference on shutdown so the engine can be garbage-collected.
    del ml_models["engine"]


app = FastAPI(lifespan=lifespan)


@app.post("/generate")
async def generate_text(request: PromptRequest):
    """Generate a model response for the given prompt.

    Returns {"response": <text>}. Raises 503 if the engine has not been
    initialized by the lifespan handler, and 500 on any inference failure.
    """
    engine = ml_models.get("engine")
    if not engine:
        raise HTTPException(status_code=503, detail="Model engine not initialized")
    try:
        # A fresh conversation per request keeps prompts independent.
        # NOTE(review): created without "with"; if litert_lm conversations
        # hold native resources, confirm whether explicit cleanup is needed.
        conversation = engine.create_conversation()
        result = conversation.send_message(request.prompt)
        return {"response": result["content"][0]["text"]}
    except Exception as e:
        # Chain the original exception so tracebacks show the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)