diff --git a/fastchat/serve/base_model_worker.py b/fastchat/serve/base_model_worker.py
index 2fe322990..859ec125d 100644
--- a/fastchat/serve/base_model_worker.py
+++ b/fastchat/serve/base_model_worker.py
@@ -206,8 +206,10 @@ async def api_generate_stream(request: Request):
 async def api_generate(request: Request):
     params = await request.json()
     await acquire_worker_semaphore()
-    output = await asyncio.to_thread(worker.generate_gate, params)
-    release_worker_semaphore()
+    try:
+        output = await asyncio.to_thread(worker.generate_gate, params)
+    finally:
+        release_worker_semaphore()
     return JSONResponse(output)
 
 
diff --git a/fastchat/serve/model_worker.py b/fastchat/serve/model_worker.py
index 683a78556..9a777f383 100644
--- a/fastchat/serve/model_worker.py
+++ b/fastchat/serve/model_worker.py
@@ -142,6 +142,12 @@ def generate_stream_gate(self, params):
                 "error_code": ErrorCode.INTERNAL_ERROR,
             }
             yield json.dumps(ret).encode() + b"\0"
+        except Exception as e:
+            ret = {
+                "text": f"{SERVER_ERROR_MSG}\n\n({e})",
+                "error_code": ErrorCode.INTERNAL_ERROR,
+            }
+            yield json.dumps(ret).encode() + b"\0"
 
     def generate_gate(self, params):
         for x in self.generate_stream_gate(params):