From 82a08d5eff9fa97df503778f318bee1f7941aa24 Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 12:54:16 -0800
Subject: [PATCH 1/6] fix errors at top level

Signed-off-by: pandyamarut
---
 src/engine.py |  6 +++---
 src/utils.py  | 11 +++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/engine.py b/src/engine.py
index 48e8aac..56b7481 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -166,7 +166,7 @@ async def generate(self, openai_request: JobInput):
             async for response in self._handle_chat_or_completion_request(openai_request):
                 yield response
         else:
-            yield create_error_response("Invalid route").model_dump()
+            yield {"error": create_error_response("Invalid route").model_dump()}
 
     async def _handle_model_request(self):
         models = await self.chat_engine.show_available_models()
@@ -184,8 +184,9 @@ async def _handle_chat_or_completion_request(self, openai_request: JobInput):
             request = request_class(
                 **openai_request.openai_input
             )
+            print(request)
         except Exception as e:
-            yield create_error_response(str(e)).model_dump()
+            yield {"error": create_error_response(str(e))}
             return
 
         dummy_request = DummyRequest()
@@ -219,4 +220,3 @@ async def _handle_chat_or_completion_request(self, openai_request: JobInput):
             if self.raw_openai_output:
                 batch = "".join(batch)
             yield batch
-
\ No newline at end of file
diff --git a/src/utils.py b/src/utils.py
index bfc8ce9..39ff82c 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -74,10 +74,13 @@ def update(self):
         if self.is_dynamic:
             self.current_batch_size = min(self.current_batch_size*self.batch_size_growth_factor, self.max_batch_size)
 
-def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
-    return ErrorResponse(message=message,
-                         type=err_type,
-                         code=status_code.value)
+# def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
+#     return ErrorResponse(message=message,
+#                          type=err_type,
+#                          code=status_code.value)
+
+def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> dict:
+    return {"error": message}
 
 def get_int_bool_env(env_var: str, default: bool) -> bool:
     return int(os.getenv(env_var, int(default))) == 1

From 5f288f8f50b87575754d1673a80aac5025c7e99b Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 13:14:03 -0800
Subject: [PATCH 2/6] handle exception

Signed-off-by: pandyamarut
---
 src/handler.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/handler.py b/src/handler.py
index 176ec7e..f51a423 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -7,11 +7,15 @@
 OpenAIvLLMEngine = OpenAIvLLMEngine(vllm_engine)
 
 async def handler(job):
-    job_input = JobInput(job["input"])
-    engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine
-    results_generator = engine.generate(job_input)
-    async for batch in results_generator:
-        yield batch
+    try:
+        job_input = JobInput(job["input"])
+        engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine
+        results_generator = engine.generate(job_input)
+        async for batch in results_generator:
+            yield batch
+    except Exception as e:
+        yield {"error": str(e)}
+        return
 
 runpod.serverless.start(
     {

From d6d3de74ddfcba11e90d80e450bb873c68cd67e6 Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 13:19:45 -0800
Subject: [PATCH 3/6] revert util

Signed-off-by: pandyamarut
---
 src/utils.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/utils.py b/src/utils.py
index 39ff82c..78810fd 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -74,13 +74,11 @@ def update(self):
         if self.is_dynamic:
             self.current_batch_size = min(self.current_batch_size*self.batch_size_growth_factor, self.max_batch_size)
 
-# def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
-#     return ErrorResponse(message=message,
-#                          type=err_type,
-#                          code=status_code.value)
+def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
+    return ErrorResponse(message=message,
+                         type=err_type,
+                         code=status_code.value)
 
-def create_error_response(message: str, err_type: str = "BadRequestError", status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> dict:
-    return {"error": message}
 
 def get_int_bool_env(env_var: str, default: bool) -> bool:
     return int(os.getenv(env_var, int(default))) == 1

From fbad8b53573a70cea2b1cd12b6837a4b77b3cc6c Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 14:05:08 -0800
Subject: [PATCH 4/6] handle engine initialisation error

Signed-off-by: pandyamarut
---
 src/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/engine.py b/src/engine.py
index 56b7481..f17f09d 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -113,7 +113,7 @@ def _initialize_llm(self):
             return engine
         except Exception as e:
             logging.error("Error initializing vLLM engine: %s", e)
-            raise e
+            raise RuntimeError({"error": str(e)})
 
 
 class OpenAIvLLMEngine(vLLMEngine):

From 4ee8595437eaad87f1a1b3afb0c185c31e225494 Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 14:31:58 -0800
Subject: [PATCH 5/6] fix

Signed-off-by: pandyamarut
---
 src/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/engine.py b/src/engine.py
index f17f09d..1605302 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -113,7 +113,7 @@ def _initialize_llm(self):
             return engine
         except Exception as e:
             logging.error("Error initializing vLLM engine: %s", e)
-            raise RuntimeError({"error": str(e)})
+            raise
 
 
 class OpenAIvLLMEngine(vLLMEngine):

From 4ec95fe6a115f50754921d56556e5fc6e52a2a21 Mon Sep 17 00:00:00 2001
From: pandyamarut
Date: Mon, 9 Dec 2024 14:54:29 -0800
Subject: [PATCH 6/6] handle request failure

Signed-off-by: pandyamarut
---
 src/handler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/handler.py b/src/handler.py
index f51a423..7a6bbdc 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -12,7 +12,11 @@ async def handler(job):
         engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine
         results_generator = engine.generate(job_input)
         async for batch in results_generator:
-            yield batch
+            # If there's any kind of error in the batch, format it
+            if isinstance(batch, dict) and 'error' in batch:
+                yield {"error": str(batch)}
+            else:
+                yield batch
     except Exception as e:
         yield {"error": str(e)}
         return