From 36fbdfef9e183e241d7a64935ae28f8d29c17328 Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Mon, 21 Oct 2024 19:53:39 +0800 Subject: [PATCH 1/5] Support input request_id --- vllm/entrypoints/openai/protocol.py | 1 + vllm/entrypoints/openai/serving_chat.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 06114339b7c6..950e75f10e3a 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -192,6 +192,7 @@ class ChatCompletionRequest(OpenAIBaseModel): spaces_between_special_tokens: bool = True truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None prompt_logprobs: Optional[int] = None + request_id: str = Field(default_factory=lambda: f"{random_uuid()}") # doc: end-chat-completion-sampling-params # doc: begin-chat-completion-extra-params diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index c3fa0e44e5e8..a5305d2a8638 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -176,7 +176,7 @@ async def create_chat_completion( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") - request_id = f"chat-{random_uuid()}" + request_id = f"chat-{request.request_id}" if request.request_id is not None else f"chat-{random_uuid()}" request_metadata = RequestResponseMetadata(request_id=request_id) if raw_request: From a0aaea0bb02225ad071f8756219ebedee9abe75e Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Mon, 21 Oct 2024 20:18:37 +0800 Subject: [PATCH 2/5] python lint --- vllm/entrypoints/openai/serving_chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index a5305d2a8638..5439c7a02871 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -176,7 +176,11 @@ async def create_chat_completion( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") - request_id = f"chat-{request.request_id}" if request.request_id is not None else f"chat-{random_uuid()}" + request_id = ( + f"chat-{request.request_id}" + if request.request_id is not None + else f"chat-{random_uuid()}" + ) request_metadata = RequestResponseMetadata(request_id=request_id) if raw_request: From df8dfe2ea7e2b321d0f166429402973cc0c3a8bd Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Mon, 21 Oct 2024 20:23:34 +0800 Subject: [PATCH 3/5] python lint --- vllm/entrypoints/openai/serving_chat.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 5439c7a02871..c1cf5abc95e8 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -176,11 +176,8 @@ async def create_chat_completion( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") - request_id = ( - f"chat-{request.request_id}" - if request.request_id is not None - else f"chat-{random_uuid()}" - ) + request_id = (f"chat-{request.request_id}" if request.request_id + is not None else f"chat-{random_uuid()}") request_metadata = RequestResponseMetadata(request_id=request_id) if raw_request: From d1644d01c8aa3e6185f63ecd1b118e1b13b5bf6c Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Tue, 22 Oct 2024 23:30:31 +0800 Subject: [PATCH 4/5] Address comments --- vllm/entrypoints/openai/protocol.py | 7 ++++++- vllm/entrypoints/openai/serving_chat.py | 3 +-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 950e75f10e3a..733decf80a71 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -192,7 +192,6 @@ class ChatCompletionRequest(OpenAIBaseModel): spaces_between_special_tokens: bool = True truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None prompt_logprobs: Optional[int] = None - request_id: str = Field(default_factory=lambda: f"{random_uuid()}") # doc: end-chat-completion-sampling-params # doc: begin-chat-completion-extra-params @@ -285,6 +284,12 @@ class ChatCompletionRequest(OpenAIBaseModel): "The priority of the request (lower means earlier handling; " "default: 0). Any priority other than 0 will raise an error " "if the served model does not use priority scheduling.")) + request_id: str = Field( + default_factory=lambda: f"{random_uuid()}", + description=( + "The request_id related to this request. If the caller does " + "not set it, a random_uuid will be generated. This id is used " + "through out the inference process and return in response.")) # doc: end-chat-completion-extra-params diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index c1cf5abc95e8..a79d56c5ad47 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -176,8 +176,7 @@ async def create_chat_completion( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") - request_id = (f"chat-{request.request_id}" if request.request_id - is not None else f"chat-{random_uuid()}") + request_id = f"chat-{request.request_id}" request_metadata = RequestResponseMetadata(request_id=request_id) if raw_request: From 8ac9aa307d3e773bc8956c13721cc008388d47c1 Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Tue, 22 Oct 2024 23:34:28 +0800 Subject: [PATCH 5/5] python lint --- vllm/entrypoints/openai/serving_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index a79d56c5ad47..b9b240b64850 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -38,7 +38,7 @@ from vllm.tracing import (contains_trace_headers, extract_trace_headers, log_tracing_disabled_warning) from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer -from vllm.utils import iterate_with_cancellation, random_uuid +from vllm.utils import iterate_with_cancellation logger = init_logger(__name__)