From 36fbdfef9e183e241d7a64935ae28f8d29c17328 Mon Sep 17 00:00:00 2001
From: Yuhong Guo <yuhong.gyh@antgroup.com>
Date: Mon, 21 Oct 2024 19:53:39 +0800
Subject: [PATCH 1/5] Support input request_id

---
 vllm/entrypoints/openai/protocol.py     | 1 +
 vllm/entrypoints/openai/serving_chat.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 06114339b7c6..950e75f10e3a 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -192,6 +192,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     prompt_logprobs: Optional[int] = None
+    request_id: str = Field(default_factory=lambda: f"{random_uuid()}")
     # doc: end-chat-completion-sampling-params
 
     # doc: begin-chat-completion-extra-params
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index c3fa0e44e5e8..a5305d2a8638 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -176,7 +176,7 @@ async def create_chat_completion(
                 "\"auto\" tool choice requires "
                 "--enable-auto-tool-choice and --tool-call-parser to be set")
 
-        request_id = f"chat-{random_uuid()}"
+        request_id = f"chat-{request.request_id}" if request.request_id is not None else f"chat-{random_uuid()}"
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:

From a0aaea0bb02225ad071f8756219ebedee9abe75e Mon Sep 17 00:00:00 2001
From: Yuhong Guo <yuhong.gyh@antgroup.com>
Date: Mon, 21 Oct 2024 20:18:37 +0800
Subject: [PATCH 2/5] python lint

---
 vllm/entrypoints/openai/serving_chat.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index a5305d2a8638..5439c7a02871 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -176,7 +176,11 @@ async def create_chat_completion(
                 "\"auto\" tool choice requires "
                 "--enable-auto-tool-choice and --tool-call-parser to be set")
 
-        request_id = f"chat-{request.request_id}" if request.request_id is not None else f"chat-{random_uuid()}"
+        request_id = (
+            f"chat-{request.request_id}"
+            if request.request_id is not None
+            else f"chat-{random_uuid()}"
+        )
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:

From df8dfe2ea7e2b321d0f166429402973cc0c3a8bd Mon Sep 17 00:00:00 2001
From: Yuhong Guo <yuhong.gyh@antgroup.com>
Date: Mon, 21 Oct 2024 20:23:34 +0800
Subject: [PATCH 3/5] python lint

---
 vllm/entrypoints/openai/serving_chat.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 5439c7a02871..c1cf5abc95e8 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -176,11 +176,8 @@ async def create_chat_completion(
                 "\"auto\" tool choice requires "
                 "--enable-auto-tool-choice and --tool-call-parser to be set")
 
-        request_id = (
-            f"chat-{request.request_id}"
-            if request.request_id is not None
-            else f"chat-{random_uuid()}"
-        )
+        request_id = (f"chat-{request.request_id}" if request.request_id
+                      is not None else f"chat-{random_uuid()}")
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:

From d1644d01c8aa3e6185f63ecd1b118e1b13b5bf6c Mon Sep 17 00:00:00 2001
From: Yuhong Guo <yuhong.gyh@antgroup.com>
Date: Tue, 22 Oct 2024 23:30:31 +0800
Subject: [PATCH 4/5] Address comments

---
 vllm/entrypoints/openai/protocol.py     | 7 ++++++-
 vllm/entrypoints/openai/serving_chat.py | 3 +--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 950e75f10e3a..733decf80a71 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -192,7 +192,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     prompt_logprobs: Optional[int] = None
-    request_id: str = Field(default_factory=lambda: f"{random_uuid()}")
     # doc: end-chat-completion-sampling-params
 
     # doc: begin-chat-completion-extra-params
@@ -285,6 +284,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
             "The priority of the request (lower means earlier handling; "
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."))
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."))
 
     # doc: end-chat-completion-extra-params
 
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index c1cf5abc95e8..a79d56c5ad47 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -176,8 +176,7 @@ async def create_chat_completion(
                 "\"auto\" tool choice requires "
                 "--enable-auto-tool-choice and --tool-call-parser to be set")
 
-        request_id = (f"chat-{request.request_id}" if request.request_id
-                      is not None else f"chat-{random_uuid()}")
+        request_id = f"chat-{request.request_id}"
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:

From 8ac9aa307d3e773bc8956c13721cc008388d47c1 Mon Sep 17 00:00:00 2001
From: Yuhong Guo <yuhong.gyh@antgroup.com>
Date: Tue, 22 Oct 2024 23:34:28 +0800
Subject: [PATCH 5/5] python lint

---
 vllm/entrypoints/openai/serving_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index a79d56c5ad47..b9b240b64850 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -38,7 +38,7 @@
 from vllm.tracing import (contains_trace_headers, extract_trace_headers,
                           log_tracing_disabled_warning)
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import iterate_with_cancellation, random_uuid
+from vllm.utils import iterate_with_cancellation
 
 logger = init_logger(__name__)