diff --git a/python/sglang/srt/entrypoints/openai/serving_base.py b/python/sglang/srt/entrypoints/openai/serving_base.py index 097e02f66e28..669aed7b0462 100644 --- a/python/sglang/srt/entrypoints/openai/serving_base.py +++ b/python/sglang/srt/entrypoints/openai/serving_base.py @@ -88,9 +88,6 @@ async def handle_request( """Handle the specific request type with common pattern If you want to override this method, you should be careful to record the validation time. """ - received_time = time.time() - received_time_perf = time.perf_counter() - try: # Validate request validation_start = time.perf_counter() @@ -106,12 +103,6 @@ async def handle_request( if hasattr(adapted_request, "validation_time"): adapted_request.validation_time = validation_time - if hasattr(adapted_request, "received_time"): - adapted_request.received_time = received_time - - if hasattr(adapted_request, "received_time_perf"): - adapted_request.received_time_perf = received_time_perf - # Note(Xinyuan): raw_request below is only used for detecting the connection of the client if hasattr(request, "stream") and request.stream: return await self._handle_streaming_request( diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py index 9253aed09b6d..b4f9d3335924 100644 --- a/python/sglang/srt/managers/io_struct.py +++ b/python/sglang/srt/managers/io_struct.py @@ -204,12 +204,6 @@ class GenerateReqInput(BaseReq): # Validation step duration validation_time: Optional[float] = None - # For metrics - received_time: Optional[float] = None - - # Perf_counter equivalents for accurate time calculations - received_time_perf: Optional[float] = None - # For data parallel rank routing data_parallel_rank: Optional[int] = None diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 8278d65466f1..787de125728c 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -431,7 +431,7 @@ async def generate_request( obj: Union[GenerateReqInput, EmbeddingReqInput], request: Optional[fastapi.Request] = None, ): - created_time = obj.received_time if obj.received_time else time.time() + created_time = time.time() self.auto_create_handle_loop() obj.normalize_batch_and_arguments()