From d2c3b92ebf1a460c81fe93509e0a95da2db25f0c Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 27 Feb 2026 13:42:23 -0800
Subject: [PATCH] clean unused cudagraph_batch_sizes

Signed-off-by: Boyuan Feng
---
 vllm/v1/worker/gpu_model_runner.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f711d1d791bc..616fd677d822 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -590,15 +590,6 @@ def __init__(
         self.async_output_copy_stream = torch.cuda.Stream()
         self.prepare_inputs_event = torch.Event()
 
-        # self.cudagraph_batch_sizes sorts in ascending order.
-        if (
-            self.compilation_config.cudagraph_capture_sizes
-            and self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
-        ):
-            self.cudagraph_batch_sizes = sorted(
-                self.compilation_config.cudagraph_capture_sizes
-            )
-
         # Cache the device properties.
         self._init_device_properties()
 
@@ -5637,10 +5628,6 @@ def _check_and_update_cudagraph_mode(
             self.compilation_config.adjust_cudagraph_sizes_for_spec_decode(
                 self.uniform_decode_query_len, self.parallel_config.tensor_parallel_size
             )
-            capture_sizes = self.compilation_config.cudagraph_capture_sizes
-            self.cudagraph_batch_sizes = (
-                capture_sizes if capture_sizes is not None else []
-            )
 
         # Trigger cudagraph dispatching keys initialization after
         # resolved cudagraph mode.