vllm-project · BoyuanFeng · Mar 2, 2026 · Feb 27, 2026
@@ -590,15 +590,6 @@ def __init__(
             self.async_output_copy_stream = torch.cuda.Stream()
             self.prepare_inputs_event = torch.Event()
 
-        # self.cudagraph_batch_sizes sorts in ascending order.
-        if (
-            self.compilation_config.cudagraph_capture_sizes
-            and self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
-        ):
-            self.cudagraph_batch_sizes = sorted(
-                self.compilation_config.cudagraph_capture_sizes
-            )
-
         # Cache the device properties.
         self._init_device_properties()
 
@@ -5637,10 +5628,6 @@ def _check_and_update_cudagraph_mode(
             self.compilation_config.adjust_cudagraph_sizes_for_spec_decode(
                 self.uniform_decode_query_len, self.parallel_config.tensor_parallel_size
             )
-            capture_sizes = self.compilation_config.cudagraph_capture_sizes
-            self.cudagraph_batch_sizes = (
-                capture_sizes if capture_sizes is not None else []
-            )
 
         # Trigger cudagraph dispatching keys initialization after
         # resolved cudagraph mode.