From a275fbd29dbbeb12fc0c6ac56e6e1ac0d03db5a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 17:45:00 +0000 Subject: [PATCH 1/4] Initial plan From 544ec01ebb7243458a3839e263552dc1d88a8056 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 17:48:28 +0000 Subject: [PATCH 2/4] Rename compile_ranges_split_points to compile_ranges_endpoints Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com> --- tests/compile/fusions_e2e/conftest.py | 6 +++--- tests/compile/test_compile_ranges.py | 6 +++--- vllm/compilation/backends.py | 4 ++-- vllm/config/compilation.py | 10 +++++----- vllm/config/vllm.py | 20 ++++++++++---------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/compile/fusions_e2e/conftest.py b/tests/compile/fusions_e2e/conftest.py index d083b6f14e4b..29eb8425183c 100644 --- a/tests/compile/fusions_e2e/conftest.py +++ b/tests/compile/fusions_e2e/conftest.py @@ -46,10 +46,10 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg generated_text = output.outputs[0].text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - # Get the compile ranges split points after vllm config post init + # Get the compile ranges endpoints after vllm config post init # in order to compute compile ranges correctly - compilation_config.compile_ranges_split_points = ( - llm.llm_engine.vllm_config.compilation_config.compile_ranges_split_points + compilation_config.compile_ranges_endpoints = ( + llm.llm_engine.vllm_config.compilation_config.compile_ranges_endpoints ) diff --git a/tests/compile/test_compile_ranges.py b/tests/compile/test_compile_ranges.py index c90454ed0e95..52aa1578dc35 100644 --- a/tests/compile/test_compile_ranges.py +++ b/tests/compile/test_compile_ranges.py @@ -85,7 +85,7 @@ def test_compile_ranges(use_fresh_inductor_cache): ), compilation_config=CompilationConfig( mode=CompilationMode.VLLM_COMPILE, - compile_ranges_split_points=[8, 32], + compile_ranges_endpoints=[8, 32], compile_sizes=[16, 64, 128], inductor_compile_config={ "post_grad_custom_post_pass": post_grad_range_checker, @@ -109,7 +109,7 @@ def test_compile_ranges(use_fresh_inductor_cache): def test_compile_config_get_compile_ranges(): compilation_config = CompilationConfig( - compile_ranges_split_points=[8, 32], + compile_ranges_endpoints=[8, 32], ) VllmConfig( scheduler_config=SchedulerConfig( @@ -148,7 +148,7 @@ def create_vllm_config(): scheduler_config=scheduler_config, compilation_config=CompilationConfig( mode=CompilationMode.VLLM_COMPILE, - compile_ranges_split_points=[8], + compile_ranges_endpoints=[8], inductor_compile_config={ "post_grad_custom_post_pass": post_grad_range_checker, }, diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 09fd1f75091e..4764a6ad158e 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -834,8 +834,8 @@ def list_to_str(lst: list | None) -> str: "splitting_ops": list_to_str(cc.splitting_ops), "cudagraph_mode": str(cc.cudagraph_mode), "compile_sizes": list_to_str(cc.compile_sizes), - "compile_ranges_split_points": list_to_str( - cc.compile_ranges_split_points + "compile_ranges_endpoints": list_to_str( + cc.compile_ranges_endpoints ), "use_inductor_graph_partition": cc.use_inductor_graph_partition, "inductor_passes": list_to_str(list(cc.inductor_passes.keys())), diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 9cc2cbb49e45..68ff8d0e7be0 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -363,8 +363,8 @@ class CompilationConfig: [vllm.config.CompilationConfig.cudagraph_copy_inputs] - Inductor compilation: - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes] - - [`compile_ranges_split_points`] - [vllm.config.CompilationConfig.compile_ranges_split_points] + - [`compile_ranges_endpoints`] + [vllm.config.CompilationConfig.compile_ranges_endpoints] - [`inductor_compile_config`] [vllm.config.CompilationConfig.inductor_compile_config] - [`inductor_passes`][vllm.config.CompilationConfig.inductor_passes] @@ -480,7 +480,7 @@ class CompilationConfig: to integers, it also supports "cudagraph_capture_sizes" to specify the sizes for cudagraph capture.""" - compile_ranges_split_points: list[int] | None = None + compile_ranges_endpoints: list[int] | None = None """Split points that represent compile ranges for inductor. The compile ranges are [1, split_points[0]], @@ -1244,9 +1244,9 @@ def adjust_cudagraph_sizes_for_mamba_cache( def get_compile_ranges(self) -> list[Range]: """Get the compile ranges for the compilation config.""" - if self.compile_ranges_split_points is None: + if self.compile_ranges_endpoints is None: return [] - split_points = sorted(set(self.compile_ranges_split_points)) + split_points = sorted(set(self.compile_ranges_endpoints)) return [ Range(start=s + 1, end=e) for s, e in zip([0] + split_points[:-1], split_points) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index fd5e3b464159..b7b77eed398e 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -1487,12 +1487,12 @@ def _set_compile_ranges(self): Set the compile ranges for the compilation config. """ compilation_config = self.compilation_config - computed_compile_ranges_split_points = [] + computed_compile_ranges_endpoints = [] # The upper bound of the compile ranges is the max_num_batched_tokens. compile_range_end = self.scheduler_config.max_num_batched_tokens if compile_range_end is not None: - computed_compile_ranges_split_points.append(compile_range_end) + computed_compile_ranges_endpoints.append(compile_range_end) # Add the compile ranges for flashinfer if compilation_config.pass_config.fuse_allreduce_rms: @@ -1504,7 +1504,7 @@ def _set_compile_ranges(self): * self.model_config.dtype.itemsize ) if compile_range_end is not None and max_token_num < compile_range_end: - computed_compile_ranges_split_points.append(max_token_num) + computed_compile_ranges_endpoints.append(max_token_num) else: logger.debug( "Max num batched tokens below allreduce-rms fusion threshold, " @@ -1539,7 +1539,7 @@ def _set_compile_ranges(self): # Add split point at min_token_num - 1 to ensure SP applies # starting from min_token_num # This creates ranges: [1, min-1] (no SP), [min, max] (SP applies) - computed_compile_ranges_split_points.append(min_token_num - 1) + computed_compile_ranges_endpoints.append(min_token_num - 1) if compilation_config.pass_config.fuse_rope_kvcache: max_token_num = ( @@ -1547,7 +1547,7 @@ def _set_compile_ranges(self): ) if max_token_num is not None: if compile_range_end is not None and max_token_num < compile_range_end: - computed_compile_ranges_split_points.append(max_token_num) + computed_compile_ranges_endpoints.append(max_token_num) else: logger.debug( "Max num batched tokens below rope+kvcache fusion threshold, " @@ -1555,14 +1555,14 @@ def _set_compile_ranges(self): compile_range_end, ) - if compilation_config.compile_ranges_split_points is not None: - for x in compilation_config.compile_ranges_split_points: + if compilation_config.compile_ranges_endpoints is not None: + for x in compilation_config.compile_ranges_endpoints: assert isinstance(x, int) assert x > 0, f"Invalid compile range split point: {x}" if compile_range_end is not None and x < compile_range_end and x > 1: - computed_compile_ranges_split_points.append(x) - compilation_config.compile_ranges_split_points = sorted( - computed_compile_ranges_split_points + computed_compile_ranges_endpoints.append(x) + compilation_config.compile_ranges_endpoints = sorted( + computed_compile_ranges_endpoints ) def try_verify_and_update_config(self): From 36c7f0598bf83b7323d48d9cf1d03aac9b148301 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:15:29 +0000 Subject: [PATCH 3/4] Fix remaining split_point references in comments and local variable Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com> --- vllm/config/compilation.py | 12 ++++++------ vllm/config/vllm.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 68ff8d0e7be0..1db7800c5f72 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -481,11 +481,11 @@ class CompilationConfig: specify the sizes for cudagraph capture.""" compile_ranges_endpoints: list[int] | None = None - """Split points that represent compile ranges for inductor. + """Endpoints for Inductor compile ranges. The compile ranges are - [1, split_points[0]], - [split_points[0] + 1, split_points[1]], ..., - [split_points[-1] + 1, max_num_batched_tokens]. + [1, endpoints[0]], + [endpoints[0] + 1, endpoints[1]], ..., + [endpoints[-1] + 1, max_num_batched_tokens]. Compile sizes are also used single element ranges, the range is represented as [compile_sizes[i], compile_sizes[i]]. @@ -1246,8 +1246,8 @@ def get_compile_ranges(self) -> list[Range]: """Get the compile ranges for the compilation config.""" if self.compile_ranges_endpoints is None: return [] - split_points = sorted(set(self.compile_ranges_endpoints)) + endpoints = sorted(set(self.compile_ranges_endpoints)) return [ Range(start=s + 1, end=e) - for s, e in zip([0] + split_points[:-1], split_points) + for s, e in zip([0] + endpoints[:-1], endpoints) ] diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index b7b77eed398e..82a8f17a5db8 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -1536,7 +1536,7 @@ def _set_compile_ranges(self): and min_token_num < max_num_batched_tokens and min_token_num > 1 ): - # Add split point at min_token_num - 1 to ensure SP applies + # Add endpoint at min_token_num - 1 to ensure SP applies # starting from min_token_num # This creates ranges: [1, min-1] (no SP), [min, max] (SP applies) computed_compile_ranges_endpoints.append(min_token_num - 1) @@ -1558,7 +1558,7 @@ def _set_compile_ranges(self): if compilation_config.compile_ranges_endpoints is not None: for x in compilation_config.compile_ranges_endpoints: assert isinstance(x, int) - assert x > 0, f"Invalid compile range split point: {x}" + assert x > 0, f"Invalid compile range endpoint: {x}" if compile_range_end is not None and x < compile_range_end and x > 1: computed_compile_ranges_endpoints.append(x) compilation_config.compile_ranges_endpoints = sorted( From 0c761fbd9f94e3a44085ed0b4c31991aed0ddabf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Govedi=C4=8D?= Date: Fri, 6 Mar 2026 08:07:41 -0500 Subject: [PATCH 4/4] Fix precommit comprehension syntax in compilation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luka Govedič --- vllm/config/compilation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 1db7800c5f72..f37e206ff4b2 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -1248,6 +1248,5 @@ def get_compile_ranges(self) -> list[Range]: return [] endpoints = sorted(set(self.compile_ranges_endpoints)) return [ - Range(start=s + 1, end=e) - for s, e in zip([0] + endpoints[:-1], endpoints) + Range(start=s + 1, end=e) for s, e in zip([0] + endpoints[:-1], endpoints) ]