From a275fbd29dbbeb12fc0c6ac56e6e1ac0d03db5a3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:45:00 +0000
Subject: [PATCH 1/4] Initial plan


From 544ec01ebb7243458a3839e263552dc1d88a8056 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:48:28 +0000
Subject: [PATCH 2/4] Rename compile_ranges_split_points to
 compile_ranges_endpoints

Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
---
 tests/compile/fusions_e2e/conftest.py |  6 +++---
 tests/compile/test_compile_ranges.py  |  6 +++---
 vllm/compilation/backends.py          |  4 ++--
 vllm/config/compilation.py            | 10 +++++-----
 vllm/config/vllm.py                   | 20 ++++++++++----------
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tests/compile/fusions_e2e/conftest.py b/tests/compile/fusions_e2e/conftest.py
index d083b6f14e4b..29eb8425183c 100644
--- a/tests/compile/fusions_e2e/conftest.py
+++ b/tests/compile/fusions_e2e/conftest.py
@@ -46,10 +46,10 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
         generated_text = output.outputs[0].text
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 
-    # Get the compile ranges split points after vllm config post init
+    # Get the compile range endpoints after vllm config post init
     # in order to compute compile ranges correctly
-    compilation_config.compile_ranges_split_points = (
-        llm.llm_engine.vllm_config.compilation_config.compile_ranges_split_points
+    compilation_config.compile_ranges_endpoints = (
+        llm.llm_engine.vllm_config.compilation_config.compile_ranges_endpoints
     )
 
 
diff --git a/tests/compile/test_compile_ranges.py b/tests/compile/test_compile_ranges.py
index c90454ed0e95..52aa1578dc35 100644
--- a/tests/compile/test_compile_ranges.py
+++ b/tests/compile/test_compile_ranges.py
@@ -85,7 +85,7 @@ def test_compile_ranges(use_fresh_inductor_cache):
         ),
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
-            compile_ranges_split_points=[8, 32],
+            compile_ranges_endpoints=[8, 32],
             compile_sizes=[16, 64, 128],
             inductor_compile_config={
                 "post_grad_custom_post_pass": post_grad_range_checker,
@@ -109,7 +109,7 @@ def test_compile_ranges(use_fresh_inductor_cache):
 
 def test_compile_config_get_compile_ranges():
     compilation_config = CompilationConfig(
-        compile_ranges_split_points=[8, 32],
+        compile_ranges_endpoints=[8, 32],
     )
     VllmConfig(
         scheduler_config=SchedulerConfig(
@@ -148,7 +148,7 @@ def create_vllm_config():
             scheduler_config=scheduler_config,
             compilation_config=CompilationConfig(
                 mode=CompilationMode.VLLM_COMPILE,
-                compile_ranges_split_points=[8],
+                compile_ranges_endpoints=[8],
                 inductor_compile_config={
                     "post_grad_custom_post_pass": post_grad_range_checker,
                 },
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index 09fd1f75091e..4764a6ad158e 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -834,8 +834,8 @@ def list_to_str(lst: list | None) -> str:
                     "splitting_ops": list_to_str(cc.splitting_ops),
                     "cudagraph_mode": str(cc.cudagraph_mode),
                     "compile_sizes": list_to_str(cc.compile_sizes),
-                    "compile_ranges_split_points": list_to_str(
-                        cc.compile_ranges_split_points
+                    "compile_ranges_endpoints": list_to_str(
+                        cc.compile_ranges_endpoints
                     ),
                     "use_inductor_graph_partition": cc.use_inductor_graph_partition,
                     "inductor_passes": list_to_str(list(cc.inductor_passes.keys())),
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 9cc2cbb49e45..68ff8d0e7be0 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -363,8 +363,8 @@ class CompilationConfig:
         [vllm.config.CompilationConfig.cudagraph_copy_inputs]
     - Inductor compilation:
         - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes]
-        - [`compile_ranges_split_points`]
-            [vllm.config.CompilationConfig.compile_ranges_split_points]
+        - [`compile_ranges_endpoints`]
+            [vllm.config.CompilationConfig.compile_ranges_endpoints]
         - [`inductor_compile_config`]
         [vllm.config.CompilationConfig.inductor_compile_config]
         - [`inductor_passes`][vllm.config.CompilationConfig.inductor_passes]
@@ -480,7 +480,7 @@ class CompilationConfig:
     to integers, it also supports "cudagraph_capture_sizes" to
     specify the sizes for cudagraph capture."""
 
-    compile_ranges_split_points: list[int] | None = None
+    compile_ranges_endpoints: list[int] | None = None
     """Split points that represent compile ranges for inductor.
     The compile ranges are
     [1, split_points[0]],
@@ -1244,9 +1244,9 @@ def adjust_cudagraph_sizes_for_mamba_cache(
 
     def get_compile_ranges(self) -> list[Range]:
         """Get the compile ranges for the compilation config."""
-        if self.compile_ranges_split_points is None:
+        if self.compile_ranges_endpoints is None:
             return []
-        split_points = sorted(set(self.compile_ranges_split_points))
+        split_points = sorted(set(self.compile_ranges_endpoints))
         return [
             Range(start=s + 1, end=e)
             for s, e in zip([0] + split_points[:-1], split_points)
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index fd5e3b464159..b7b77eed398e 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -1487,12 +1487,12 @@ def _set_compile_ranges(self):
         Set the compile ranges for the compilation config.
         """
         compilation_config = self.compilation_config
-        computed_compile_ranges_split_points = []
+        computed_compile_ranges_endpoints = []
 
         # The upper bound of the compile ranges is the max_num_batched_tokens.
         compile_range_end = self.scheduler_config.max_num_batched_tokens
         if compile_range_end is not None:
-            computed_compile_ranges_split_points.append(compile_range_end)
+            computed_compile_ranges_endpoints.append(compile_range_end)
 
         # Add the compile ranges for flashinfer
         if compilation_config.pass_config.fuse_allreduce_rms:
@@ -1504,7 +1504,7 @@ def _set_compile_ranges(self):
                     * self.model_config.dtype.itemsize
                 )
                 if compile_range_end is not None and max_token_num < compile_range_end:
-                    computed_compile_ranges_split_points.append(max_token_num)
+                    computed_compile_ranges_endpoints.append(max_token_num)
                 else:
                     logger.debug(
                         "Max num batched tokens below allreduce-rms fusion threshold, "
@@ -1539,7 +1539,7 @@ def _set_compile_ranges(self):
                 # Add split point at min_token_num - 1 to ensure SP applies
                 # starting from min_token_num
                 # This creates ranges: [1, min-1] (no SP), [min, max] (SP applies)
-                computed_compile_ranges_split_points.append(min_token_num - 1)
+                computed_compile_ranges_endpoints.append(min_token_num - 1)
 
         if compilation_config.pass_config.fuse_rope_kvcache:
             max_token_num = (
@@ -1547,7 +1547,7 @@ def _set_compile_ranges(self):
             )
             if max_token_num is not None:
                 if compile_range_end is not None and max_token_num < compile_range_end:
-                    computed_compile_ranges_split_points.append(max_token_num)
+                    computed_compile_ranges_endpoints.append(max_token_num)
                 else:
                     logger.debug(
                         "Max num batched tokens below rope+kvcache fusion threshold, "
@@ -1555,14 +1555,14 @@ def _set_compile_ranges(self):
                         compile_range_end,
                     )
 
-        if compilation_config.compile_ranges_split_points is not None:
-            for x in compilation_config.compile_ranges_split_points:
+        if compilation_config.compile_ranges_endpoints is not None:
+            for x in compilation_config.compile_ranges_endpoints:
                 assert isinstance(x, int)
                 assert x > 0, f"Invalid compile range split point: {x}"
                 if compile_range_end is not None and x < compile_range_end and x > 1:
-                    computed_compile_ranges_split_points.append(x)
-        compilation_config.compile_ranges_split_points = sorted(
-            computed_compile_ranges_split_points
+                    computed_compile_ranges_endpoints.append(x)
+        compilation_config.compile_ranges_endpoints = sorted(
+            computed_compile_ranges_endpoints
         )
 
     def try_verify_and_update_config(self):

From 36c7f0598bf83b7323d48d9cf1d03aac9b148301 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 4 Mar 2026 18:15:29 +0000
Subject: [PATCH 3/4] Fix remaining split_point references in comments and
 local variable

Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
---
 vllm/config/compilation.py | 12 ++++++------
 vllm/config/vllm.py        |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 68ff8d0e7be0..1db7800c5f72 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -481,11 +481,11 @@ class CompilationConfig:
     specify the sizes for cudagraph capture."""
 
     compile_ranges_endpoints: list[int] | None = None
-    """Split points that represent compile ranges for inductor.
+    """Endpoints for Inductor compile ranges.
     The compile ranges are
-    [1, split_points[0]],
-    [split_points[0] + 1, split_points[1]], ...,
-    [split_points[-1] + 1, max_num_batched_tokens].
+    [1, endpoints[0]],
+    [endpoints[0] + 1, endpoints[1]], ...,
+    [endpoints[-1] + 1, max_num_batched_tokens].
     Compile sizes are also used single element ranges,
     the range is represented as [compile_sizes[i], compile_sizes[i]].
 
@@ -1246,8 +1246,8 @@ def get_compile_ranges(self) -> list[Range]:
         """Get the compile ranges for the compilation config."""
         if self.compile_ranges_endpoints is None:
             return []
-        split_points = sorted(set(self.compile_ranges_endpoints))
+        endpoints = sorted(set(self.compile_ranges_endpoints))
         return [
             Range(start=s + 1, end=e)
-            for s, e in zip([0] + split_points[:-1], split_points)
+            for s, e in zip([0] + endpoints[:-1], endpoints)
         ]
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index b7b77eed398e..82a8f17a5db8 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -1536,7 +1536,7 @@ def _set_compile_ranges(self):
                 and min_token_num < max_num_batched_tokens
                 and min_token_num > 1
             ):
-                # Add split point at min_token_num - 1 to ensure SP applies
+                # Add endpoint at min_token_num - 1 to ensure SP applies
                 # starting from min_token_num
                 # This creates ranges: [1, min-1] (no SP), [min, max] (SP applies)
                 computed_compile_ranges_endpoints.append(min_token_num - 1)
@@ -1558,7 +1558,7 @@ def _set_compile_ranges(self):
         if compilation_config.compile_ranges_endpoints is not None:
             for x in compilation_config.compile_ranges_endpoints:
                 assert isinstance(x, int)
-                assert x > 0, f"Invalid compile range split point: {x}"
+                assert x > 0, f"Invalid compile range endpoint: {x}"
                 if compile_range_end is not None and x < compile_range_end and x > 1:
                     computed_compile_ranges_endpoints.append(x)
         compilation_config.compile_ranges_endpoints = sorted(

From 0c761fbd9f94e3a44085ed0b4c31991aed0ddabf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luka=20Govedi=C4=8D?=
 <ProExpertProg@users.noreply.github.com>
Date: Fri, 6 Mar 2026 08:07:41 -0500
Subject: [PATCH 4/4] Fix precommit comprehension syntax in compilation.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
---
 vllm/config/compilation.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 1db7800c5f72..f37e206ff4b2 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -1248,6 +1248,5 @@ def get_compile_ranges(self) -> list[Range]:
             return []
         endpoints = sorted(set(self.compile_ranges_endpoints))
         return [
-            Range(start=s + 1, end=e)
-            for s, e in zip([0] + endpoints[:-1], endpoints)
+            Range(start=s + 1, end=e) for s, e in zip([0] + endpoints[:-1], endpoints)
         ]