diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 088d0b1af757..b6015d52144d 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -422,7 +422,7 @@ class CompilationConfig:
     When `enable_lora` is False, this option has no effect.
     """
 
-    use_inductor_graph_partition: bool = False
+    use_inductor_graph_partition: bool = is_torch_equal_or_newer("2.9.0")
     """Use inductor graph partition to split the graph at cudagraph_unsafe ops.
     This partition happens at inductor codegen time after all passes and fusions
     are finished. It generates a single `call` function which wraps