vllm-project · yiz-liu · Jan 22, 2026 · Jan 8, 2026 · Jan 21, 2026
@@ -0,0 +1,54 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+from vllm_ascend.compilation.npugraph_ex_passes.utils.npugraph_ex_utils_check import \
+    extra_stream_scope_check
+
+
+def test_extra_stream_scope_check_logic():
+    """
+    Check extra_stream_scope_check against mock matches whose nodes carry
+    different combinations of ``stream_label`` metadata.
+
+    The assertions expect True when every node carries the same label
+    (all ``None``, all identical, or no nodes at all) and False when the
+    labels differ, including a mix of ``None`` and a named stream.
+    """
+
+    class MockNode:
+        # Minimal stand-in for a graph node: a "call_function" op whose
+        # meta dict holds the stream label under test.
+
+        def __init__(self, stream_label=None):
+            self.meta = {"stream_label": stream_label}
+            self.op = "call_function"
+
+    class MockMatch:
+        # Minimal stand-in for a pattern match; only ``nodes`` is provided.
+
+        def __init__(self, nodes):
+            self.nodes = nodes
+
+    def check_labels(*labels):
+        # Build one mock node per label and run the check on the match.
+        mock_nodes = [MockNode(label) for label in labels]
+        return extra_stream_scope_check(MockMatch(mock_nodes))
+
+    # Case 1: every node on the default stream -> accepted
+    assert check_labels(None, None) is True
+
+    # Case 2: every node on the same named stream -> accepted
+    assert check_labels("s1", "s1") is True
+
+    # Case 3: two different named streams -> rejected
+    assert check_labels("s1", "s2") is False
+
+    # Case 4: default stream mixed with a named stream -> rejected
+    assert check_labels(None, "s1") is False
+
+    # Case 5: match without any nodes -> accepted
+    assert check_labels() is True
@@ -90,10 +90,6 @@ def npugraph_ex_compile(
         graph.recompile()
     import torchair
 
-    # TODO: use a better way to lazy register replacement, instead of import one by one
-    # As an example, we directly import here to register replacement.
-    # import vllm_ascend.compilation.npugraph_ex_passes.add_rms_norm_quant  # noqa
-
     torch.npu.set_compile_mode(jit_compile=False)
     config = torchair.CompilerConfig()
     # use aclgraph mode, avoid the transformation from fx graph to Ascend IR.

@@ -0,0 +1,51 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from torch import fx as fx
+from vllm.compilation.inductor_pass import get_pass_context
+from vllm.compilation.vllm_inductor_pass import VllmInductorPass
+from vllm.config import VllmConfig
+
+
+class NpuGraphEXPassManager:
+    """
+    A pass manager for npu_graph ex fusion passes.
+    It handles the configuration and execution of passes.
+    The counterpart in vllm is PostGradPassManager. Since torch_npu
+    does not support triton for now, we define our own pass manager.
+    """
+
+    def __init__(self):
+        # Registered passes; __call__ walks them in insertion order.
+        self.passes: list[VllmInductorPass] = []
+
+    def __call__(self, graph: fx.Graph) -> fx.Graph:
+        """
+        Apply every registered pass that is applicable to the compile
+        range of the current pass context, then recompile the graph.
+
+        Args:
+            graph: The graph object handed to each pass; passes are
+                called for their effect on it.
+
+        Returns:
+            The same ``graph`` object that was passed in.
+        """
+        compile_range = get_pass_context().compile_range
+
+        for pass_ in self.passes:
+            if pass_.is_applicable_for_range(compile_range):
+                pass_(graph)
+        # Regenerate code after the passes ran. This used to call the
+        # non-existent ``recompiler()``, which raised AttributeError.
+        # NOTE(review): ``recompile`` is a torch.fx.GraphModule method, so
+        # the argument is presumably a GraphModule despite the fx.Graph
+        # annotation - confirm against the caller.
+        graph.recompile()
+        return graph
+
+    def add(self, pass_: VllmInductorPass):
+        """
+        Append ``pass_`` to the list of passes run by ``__call__``.
+
+        Raises:
+            AssertionError: If ``pass_`` is not a VllmInductorPass.
+        """
+        assert isinstance(pass_, VllmInductorPass)
+        self.passes.append(pass_)
+
+    def configure(self, config: VllmConfig):
+        """
+        Store the ``ascend_compilation_config`` section of
+        ``config.additional_config`` on the manager, falling back to an
+        empty dict when that key is missing.
+        """
+        # NOTE(review): the previous comment stated that graph fusion and
+        # quantization fusion passes are enabled by default, but this
+        # method only stores the options - confirm where passes are added.
+        self.ascend_compilation_config: dict = config.additional_config.get("ascend_compilation_config", {})