diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index ee2ddad6f63..22a655c5ff5 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -68,15 +68,6 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test (non triton)
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
-        if: ${{ inputs.type == 'full' }}
-        run: |
-          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
-
       - name: Install Ascend toolkit & triton_ascend
         shell: bash -l {0}
         run: |
@@ -94,6 +85,8 @@ jobs:
         run: |
           # pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
           # pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
           pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_qwen_pooling_classify_correctness
 
diff --git a/vllm_ascend/ops/triton/activation/swiglu_quant.py b/vllm_ascend/ops/triton/activation/swiglu_quant.py
index d857b3d474c..7ec2cbaf36a 100644
--- a/vllm_ascend/ops/triton/activation/swiglu_quant.py
+++ b/vllm_ascend/ops/triton/activation/swiglu_quant.py
@@ -1,8 +1,5 @@
 import torch
-from vllm.triton_utils import HAS_TRITON, tl, triton
-
-if HAS_TRITON:
-    import torch_npu._inductor  # noqa: F401
+from vllm.triton_utils import tl, triton
 
 from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
 
diff --git a/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py b/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py
index 4129fdd0f21..d809dcd46dc 100644
--- a/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py
+++ b/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py
@@ -10,10 +10,7 @@
 # ruff: noqa: E501
 # mypy: ignore-errors
 import torch
-from vllm.triton_utils import HAS_TRITON, tl, triton
-
-if HAS_TRITON:
-    import torch_npu._inductor  # noqa: F401
+from vllm.triton_utils import tl, triton
 
 
 @triton.jit
diff --git a/vllm_ascend/ops/triton/rope.py b/vllm_ascend/ops/triton/rope.py
index a3856ca3687..3700e329130 100644
--- a/vllm_ascend/ops/triton/rope.py
+++ b/vllm_ascend/ops/triton/rope.py
@@ -14,10 +14,7 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
-from vllm.triton_utils import HAS_TRITON, tl, triton
-
-if HAS_TRITON:
-    import torch_npu._inductor  # noqa: F401
+from vllm.triton_utils import tl, triton
 
 from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
 
diff --git a/vllm_ascend/worker/worker.py b/vllm_ascend/worker/worker.py
index 303dae362e1..31053450a70 100644
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -88,6 +88,11 @@ def __init__(
         # register patch for vllm
         from vllm_ascend.utils import adapt_patch
         adapt_patch()
+        # Import torch_npu._inductor for graph mode execution with triton.
+        # Importing it lazily here avoids re-initializing torch_npu during patching.
+        from vllm.triton_utils import HAS_TRITON
+        if HAS_TRITON:
+            import torch_npu._inductor  # noqa: F401
         # Register ops when worker init.
         from vllm_ascend import ops
         ops.register_dummy_fusion_op()