diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml
index c3f15a90f13..879bc4efe44 100644
--- a/.github/workflows/pr_test_full.yaml
+++ b/.github/workflows/pr_test_full.yaml
@@ -80,7 +80,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
+        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
     uses: ./.github/workflows/_e2e_test.yaml
@@ -102,7 +102,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        vllm_version: [v0.19.1]
+        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
     needs: [parse-trigger]
     if: ${{ needs.parse-trigger.outputs.allowed == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml
index bc51592122f..2f6678c126c 100644
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -154,7 +154,7 @@ jobs:
     if: ${{ needs.lint.result == 'success' && needs.changes.outputs.has_tests == 'true' }}
     strategy:
       matrix:
-        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
+        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
     uses: ./.github/workflows/_optional_smart_e2e.yaml
     with:
       vllm: ${{ matrix.vllm_version }}
@@ -164,7 +164,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
+        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
diff --git a/.github/workflows/schedule_update_estimated_time.yaml b/.github/workflows/schedule_update_estimated_time.yaml
index 79586a891ec..b8a18b4c71b 100644
--- a/.github/workflows/schedule_update_estimated_time.yaml
+++ b/.github/workflows/schedule_update_estimated_time.yaml
@@ -23,7 +23,7 @@ jobs:
     name: e2e-test
     strategy:
       matrix:
-        vllm_version: [v0.19.1]
+        vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
         type: [full, light]
     uses: ./.github/workflows/_e2e_test.yaml
     with:
diff --git a/.github/workflows/schedule_vllm_e2e_test.yaml b/.github/workflows/schedule_vllm_e2e_test.yaml
index 8e610ff8b88..253aaf59df1 100644
--- a/.github/workflows/schedule_vllm_e2e_test.yaml
+++ b/.github/workflows/schedule_vllm_e2e_test.yaml
@@ -45,7 +45,7 @@ jobs:
           fail-fast: false
           matrix:
             part: [0, 1, 2, 3]
-            vllm: [v0.19.1]
+            vllm: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
         container:
           image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11
           env:
diff --git a/Dockerfile b/Dockerfile
index 473d7b084a5..221cafb89ba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -48,8 +48,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
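+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm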
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index 2362a579109..79000a1eb53 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -33,8 +33,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
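+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm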
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 75865bc9f82..27ec4290229 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -32,8 +32,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
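+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm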
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index cda1c8d8b3f..eabf42a0874 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -50,8 +50,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
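+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm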
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 0f30484a589..270a42672ca 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -49,8 +49,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
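+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm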
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 47d0d2a79c0..ec5cabbe308 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -49,8 +49,12 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.19.1
-RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
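+# vLLM is pinned to a commit SHA below; `git clone -b` accepts only branch or tag names, so the commit is fetched directly.
+# To build from a release tag again, restore: ARG VLLM_TAG=<tag> (previously v0.19.1) and RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm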
+ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
+RUN git init /vllm-workspace/vllm && \
+    git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
+    git -C /vllm-workspace/vllm checkout FETCH_HEAD
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
diff --git a/docs/source/community/versioning_policy.md b/docs/source/community/versioning_policy.md
index 354ee1c1179..8f5bbb5b6be 100644
--- a/docs/source/community/versioning_policy.md
+++ b/docs/source/community/versioning_policy.md
@@ -62,7 +62,7 @@ For main branch of vLLM Ascend, we usually make it compatible with the latest vL
 
 | vLLM Ascend | vLLM         | Python           | Stable CANN | PyTorch/torch_npu  | Triton Ascend |
 |-------------|--------------|------------------|-------------|--------------------|---------------|
-|     main    | {{main_vllm_commit}}, {{main_vllm_tag}} | {{main_python_version}}   | {{main_cann_version}} | {{main_pytorch_torch_npu_version}} | {{main_triton_ascend_version}} |
+|     main    | {{main_vllm_commit}} | {{main_python_version}}   | {{main_cann_version}} | {{main_pytorch_torch_npu_version}} | {{main_triton_ascend_version}} |
 
 ## Release cadence
 
diff --git a/tests/e2e/singlecard/model_runner_v2/test_basic.py b/tests/e2e/singlecard/model_runner_v2/test_basic.py
index 034a5350df8..3edf1b4efc7 100644
--- a/tests/e2e/singlecard/model_runner_v2/test_basic.py
+++ b/tests/e2e/singlecard/model_runner_v2/test_basic.py
@@ -22,7 +22,6 @@
 from vllm import SamplingParams
 
 from tests.e2e.conftest import VllmRunner
-from vllm_ascend.utils import vllm_version_is
 
 MODELS = ["Qwen/Qwen3-0.6B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"]
 
@@ -30,7 +29,6 @@
 EGALE_MODELS = ["vllm-ascend/EAGLE-LLaMA3.1-Instruct-8B"]
 
 
-@pytest.mark.skipif(vllm_version_is("0.19.1"), reason="no need to support model_runner for v0.19.1")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("enforce_eager", [True])
@@ -65,7 +63,6 @@ def test_qwen3_dense_eager_mode(
         runner.model.generate(prompts, sampling_params)
 
 
-@pytest.mark.skipif(vllm_version_is("0.19.1"), reason="no need to support model_runner for v0.19.1")
 @pytest.mark.parametrize("model", MAIN_MODELS)
 @pytest.mark.parametrize("eagle_model", EGALE_MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
@@ -104,7 +101,6 @@ def test_egale_spec_decoding(
         runner.model.generate(prompts, sampling_params)
 
 
-@pytest.mark.skipif(vllm_version_is("0.19.1"), reason="no need to support model_runner for v0.19.1")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("enforce_eager", [False])
diff --git a/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py b/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
index 806b20fa26a..2203a8e054c 100644
--- a/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
+++ b/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
@@ -625,11 +625,6 @@ def test_dflash_acceptance(
     method: str,
     num_speculative_tokens: int,
 ):
-    from vllm_ascend.utils import vllm_version_is
-
-    if vllm_version_is("0.19.1"):
-        pytest.skip("Dflash tests are not supported on vLLM version 0.19.1")
-
     main_model_name = DFLASH[method]["main"]
     spec_model_name = DFLASH[method]["spec"]
 
diff --git a/tests/ut/ops/test_mla.py b/tests/ut/ops/test_mla.py
index f3e553d8aa5..a79d582b411 100644
--- a/tests/ut/ops/test_mla.py
+++ b/tests/ut/ops/test_mla.py
@@ -8,7 +8,6 @@
 
 from tests.ut.base import TestBase
 from vllm_ascend.ops.mla import AscendMultiHeadLatentAttention, IndexerWrapper
-from vllm_ascend.utils import vllm_version_is
 
 
 class TestIndexerWrapper(TestBase):
@@ -19,11 +18,7 @@ def test_initialization(self):
         mock_indexer.topk_tokens = 2048
         mock_indexer.q_lora_rank = 1536
         mock_indexer.wq_b = nn.Linear(128, 128)
-        if vllm_version_is("0.19.1"):
-            mock_indexer.wk = nn.Linear(128, 128)
-            mock_indexer.weights_proj = nn.Linear(128, 128)
-        else:
-            mock_indexer.wk_weights_proj = nn.Linear(128, 128)
+        mock_indexer.wk_weights_proj = nn.Linear(128, 128)
         mock_indexer.k_norm = nn.LayerNorm(128)
         mock_indexer.softmax_scale = 0.123
         mock_indexer.topk_indices_buffer = torch.randn(10)
@@ -36,11 +31,7 @@ def test_initialization(self):
         self.assertEqual(wrapper.topk_tokens, 2048)
         self.assertEqual(wrapper.q_lora_rank, 1536)
         self.assertIs(wrapper.wq_b, mock_indexer.wq_b)
-        if vllm_version_is("0.19.1"):
-            self.assertIs(wrapper.wk, mock_indexer.wk)
-            self.assertIs(wrapper.weights_proj, mock_indexer.weights_proj)
-        else:
-            self.assertIs(wrapper.wk_weights_proj, mock_indexer.wk_weights_proj)
+        self.assertIs(wrapper.wk_weights_proj, mock_indexer.wk_weights_proj)
         self.assertIs(wrapper.k_norm, mock_indexer.k_norm)
         self.assertEqual(wrapper.softmax_scale, 0.123)
 
diff --git a/vllm_ascend/_310p/fused_moe/fused_moe.py b/vllm_ascend/_310p/fused_moe/fused_moe.py
index 224647beb29..7c81d6a7336 100644
--- a/vllm_ascend/_310p/fused_moe/fused_moe.py
+++ b/vllm_ascend/_310p/fused_moe/fused_moe.py
@@ -27,7 +27,6 @@
 from vllm_ascend.ops.fused_moe.moe_comm_method import FusedExpertsResult, _MoECommMethods
 from vllm_ascend.ops.fused_moe.moe_runtime_args import build_fused_experts_input
 from vllm_ascend.quantization.quant_type import QuantType
-from vllm_ascend.utils import vllm_version_is
 
 from .experts_selector import select_experts
 from .moe_comm_method import AllGatherCommImpl310
@@ -164,14 +163,13 @@ def __init__(self, *args, **kwargs):
 
         from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner
 
-        is_legacy = vllm_version_is("0.19.1")
         self.runner = AscendMoERunner(
-            self if is_legacy else self.layer_name,
+            self.layer_name,
             self.moe_config,
             self.router,
             self._routed_input_transform,
-            self.gate if is_legacy else kwargs.pop("gate", None),
-            self.shared_experts if is_legacy else kwargs.pop("shared_experts", None),
+            kwargs.pop("gate", None),
+            kwargs.pop("shared_experts", None),
             self.quant_method,
             self.reduce_results,
             self.vllm_config.parallel_config.enable_dbo,
@@ -285,9 +283,8 @@ def __init__(
         # which at this point is still the stale runner built with shared_experts=None.
         from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner
 
-        is_legacy = vllm_version_is("0.19.1")
         self.runner = AscendMoERunner(
-            self if is_legacy else self.layer_name,
+            self.layer_name,
             self.moe_config,
             self.router,
             self._routed_input_transform,
diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py
index ceea3511bfe..0b446a87a9c 100644
--- a/vllm_ascend/ascend_forward_context.py
+++ b/vllm_ascend/ascend_forward_context.py
@@ -20,7 +20,6 @@
     is_drafter_moe_model,
     is_moe_model,
     speculative_enable_dispatch_gmm_combine_decode,
-    vllm_version_is,
 )
 
 
@@ -156,10 +155,7 @@ def set_ascend_forward_context(
         dp_world_size = get_dp_group().world_size
         if dp_world_size > 1 and forward_context.dp_metadata is not None:
             dp_meta = forward_context.dp_metadata
-            if vllm_version_is("0.19.1"):
-                max_tokens_across_dp = dp_meta.max_tokens_across_dp_cpu.item()
-            else:
-                max_tokens_across_dp = dp_meta.num_tokens_across_dp_cpu.max().item()
+            max_tokens_across_dp = dp_meta.num_tokens_across_dp_cpu.max().item()
             if forward_context.flash_comm_v1_enabled or forward_context.flashcomm_v2_enabled:
                 padded_length = (max_tokens_across_dp + tp_world_size - 1) // tp_world_size * tp_world_size
                 pad_size = padded_length - num_tokens
diff --git a/vllm_ascend/attention/context_parallel/sfa_cp.py b/vllm_ascend/attention/context_parallel/sfa_cp.py
index 840bbae7893..83568a07acd 100644
--- a/vllm_ascend/attention/context_parallel/sfa_cp.py
+++ b/vllm_ascend/attention/context_parallel/sfa_cp.py
@@ -12,7 +12,6 @@
 from vllm_ascend.attention.sfa_v1 import AscendSFAImpl, AscendSFAMetadata, AscendSFAMetadataBuilder
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata, enabling_mlapo, split_decodes_and_prefills
 from vllm_ascend.ops.triton.rope import rope_forward_triton_siso
-from vllm_ascend.utils import vllm_version_is
 
 M = TypeVar("M", bound=AscendSFAMetadata)
 
@@ -414,12 +413,8 @@ def indexer_select_post_process(
         actual_seq_lengths_query: torch.Tensor,
         actual_seq_lengths_key: torch.Tensor,
     ):
-        if vllm_version_is("0.19.1"):
-            weights, _ = self.weights_proj(x)
-        else:
-            kw, _ = self.wk_weights_proj(x)
-            weights = kw[:, self.head_dim :]
-
+        kw, _ = self.wk_weights_proj(x)
+        weights = kw[:, self.head_dim :]
         q_li, _ = self.wq_b(q_c)  # [b,s,1536] @ [1536,64*128] = [b,s,64*128]
         q_li = q_li.view(-1, self.n_head, self.head_dim)  # [n_toks,64,128]
         if HAS_TRITON:
diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index 7b1da4fb18b..5506ae377b9 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -55,7 +55,6 @@
     enable_dsa_cp_with_o_proj_tp,
     get_weight_prefetch_method,
     maybe_trans_nz,
-    vllm_version_is,
 )
 from vllm_ascend.worker.npu_input_batch import NPUInputBatch
 
@@ -439,11 +438,7 @@ def __init__(
         self.n_head: int = self.indexer.n_head  # 64
         self.head_dim: int = self.indexer.head_dim  # 128
         self.wq_b = self.indexer.wq_b
-        if vllm_version_is("0.19.1"):
-            self.wk = self.indexer.wk
-            self.weights_proj = self.indexer.weights_proj
-        else:
-            self.wk_weights_proj = self.indexer.wk_weights_proj
+        self.wk_weights_proj = self.indexer.wk_weights_proj
         self.k_norm = self.indexer.k_norm
         self.cp_size = 1
         self.is_rope_neox_style = True
@@ -912,11 +907,8 @@ def indexer_select_pre_process(
         cos: torch.Tensor,
         sin: torch.Tensor,
     ):
-        if vllm_version_is("0.19.1"):
-            k_li, _ = self.wk(x)  # [b,s,7168] @ [7168,128] = [b,s,128]
-        else:
-            kw, _ = self.wk_weights_proj(x)
-            k_li = kw[:, : self.head_dim]
+        kw, _ = self.wk_weights_proj(x)
+        k_li = kw[:, : self.head_dim]
         k_li = self.k_norm(k_li).unsqueeze(1)
         k_li = k_li.view(-1, 1, self.head_dim)
 
@@ -961,12 +953,8 @@ def indexer_select_post_process(
         actual_seq_lengths_query: torch.Tensor,
         actual_seq_lengths_key: torch.Tensor,
     ):
-        if vllm_version_is("0.19.1"):
-            weights, _ = self.weights_proj(x)
-        else:
-            kw, _ = self.wk_weights_proj(x)
-            weights = kw[:, self.head_dim :]
-
+        kw, _ = self.wk_weights_proj(x)
+        weights = kw[:, self.head_dim :]
         q_li, _ = self.wq_b(q_c)  # [b,s,1536] @ [1536,64*128] = [b,s,64*128]
         q_li = q_li.view(-1, self.n_head, self.head_dim)  # [n_toks,64,128]
         if HAS_TRITON:
diff --git a/vllm_ascend/core/recompute_scheduler.py b/vllm_ascend/core/recompute_scheduler.py
index ae6d1e669af..8cb23e82b4e 100644
--- a/vllm_ascend/core/recompute_scheduler.py
+++ b/vllm_ascend/core/recompute_scheduler.py
@@ -46,8 +46,6 @@
 from vllm.v1.spec_decode.metrics import SpecDecodingStats
 from vllm.v1.utils import ConstantList, record_function_or_nullcontext
 
-from vllm_ascend.utils import vllm_version_is
-
 
 # `spec_manager_map` in single_type_kv_cache_manager is a module-level dict
 # whose keys are class objects bound at import time.  When the async
@@ -209,11 +207,6 @@ def _update_waiting_for_remote_kv(self, request: Request) -> None:
             # Update the request state for scheduling.
             request.num_computed_tokens = num_computed_tokens
 
-            if vllm_version_is("0.19.1"):
-                # Count the number of prefix cached tokens.
-                if request.num_cached_tokens < 0:
-                    request.num_cached_tokens = request.num_computed_tokens
-
         self.finished_recving_kv_req_ids.remove(request.request_id)
 
     def schedule(self) -> RecomputeSchedulerOutput:
@@ -500,11 +493,7 @@ def schedule(self) -> RecomputeSchedulerOutput:
                             request_queue.pop_request()
                             step_skipped_waiting.prepend_request(request)
                             continue
-
-                        if vllm_version_is("0.19.1"):
-                            request.num_external_computed_tokens = ext_tokens
                         num_external_computed_tokens = ext_tokens
-
                         connector_prefix_cache_queries = request.num_tokens - num_new_local_computed_tokens
                         connector_prefix_cache_hits = num_external_computed_tokens
 
@@ -512,7 +501,7 @@ def schedule(self) -> RecomputeSchedulerOutput:
                     num_computed_tokens = num_new_local_computed_tokens + num_external_computed_tokens
                     assert num_computed_tokens <= request.num_tokens
 
-                    if not vllm_version_is("0.19.1") and request.prefill_stats is not None:
+                    if request.prefill_stats is not None:
                         request.prefill_stats.set(
                             num_prompt_tokens=request.num_prompt_tokens,
                             num_local_cached_tokens=num_new_local_computed_tokens,
@@ -691,10 +680,6 @@ def schedule(self) -> RecomputeSchedulerOutput:
                 token_budget -= num_new_tokens
                 request.status = RequestStatus.RUNNING
                 request.num_computed_tokens = num_computed_tokens
-                if vllm_version_is("0.19.1"):
-                    # Count the number of prefix cached tokens.
-                    if request.num_cached_tokens < 0:
-                        request.num_cached_tokens = num_computed_tokens
                 # Encoder-related.
                 if encoder_inputs_to_schedule:
                     scheduled_encoder_inputs[request_id] = encoder_inputs_to_schedule
@@ -956,11 +941,7 @@ def update_from_output(
             if new_token_ids or pooler_output is not None or kv_transfer_params or stopped:
                 # Add EngineCoreOutput for this Request.
                 prefill_kwargs: dict = {}
-                if not vllm_version_is("0.19.1"):
-                    prefill_kwargs["prefill_stats"] = request.take_prefill_stats()
-                else:
-                    prefill_kwargs["num_cached_tokens"] = request.num_cached_tokens
-                    prefill_kwargs["num_external_computed_tokens"] = request.num_external_computed_tokens
+                prefill_kwargs["prefill_stats"] = request.take_prefill_stats()
                 outputs[request.client_index].append(
                     EngineCoreOutput(
                         request_id=req_id,
@@ -994,8 +975,6 @@ def update_from_output(
             self.finish_requests(failed_kv_load_req_ids, RequestStatus.FINISHED_ERROR)
             for request in requests:
                 prefill_kwargs = {}
-                if vllm_version_is("0.19.1"):
-                    prefill_kwargs["num_cached_tokens"] = request.num_cached_tokens
                 outputs[request.client_index].append(
                     EngineCoreOutput(
                         request_id=request.request_id,
diff --git a/vllm_ascend/core/scheduler_dynamic_batch.py b/vllm_ascend/core/scheduler_dynamic_batch.py
index a055b2fde6d..1a9260d947d 100644
--- a/vllm_ascend/core/scheduler_dynamic_batch.py
+++ b/vllm_ascend/core/scheduler_dynamic_batch.py
@@ -31,8 +31,6 @@
 from vllm.v1.request import Request, RequestStatus
 from vllm.v1.structured_output import StructuredOutputManager
 
-from vllm_ascend.utils import vllm_version_is
-
 
 class BudgetRefiner:
     """This budget refiner can make dynamic adjustment to the token budget
@@ -491,10 +489,6 @@ def schedule(self) -> SchedulerOutput:
                 token_budget -= num_new_tokens
                 request.status = RequestStatus.RUNNING
                 request.num_computed_tokens = num_computed_tokens
-                if vllm_version_is("0.19.1"):
-                    # Count the number of prefix cached tokens.
-                    if request.num_cached_tokens < 0:
-                        request.num_cached_tokens = num_computed_tokens
                 # Encoder-related.
                 if encoder_inputs_to_schedule:
                     scheduled_encoder_inputs[request.request_id] = encoder_inputs_to_schedule
diff --git a/vllm_ascend/core/scheduler_profiling_chunk.py b/vllm_ascend/core/scheduler_profiling_chunk.py
index 02b891f74a4..df9766e20ca 100644
--- a/vllm_ascend/core/scheduler_profiling_chunk.py
+++ b/vllm_ascend/core/scheduler_profiling_chunk.py
@@ -41,7 +41,6 @@
 from vllm.v1.utils import record_function_or_nullcontext
 
 from vllm_ascend.core.profiling_chunk_predictor import ProfilingChunkManager
-from vllm_ascend.utils import vllm_version_is
 
 
 class ProfilingChunkScheduler(Scheduler):
@@ -482,9 +481,6 @@ def schedule(self) -> SchedulerOutput:  # noqa: C901
                             request_queue.pop_request()
                             step_skipped_waiting.prepend_request(request)
                             continue
-
-                        if vllm_version_is("0.19.1"):
-                            request.num_external_computed_tokens = ext_tokens
                         num_external_computed_tokens = ext_tokens
 
                         connector_prefix_cache_queries = request.num_tokens - num_new_local_computed_tokens
@@ -492,7 +488,7 @@ def schedule(self) -> SchedulerOutput:  # noqa: C901
 
                     num_computed_tokens = num_new_local_computed_tokens + num_external_computed_tokens
 
-                    if not vllm_version_is("0.19.1") and request.prefill_stats is not None:
+                    if request.prefill_stats is not None:
                         request.prefill_stats.set(
                             num_prompt_tokens=request.num_prompt_tokens,
                             num_local_cached_tokens=num_new_local_computed_tokens,
@@ -637,9 +633,6 @@ def schedule(self) -> SchedulerOutput:  # noqa: C901
                 time_budget -= self.profiling_chunk_manager.predict_time(num_new_tokens, request.num_computed_tokens)
                 request.status = RequestStatus.RUNNING
                 request.num_computed_tokens = num_computed_tokens
-                if vllm_version_is("0.19.1"):
-                    if request.num_cached_tokens < 0:
-                        request.num_cached_tokens = num_computed_tokens
                 if encoder_inputs_to_schedule:
                     scheduled_encoder_inputs[request_id] = encoder_inputs_to_schedule
                     for i in encoder_inputs_to_schedule:
diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index b0d7a946b33..8a4173fe9cf 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -49,7 +49,6 @@
     npu_stream_switch,
     shared_expert_dp_enabled,
     shared_experts_calculation_stream,
-    vllm_version_is,
 )
 
 
@@ -357,14 +356,13 @@ def __init__(self, *args, **kwargs):
         setup_moe_comm_method(self.moe_config)
         self.quant_type = self._get_quant_type()
 
-        is_legacy = vllm_version_is("0.19.1")
         self.runner = AscendMoERunner(
-            self if is_legacy else self.layer_name,
+            self.layer_name,
             self.moe_config,
             self.router,
             self._routed_input_transform,
-            self.gate if is_legacy else kwargs.pop("gate", None),
-            self.shared_experts if is_legacy else kwargs.pop("shared_experts", None),
+            kwargs.pop("gate", None),
+            kwargs.pop("shared_experts", None),
             self.quant_method,
             self.reduce_results,
             self.vllm_config.parallel_config.enable_dbo,
@@ -583,9 +581,8 @@ def __init__(
         # NOTE: must use self._shared_experts here, not self.shared_experts —
         # FusedMoE.shared_experts is a property that reads self.runner.shared_experts,
         # which at this point is still the stale runner built with shared_experts=None.
-        is_legacy = vllm_version_is("0.19.1")
         self.runner = AscendMoERunner(
-            self if is_legacy else self.layer_name,
+            self.layer_name,
             self.moe_config,
             self.router,
             self._routed_input_transform,
diff --git a/vllm_ascend/ops/mla.py b/vllm_ascend/ops/mla.py
index 85a08dc194a..047b27eea9e 100644
--- a/vllm_ascend/ops/mla.py
+++ b/vllm_ascend/ops/mla.py
@@ -33,7 +33,7 @@
 
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import _EXTRA_CTX
-from vllm_ascend.utils import is_vl_model, parse_layer_idx, vllm_version_is
+from vllm_ascend.utils import is_vl_model, parse_layer_idx
 
 
 class IndexerWrapper(nn.Module):
@@ -54,12 +54,7 @@ def __init__(self, vllm_indexer: nn.Module) -> None:
         self.topk_tokens: int = vllm_indexer.topk_tokens  # 2048
         self.q_lora_rank: int = vllm_indexer.q_lora_rank  # 1536
         self.wq_b = vllm_indexer.wq_b
-        # upstream ac3dac545 fused wk+weights_proj into wk_weights_proj
-        if vllm_version_is("0.19.1"):
-            self.wk = vllm_indexer.wk
-            self.weights_proj = vllm_indexer.weights_proj
-        else:
-            self.wk_weights_proj = vllm_indexer.wk_weights_proj
+        self.wk_weights_proj = vllm_indexer.wk_weights_proj
         self.k_norm = vllm_indexer.k_norm
         self.softmax_scale = vllm_indexer.softmax_scale
         vllm_indexer.topk_indices_buffer = None  # delete topk_indices_buffer
diff --git a/vllm_ascend/patch/platform/patch_balance_schedule.py b/vllm_ascend/patch/platform/patch_balance_schedule.py
index a590231abb8..5711352aff3 100644
--- a/vllm_ascend/patch/platform/patch_balance_schedule.py
+++ b/vllm_ascend/patch/platform/patch_balance_schedule.py
@@ -24,8 +24,6 @@
 from vllm.v1.structured_output import StructuredOutputManager
 from vllm.v1.utils import record_function_or_nullcontext
 
-from vllm_ascend.utils import vllm_version_is
-
 
 class BalanceScheduler(Scheduler):
     def __init__(
@@ -351,17 +349,14 @@ def schedule(self) -> SchedulerOutput:
                             skipped_waiting_requests.prepend_request(request)
                             continue
 
-                        if vllm_version_is("0.19.1"):
-                            request.num_external_computed_tokens = ext_tokens
                         num_external_computed_tokens = ext_tokens
-
                         connector_prefix_cache_queries = request.num_tokens - num_new_local_computed_tokens
                         connector_prefix_cache_hits = num_external_computed_tokens
 
                     # Total computed tokens (local + external).
                     num_computed_tokens = num_new_local_computed_tokens + num_external_computed_tokens
 
-                    if not vllm_version_is("0.19.1") and request.prefill_stats is not None:
+                    if request.prefill_stats is not None:
                         request.prefill_stats.set(
                             num_prompt_tokens=request.num_prompt_tokens,
                             num_local_cached_tokens=num_new_local_computed_tokens,
@@ -506,10 +501,6 @@ def schedule(self) -> SchedulerOutput:
                 token_budget -= num_new_tokens
                 request.status = RequestStatus.RUNNING
                 request.num_computed_tokens = num_computed_tokens
-                if vllm_version_is("0.19.1"):
-                    # Count the number of prefix cached tokens.
-                    if request.num_cached_tokens < 0:
-                        request.num_cached_tokens = num_computed_tokens
                 # Encoder-related.
                 if encoder_inputs_to_schedule:
                     scheduled_encoder_inputs[request_id] = encoder_inputs_to_schedule
diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py
index 5f27adcdc82..ff0c51d15df 100644
--- a/vllm_ascend/patch/worker/__init__.py
+++ b/vllm_ascend/patch/worker/__init__.py
@@ -17,7 +17,7 @@
 
 from vllm.triton_utils import HAS_TRITON
 
-from vllm_ascend.utils import is_310p, vllm_version_is
+from vllm_ascend.utils import is_310p
 
 if HAS_TRITON:
     import vllm_ascend.patch.worker.patch_triton
@@ -38,9 +38,8 @@
 if not is_310p():
     import vllm_ascend.patch.worker.patch_qwen3_5  # noqa
     import vllm_ascend.patch.worker.patch_gdn_attn  # noqa
+    import vllm_ascend.patch.worker.patch_qwen3_dflash  # noqa
 
-    if not vllm_version_is("0.19.1"):
-        import vllm_ascend.patch.worker.patch_qwen3_dflash  # noqa
 import vllm_ascend.patch.worker.patch_rejection_sampler  # noqa
 import vllm_ascend.patch.worker.patch_v2.patch_uva  # noqa
 import vllm_ascend.patch.worker.patch_huanyuan_vl  # noqa
diff --git a/vllm_ascend/patch/worker/patch_qwen3vl.py b/vllm_ascend/patch/worker/patch_qwen3vl.py
index 5ef8e97abc8..b5b77d8d1e5 100644
--- a/vllm_ascend/patch/worker/patch_qwen3vl.py
+++ b/vllm_ascend/patch/worker/patch_qwen3vl.py
@@ -5,11 +5,11 @@
 from vllm.model_executor.models.qwen3_vl import (
     Qwen3_VisionTransformer,
     Qwen3VLForConditionalGeneration,
+    pos_embed_interpolate_native,
 )
 
 from vllm_ascend.ascend_forward_context import _EXTRA_CTX
 from vllm_ascend.ops.rotary_embedding import AscendMRotaryEmbedding
-from vllm_ascend.utils import vllm_version_is
 
 
 def tensor_parallel_wrap(func):
@@ -73,24 +73,22 @@ def forward_with_split_qkv_rmsnorm_mrope(self, positions: torch.Tensor, hidden_s
     Qwen3VLForConditionalGeneration._get_deepstack_input_embeds
 )
 
-if not vllm_version_is("0.19.1"):
-    # Only patch for latest main
-    from vllm.model_executor.models.qwen3_vl import pos_embed_interpolate_native
 
-    def _fast_pos_embed_interpolate(self, grid_thw: list[list[int]]) -> torch.Tensor:
-        outputs = []
-        for t, h, w in grid_thw:
-            outputs.append(
-                pos_embed_interpolate_native(
-                    self.pos_embed.weight,
-                    t,
-                    h,
-                    w,
-                    self.num_grid_per_side,
-                    self.spatial_merge_size,
-                    self.dtype,
-                )
+def _fast_pos_embed_interpolate(self, grid_thw: list[list[int]]) -> torch.Tensor:
+    outputs = []
+    for t, h, w in grid_thw:
+        outputs.append(
+            pos_embed_interpolate_native(
+                self.pos_embed.weight,
+                t,
+                h,
+                w,
+                self.num_grid_per_side,
+                self.spatial_merge_size,
+                self.dtype,
             )
-        return torch.cat(outputs, dim=0)
+        )
+    return torch.cat(outputs, dim=0)
+
 
-    Qwen3_VisionTransformer.fast_pos_embed_interpolate = _fast_pos_embed_interpolate
+Qwen3_VisionTransformer.fast_pos_embed_interpolate = _fast_pos_embed_interpolate
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 0ba64ee424c..08c33b9d530 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -48,7 +48,6 @@
     update_cudagraph_capture_sizes,
     is_310p,
     enable_sp,
-    vllm_version_is,
 )
 
 if TYPE_CHECKING:
@@ -757,10 +756,7 @@ def set_additional_forward_context(
             num_tokens = list(attn_metadata.values())[0].num_actual_tokens
         dp_world_size = get_dp_group().world_size
         if dp_world_size > 1 and dp_metadata is not None:
-            if vllm_version_is("0.19.1"):
-                max_tokens_across_dp = dp_metadata.max_tokens_across_dp_cpu.item()
-            else:
-                max_tokens_across_dp = dp_metadata.num_tokens_across_dp_cpu.max().item()
+            max_tokens_across_dp = dp_metadata.num_tokens_across_dp_cpu.max().item()
             if flash_comm_v1_enabled or flashcomm_v2_enabled:
                 padded_length = (max_tokens_across_dp + tp_world_size - 1) // tp_world_size * tp_world_size
                 pad_size = padded_length - num_tokens
diff --git a/vllm_ascend/spec_decode/__init__.py b/vllm_ascend/spec_decode/__init__.py
index 47929115690..9083e39587a 100644
--- a/vllm_ascend/spec_decode/__init__.py
+++ b/vllm_ascend/spec_decode/__init__.py
@@ -24,7 +24,6 @@
 from vllm_ascend.spec_decode.medusa_proposer import AscendMedusaProposer
 from vllm_ascend.spec_decode.ngram_proposer import AscendNgramProposer
 from vllm_ascend.spec_decode.suffix_proposer import AscendSuffixDecodingProposer
-from vllm_ascend.utils import vllm_version_is
 
 
 def get_spec_decode_method(method, vllm_config, device, runner):
@@ -37,10 +36,7 @@ def get_spec_decode_method(method, vllm_config, device, runner):
     elif method in ("eagle", "eagle3", "mtp"):
         return AscendEagleProposer(vllm_config, device, runner)
     elif method == "dflash":
-        if not vllm_version_is("0.19.1"):
-            return AscendDflashProposer(vllm_config, device, runner)
-        else:
-            raise ValueError(f"VLLM v0.19.1 doesn't support {method} now")
+        return AscendDflashProposer(vllm_config, device, runner)
     elif method == "draft_model":
         return AscendDraftModelProposer(vllm_config, device, runner)
     else:
diff --git a/vllm_ascend/spec_decode/eagle_proposer.py b/vllm_ascend/spec_decode/eagle_proposer.py
index 4a7385a537f..faff270b7a9 100644
--- a/vllm_ascend/spec_decode/eagle_proposer.py
+++ b/vllm_ascend/spec_decode/eagle_proposer.py
@@ -25,6 +25,7 @@
 from vllm.model_executor.models import supports_multimodal
 from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache
 from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
+from vllm.model_executor.models.qwen3_dflash import DFlashQwen3ForCausalLM
 from vllm.triton_utils import HAS_TRITON, triton
 from vllm.utils.math_utils import cdiv
 from vllm.utils.platform_utils import is_pin_memory_available
@@ -47,12 +48,7 @@
 from vllm_ascend.compilation.acl_graph import ACLGraphWrapper, update_full_graph_params
 from vllm_ascend.ops.triton.spec_decode.utils import prepare_inputs_padded_kernel
 from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
-from vllm_ascend.utils import enable_sp, lmhead_tp_enable, shared_expert_dp_enabled, vllm_version_is
-
-if not vllm_version_is("0.19.1"):
-    from vllm.model_executor.models.qwen3_dflash import DFlashQwen3ForCausalLM
-else:
-    DFlashQwen3ForCausalLM = None
+from vllm_ascend.utils import enable_sp, lmhead_tp_enable, shared_expert_dp_enabled
 
 # Currently we will fix block size to a small one since `num_reqs` can't be too large
 _PREPARE_INPUTS_BLOCK_SIZE = 4
diff --git a/vllm_ascend/worker/worker.py b/vllm_ascend/worker/worker.py
index b1855f165e4..4a2f84ad798 100644
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -44,7 +44,10 @@
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
 from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput
 from vllm.v1.worker.gpu_worker import AsyncIntermediateTensors
-from vllm.v1.worker.worker_base import WorkerBase
+from vllm.v1.worker.worker_base import (
+    CompilationTimes,  # noqa: E402
+    WorkerBase,
+)
 from vllm.v1.worker.workspace import init_workspace_manager
 
 import vllm_ascend.envs as envs_ascend
@@ -60,13 +63,9 @@
     enable_sp,
     get_ascend_device_type,
     register_ascend_customop,
-    vllm_version_is,
 )
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
 
-if not vllm_version_is("0.19.1"):
-    from vllm.v1.worker.worker_base import CompilationTimes  # noqa: E402
-
 torch._dynamo.trace_rules.clear_lru_cache()  # noqa: E402
 from torch._dynamo.variables import TorchInGraphFunctionVariable  # noqa: E402
 from vllm.utils.torch_utils import set_random_seed  # noqa: E402
@@ -554,8 +553,6 @@ def compile_or_warm_up_model(self):
         # Reset the seed to ensure that the random state is not affected by
         # the model initialization and profiling.
         set_random_seed(self.model_config.seed)
-        if vllm_version_is("0.19.1"):
-            return self.vllm_config.compilation_config.compilation_time
 
         return CompilationTimes(
             language_model=self.vllm_config.compilation_config.compilation_time,