diff --git a/.github/workflows/dockerfiles/Dockerfile.lint b/.github/workflows/dockerfiles/Dockerfile.lint
index 10dbcaefabc..24fbaab9497 100644
--- a/.github/workflows/dockerfiles/Dockerfile.lint
+++ b/.github/workflows/dockerfiles/Dockerfile.lint
@@ -27,7 +27,7 @@ RUN apt-get update -y && \
 
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 # For lint purpose, actually we need make a main2main matching.
-ARG VLLM_COMMIT=5af684c31912232e5c89484c2e8259e0fac6c55b
+ARG VLLM_COMMIT=6f786f2c506cb07f4566771fdc62e640e2c4a176
 RUN git init /vllm-workspace/vllm && \
     git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
     git -C /vllm-workspace/vllm checkout FETCH_HEAD
diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml
index 894c8f3e589..8e6b51fd7bf 100644
--- a/.github/workflows/pr_test_full.yaml
+++ b/.github/workflows/pr_test_full.yaml
@@ -80,7 +80,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [5af684c31912232e5c89484c2e8259e0fac6c55b, v0.19.0]
+        vllm_version: [6f786f2c506cb07f4566771fdc62e640e2c4a176, v0.19.0]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
     uses: ./.github/workflows/_e2e_test.yaml
diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml
index afc717a9091..767c3adcc7e 100644
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -41,7 +41,7 @@ jobs:
   lint:
     uses: ./.github/workflows/_pre_commit.yml
     with:
-      vllm: 5af684c31912232e5c89484c2e8259e0fac6c55b
+      vllm: 6f786f2c506cb07f4566771fdc62e640e2c4a176
   changes:
     runs-on: linux-aarch64-a2b3-0
     outputs:
@@ -92,7 +92,7 @@ jobs:
     if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
     strategy:
       matrix:
-        vllm_version: [5af684c31912232e5c89484c2e8259e0fac6c55b, v0.19.0]
+        vllm_version: [6f786f2c506cb07f4566771fdc62e640e2c4a176, v0.19.0]
     uses: ./.github/workflows/_unit_test.yaml
     with:
       vllm: ${{ matrix.vllm_version }}
@@ -104,7 +104,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [5af684c31912232e5c89484c2e8259e0fac6c55b, v0.19.0]
+        vllm_version: [6f786f2c506cb07f4566771fdc62e640e2c4a176, v0.19.0]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
diff --git a/.github/workflows/schedule_test_benchmarks.yaml b/.github/workflows/schedule_test_benchmarks.yaml
deleted file mode 100644
index f729208fb6b..00000000000
--- a/.github/workflows/schedule_test_benchmarks.yaml
+++ /dev/null
@@ -1,203 +0,0 @@
-#
-# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
-# This file is a part of the vllm-ascend project.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-name: Performance Schedule Test
-# This workflow runs nightly benchmarks for vllm-ascend.
-
-on:
-  schedule:
-    # Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
-    - cron: "0 12 * * *"
-    - cron: "0 19 * * *"
-
-  workflow_dispatch:
-    # Allow manual triggering of the workflow
-
-  pull_request:
-    types: [ labeled ]
-
-# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
-# declared as "shell: bash -el {0}" on steps that need to be properly activated.
-# It's used to activate ascend-toolkit environment variables.
-defaults:
-  run:
-    shell: bash -el {0}
-
-# only 1 job can runs on static-8-01-cards
-concurrency:
-  group: static-8-01-cards
-  cancel-in-progress: false
-
-jobs:
-  test:
-    if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
-
-    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
-    runs-on: 'linux-arm64-npu-static-8'
-    strategy:
-      matrix:
-        include:
-          - vllm_branch: v0.19.0
-            vllm_ascend_branch: main
-      max-parallel: 1
-    container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11
-      volumes:
-        - /usr/local/dcmi:/usr/local/dcmi
-        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
-        - /usr/local/Ascend/driver/:/usr/local/Ascend/driver/
-        # Use self-host cache speed up pip and model download
-        - /home/action/.cache:/github/home/.cache/
-      options: >-
-        --device /dev/davinci0
-        --device /dev/davinci1
-        --device /dev/davinci_manager
-        --device /dev/devmm_svm
-        --device /dev/hisi_hdc
-      env:
-        VLLM_USE_MODELSCOPE: True
-        ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
-        ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          # keep using tuna's proxy since linux-arm64-npu-static-8 is in another region
-          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
-          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
-
-      - name: Install system dependencies
-        run: |
-          apt-get update -y
-          apt-get -y install git jq wget curl lsof gcc g++ cmake libnuma-dev
-
-      - name: Config git
-        run: |
-          git config --global --add safe.directory "$GITHUB_WORKSPACE"
-          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
-        with:
-          repository: vllm-project/vllm
-          path: ./vllm-empty
-          ref: ${{  matrix.vllm_branch }}
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -e .
-          pip install -r benchmarks/requirements-bench.txt
-
-      - name: Run current commit benchmarks
-        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
-        run: |
-          # Sometimes we only want to run benchmarks on the current commit
-          # This is useful for debugging or a release benchmark
-          bash benchmarks/scripts/run-performance-benchmarks.sh
-          # Convert the benchmark results to markdown format
-          python3 benchmarks/scripts/convert_json_to_markdown.py
-
-      - name: Generate step summary
-        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
-        run: |
-          cat ./benchmarks/results/benchmark_results.md >> "$GITHUB_STEP_SUMMARY"
-
-      - name: Upload benchmark artifacts
-        if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'
-        uses: actions/upload-artifact@v7
-        with:
-          name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report"
-          path: ./benchmarks/results/benchmark_results.md
-          if-no-files-found: warn
-          retention-days: 90
-          overwrite: true
-
-      - name: Install elastic_tool
-        if: github.event_name != 'pull_request'
-        run: |
-          pip install escli-tool==0.2.3
-
-      - name: Collect pr info from vllm-project/vllm-ascend
-        if: github.event_name != 'pull_request'
-        run: |
-          # Only get the pull request which may influences performance
-          git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' ':!benchmarks/*' > commit_log.txt
-          escli check commit_log.txt
-      
-      - name: Prepare benchmark script in advance
-        if: github.event_name != 'pull_request'
-        # This is for the benchmark iteration, which will change the benchmark scripts while checkouting each commit.
-        # We need ensure the benchmark scripts always available.
-        run: |
-          # Prepare the benchmark script in advance
-          mkdir -p /github/home/benchmarks
-          cp -r benchmarks/* /github/home/benchmarks/
-
-      - name: Run benchmark iteration
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        if: github.event_name != 'pull_request'
-        run: |
-          while IFS= read -r line || [[ -n "$line" ]]; do
-            commit_id=${line%% *}
-            commit_title=${line#* }
-
-            git checkout "$commit_id"
-            commit_time=$(git show -s --format=%cd "$commit_id" --date=iso-strict)
-            commit_time_no_tz="${commit_time::19}"
-            pip install -e .
-
-            echo "------------------------"
-            echo "commit_id: $commit_id"
-            echo "commit_title: $commit_title"
-            echo "commit_time: $commit_time_no_tz"
-            echo "vllm branch: ${{ matrix.vllm_branch }}"
-            echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
-            echo "------------------------"
-
-            cd /github/home
-            ERROR_MSG=""
-            if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
-              ERROR_MSG="Benchmark failed to run"
-            fi
-            # send the result to es
-            escli add --vllm_branch "${{ matrix.vllm_branch }}" \
-            --vllm_ascend_branch "${{ matrix.vllm_ascend_branch }}" \
-            --commit_id "$commit_id" \
-            --commit_title "$commit_title" \
-            --created_at "$commit_time_no_tz" \
-            --res_dir ./benchmarks/results \
-            --error "$ERROR_MSG" \
-            rm -rf ./benchmarks/results
-            cd -
-          done < commit_log.txt
diff --git a/.github/workflows/schedule_update_estimated_time.yaml b/.github/workflows/schedule_update_estimated_time.yaml
index 01175419ffb..01bb5c87aff 100644
--- a/.github/workflows/schedule_update_estimated_time.yaml
+++ b/.github/workflows/schedule_update_estimated_time.yaml
@@ -23,7 +23,7 @@ jobs:
     name: e2e-test
     strategy:
       matrix:
-        vllm_version: [v0.18.0]
+        vllm_version: [v0.19.0]
         type: [full, light]
     uses: ./.github/workflows/_e2e_test.yaml
     with:
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 0950d4406bc..f01562dc68d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -80,7 +80,7 @@
     # CANN image tag
     "cann_image_tag": "8.5.1-910b-ubuntu22.04-py3.11",
     # vLLM commit hash for main branch
-    "main_vllm_commit": "5af684c31912232e5c89484c2e8259e0fac6c55b",
+    "main_vllm_commit": "6f786f2c506cb07f4566771fdc62e640e2c4a176",
     # vLLM tag for main branch
     "main_vllm_tag": "v0.19.0",
     # Python version for main branch
diff --git a/tests/e2e/singlecard/model_runner_v2/test_basic.py b/tests/e2e/singlecard/model_runner_v2/test_basic.py
index cd47bad20e5..59e03d4dd69 100644
--- a/tests/e2e/singlecard/model_runner_v2/test_basic.py
+++ b/tests/e2e/singlecard/model_runner_v2/test_basic.py
@@ -22,6 +22,7 @@
 from vllm import SamplingParams
 
 from tests.e2e.conftest import VllmRunner
+from vllm_ascend.utils import vllm_version_is
 
 MODELS = ["Qwen/Qwen3-0.6B"]
 
@@ -63,6 +64,7 @@ def test_qwen3_dense_eager_mode(
         runner.model.generate(prompts, sampling_params)
 
 
+@pytest.mark.skipif(vllm_version_is("0.19.0"), reason="no need to support model_runner for v0.19.0")
 @pytest.mark.parametrize("model", MAIN_MODELS)
 @pytest.mark.parametrize("eagle_model", EGALE_MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
diff --git a/vllm_ascend/patch/worker/patch_gdn_attn.py b/vllm_ascend/patch/worker/patch_gdn_attn.py
index 716b3af86fe..53181133dfd 100644
--- a/vllm_ascend/patch/worker/patch_gdn_attn.py
+++ b/vllm_ascend/patch/worker/patch_gdn_attn.py
@@ -23,6 +23,7 @@
     _validate_cu_seqlens,
     build_chunk_meta_device,
 )
+from vllm_ascend.utils import is_310p
 
 _GDN_CHUNK_SIZE = 64
 # Keep this aligned with solve_tril.LARGE_BLOCK_T in ops/triton/fla/solve_tril.py.
@@ -596,7 +597,7 @@ def _patched_build(
     return attn_metadata
 
 
-if not _IS_PATCHED:
+if not _IS_PATCHED and not is_310p():
     gdn_attn.GDNChunkedPrefillMetadata = GDNChunkedPrefillMetadata
     gdn_attn.GDNCausalConv1dHostMetadata = GDNCausalConv1dHostMetadata
     gdn_attn.GDNPrefillFallbackMeta = GDNPrefillFallbackMeta
diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py
index 10eb7622885..3ab2986c0ec 100644
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -404,7 +404,7 @@ def from_config(cls, config: dict[str, Any]) -> "AscendModelSlimConfig":
         return cls(config)
 
     @classmethod
-    def override_quantization_method(cls, hf_quant_cfg, user_quant) -> str | None:
+    def override_quantization_method(cls, hf_quant_cfg, user_quant, hf_config: Any = None) -> str | None:
         if hf_quant_cfg is not None:
             quant_method = hf_quant_cfg.get("quant_method", None)
             if not quant_method and torch.npu.is_available():
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index af57b6bdc97..2992033423a 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -2449,7 +2449,6 @@ def _dummy_run(
         if create_mixed_batch:
             raise NotImplementedError("create_mixed_batch is used for warmup deepgemm, vllm-ascend does not need it")
         elif uniform_decode:
-            assert not create_mixed_batch
             num_reqs = min(max_num_reqs, cdiv(num_tokens, max_query_len))
             num_scheduled_tokens_list = [max_query_len] * num_reqs
             if num_tokens % max_query_len != 0:
@@ -3473,12 +3472,17 @@ def _check_and_update_cudagraph_mode(
         with update_pass_config(self):
             super()._check_and_update_cudagraph_mode(attention_backends, kv_cache_groups)
 
         # NOTE: Since aclgraph_batch_sizes cannot be determined until here,
         # we set the graph params right before initializing the keys.
         if self.use_aclgraph:
-            set_graph_params(self.cudagraph_batch_sizes)
+            # Graph params are keyed by the sizes the dispatcher reports: the distinct
+            # num_tokens over every descriptor list returned by get_capture_descs(),
+            # in ascending order. Only needed for ACL graph, so compute it under the guard.
+            capture_descs = self.cudagraph_dispatcher.get_capture_descs()
+            capture_sizes = sorted({desc.num_tokens for _, descs in capture_descs for desc in descs})
+            set_graph_params(capture_sizes)
             if self.speculative_config:
-                set_draft_graph_params(self.cudagraph_batch_sizes)
+                set_draft_graph_params(capture_sizes)
 
     def capture_model(self) -> None:
         gpu_model_runner_cls = next((cls for cls in self.__class__.__mro__ if cls.__name__ == "GPUModelRunner"), None)
diff --git a/vllm_ascend/worker/v2/spec_decode/eagle/speculator.py b/vllm_ascend/worker/v2/spec_decode/eagle/speculator.py
index 62e3d0e66fc..05fda17b111 100644
--- a/vllm_ascend/worker/v2/spec_decode/eagle/speculator.py
+++ b/vllm_ascend/worker/v2/spec_decode/eagle/speculator.py
@@ -66,6 +66,7 @@ def propose(
         dummy_run: bool = False,
         skip_attn_for_dummy_run: bool = False,
         mm_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None,
+        is_profile: Any = None,
     ):
         """Override GPU EagleSpeculator.propose for Ascend NPUs,
         because npu attention metadata needs more information,
@@ -92,6 +93,7 @@ def propose(
                 dummy_run,
                 skip_attn_for_dummy_run,
                 mm_inputs,
+                is_profile=is_profile,
             )
 
     def generate_draft(