4 changes: 2 additions & 2 deletions .github/workflows/labled_doctest.yaml
@@ -44,11 +44,11 @@ jobs:
# Each version should be tested
fail-fast: false
matrix:
-vllm_verison: [releases-v0.13.0, releases-v0.13.0-openeuler, main, main-openeuler]
+vllm_version: [releases-v0.13.0, releases-v0.13.0-openeuler, main, main-openeuler]
name: vLLM Ascend test
runs-on: linux-aarch64-a2b3-1
container:
-image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
+image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_version }}
steps:
- name: Check NPU/CANN and git info
run: |
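(Note: the misspelled matrix key and the `${{ matrix.vllm_verison }}` reference were consistent with each other, so the job resolved the intended container image even before this change; here and in the nightly workflow below, the rename is a naming cleanup with no behavior change.)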
6 changes: 3 additions & 3 deletions .github/workflows/schedule_nightly_test_a2.yaml
@@ -146,10 +146,10 @@ jobs:
# Each version should be tested
fail-fast: false
matrix:
-vllm_verison: [releases-v0.13.0, releases-v0.13.0-openeuler, main, main-openeuler]
+vllm_version: [releases-v0.13.0, releases-v0.13.0-openeuler, main, main-openeuler]
runs-on: linux-aarch64-a2b3-1
container:
-image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
+image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_version }}
steps:
- name: Check NPU/CANN and git info
run: |
@@ -183,4 +183,4 @@ jobs:

# Run real test
echo "Test:"
-/vllm-workspace/vllm-ascend/tests/e2e/run_doctests.sh
\ No newline at end of file
+/vllm-workspace/vllm-ascend/tests/e2e/run_doctests.sh
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -1,16 +1,19 @@
default_install_hook_types:
- pre-commit
- commit-msg
+
default_stages:
- pre-commit # Run locally
- manual # Run in CI
+
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.0
hooks:
- id: ruff-check
args: [--output-format, github, --fix]
- id: ruff-format
+
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
@@ -22,6 +25,7 @@ repos:
]
additional_dependencies:
- tomli
+
- repo: https://github.com/crate-ci/typos
rev: v1.32.0
hooks:
@@ -30,24 +34,28 @@ repos:
"--force-exclude",
"--exclude", "csrc/**"
]
+
# - repo: https://github.com/pre-commit/mirrors-clang-format
# rev: v20.1.3
# hooks:
# - id: clang-format
# files: ^csrc/.*\.(cpp|hpp|cc|hh|cxx|hxx)$
# types_or: [c++]
# args: [--style=google, --verbose]
+
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.45.0
hooks:
- id: markdownlint
exclude: '.*\.inc\.md$|.*report_template\.md$|.*contributors\.md$|.*PULL_REQUEST_TEMPLATE\.md$'
stages: [manual] # Only run in CI
+
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
exclude: '.*\.github/workflows/scripts/.*\.ya?ml$'
+
- repo: local
hooks:
- id: png-lint
@@ -25,5 +25,5 @@ msgid "Adding a New Multi-Modal Model"
msgstr "添加新的多模态模型"

#: ../../developer_guide/modeling/adding_a_new_multimodal_model.md:3
-msgid "**_Comming soon ..._**"
+msgid "**_Coming soon ..._**"
msgstr "**_敬请期待 ..._**"
2 changes: 1 addition & 1 deletion docs/source/user_guide/release_notes.md
@@ -636,7 +636,7 @@ This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please follow the [
- Fix incorrect req block length in ascend scheduler [#2394](https://github.com/vllm-project/vllm-ascend/pull/2394)
- Fix header include issue in rope [#2398](https://github.com/vllm-project/vllm-ascend/pull/2398)
- Fix mtp config bug [#2412](https://github.com/vllm-project/vllm-ascend/pull/2412)
-- Fix error info and adapt `attn_metedata` refactor [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)
+- Fix error info and adapt `attn_metadata` refactor [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)
- Fix torchair runtime error caused by configuration mismatches and `.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312)
- Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230)

@@ -178,7 +178,7 @@ def abort_prefiller_request(self, server_idx: int, request_id): # Changed to sy
# No lock needed - atomic operation
self.prefillers[server_idx].aborted_requests.add(request_id)

-def aquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
+def acquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
"""
Get the set of aborted requests and clear it.
This is used to release kv cache in prefiller node.
@@ -325,7 +325,7 @@ async def send_request_to_service(
max_retries: int = 3,
base_delay: float = 0.2,
):
-proxy_state.aquire_aborted_prefiller_requests(prefiller_id)
+proxy_state.acquire_aborted_prefiller_requests(prefiller_id)
req_data = req_data.copy()
req_data["stream"] = False
req_data["max_tokens"] = 1
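The `aquire` → `acquire` rename touches both the definition and its call sites across the two proxy examples. For readers skimming the diff, the method's docstring describes a take-and-clear handoff; below is a minimal runnable sketch of that pattern, where `PrefillerState` and its fields are assumptions reconstructed from the surrounding context, not the examples' actual classes.

```python
from dataclasses import dataclass, field


@dataclass
class PrefillerState:
    # Assumed container; the real proxy tracks more per-server state.
    url: str = ""
    aborted_requests: set[str] = field(default_factory=set)


class ProxyState:
    def __init__(self, num_prefillers: int):
        self.prefillers = [PrefillerState() for _ in range(num_prefillers)]

    def abort_prefiller_request(self, server_idx: int, request_id: str) -> None:
        # Single attribute mutation; no lock needed, as the diff's comment notes.
        self.prefillers[server_idx].aborted_requests.add(request_id)

    def acquire_aborted_prefiller_requests(self, server_idx: int) -> set[str]:
        # Swap in a fresh set so the caller owns the old one exclusively,
        # matching the docstring: get the aborted set and clear it.
        aborted = self.prefillers[server_idx].aborted_requests
        self.prefillers[server_idx].aborted_requests = set()
        return aborted
```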
@@ -241,7 +241,7 @@ def abort_prefiller_request(self, server_idx: int, request_id): # Changed to sy
return
self.prefillers[server_idx].aborted_requests.add(request_id)

-def aquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
+def acquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
"""
Get the set of aborted requests and clear it.
This is used to release kv cache in prefiller node.
@@ -582,7 +582,7 @@ async def send_request_to_service(
max_retries: int = 3,
base_delay: float = 0.2,
):
-aborted_requests = proxy_state.aquire_aborted_prefiller_requests(prefiller_id)
+aborted_requests = proxy_state.acquire_aborted_prefiller_requests(prefiller_id)
req_data = req_data.copy()
req_data["kv_transfer_params"] = {
"do_remote_decode": True,
4 changes: 2 additions & 2 deletions examples/eplb/eplb_strategy.py
@@ -59,7 +59,7 @@ def calculate_average(lst):
return total / count


-def layer_imblance_polt(y_list, label_names, device_num, output_path, file_name):
+def layer_imbalance_plot(y_list, label_names, device_num, output_path, file_name):
plt.rcParams["font.sans-serif"] = ["Arial"]
plt.rcParams["axes.unicode_minus"] = False
x = [i for i in range(58)]
@@ -160,4 +160,4 @@ def deepseek_deploy(workload, num_redundancy_expert, num_groups, num_nodes, num_
save_matrix_to_json(output_path, file_name, np.array(global_deployment))
label_names = ["default deployment max load", "balanced load max load", "balanced load avg load"]
new_file_name = f"{exp_name}_{num_devices}_{num_redundancy_expert}.png"
-layer_imblance_polt(y_list, label_names, num_devices, output_path, new_file_name)
+layer_imbalance_plot(y_list, label_names, num_devices, output_path, new_file_name)
6 changes: 3 additions & 3 deletions examples/external_online_dp/dp_load_balance_proxy_server.py
@@ -283,10 +283,10 @@ async def _select_instance(api: str, req_data: Any, request_length: int):
request_id = await proxy_state.next_req_id()
# Select dp server based on priority score
server_idx = proxy_state.select_server(priority_score)
-choosen_server = proxy_state.dp_servers[server_idx]
-logger.debug(f"Choose server {choosen_server.url} to process request {request_id}")
+chosen_server = proxy_state.dp_servers[server_idx]
+logger.debug(f"Choose server {chosen_server.url} to process request {request_id}")
return InstanceInfo(
-request_id=request_id, server_idx=server_idx, priority_score=priority_score, server_state=choosen_server
+request_id=request_id, server_idx=server_idx, priority_score=priority_score, server_state=chosen_server
)


8 changes: 4 additions & 4 deletions examples/external_online_dp/launch_online_dp.py
@@ -29,11 +29,11 @@ def parse_args():
vllm_start_port = args.vllm_start_port


-def run_command(visiable_devices, dp_rank, vllm_engine_port):
+def run_command(visible_devices, dp_rank, vllm_engine_port):
command = [
"bash",
"./run_dp_template.sh",
-visiable_devices,
+visible_devices,
str(vllm_engine_port),
str(dp_size),
str(dp_rank),
@@ -55,8 +55,8 @@ def run_command(visiable_devices, dp_rank, vllm_engine_port):
for i in range(dp_size_local):
dp_rank = dp_rank_start + i
vllm_engine_port = vllm_start_port + i
-visiable_devices = ",".join(str(x) for x in range(i * tp_size, (i + 1) * tp_size))
-process = multiprocessing.Process(target=run_command, args=(visiable_devices, dp_rank, vllm_engine_port))
+visible_devices = ",".join(str(x) for x in range(i * tp_size, (i + 1) * tp_size))
+process = multiprocessing.Process(target=run_command, args=(visible_devices, dp_rank, vllm_engine_port))
processes.append(process)
process.start()

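The renamed `visible_devices` string carves the accelerators into contiguous tensor-parallel groups, one group per local data-parallel rank. A standalone check of that arithmetic (the `dp_size_local` and `tp_size` values here are illustrative, not taken from the script):

```python
# Illustrative values; the real script parses these from its CLI arguments.
dp_size_local, tp_size = 2, 4

for i in range(dp_size_local):
    # Rank offset i owns devices [i * tp_size, (i + 1) * tp_size).
    visible_devices = ",".join(str(x) for x in range(i * tp_size, (i + 1) * tp_size))
    print(f"dp rank offset {i}: devices {visible_devices}")

# dp rank offset 0: devices 0,1,2,3
# dp rank offset 1: devices 4,5,6,7
```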
2 changes: 1 addition & 1 deletion setup.py
@@ -198,7 +198,7 @@ def run(self):
try:
print("Running bash build_aclnn.sh ...")
subprocess.check_call(["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
print("buid_aclnn.sh executed successfully!")
print("build_aclnn.sh executed successfully!")
except subprocess.CalledProcessError as e:
print(f"Error running build_aclnn.sh: {e}")
raise SystemExit(e.returncode)
4 changes: 2 additions & 2 deletions tests/e2e/nightly/multi_node/scripts/multi_node_config.py
@@ -10,7 +10,7 @@
# isort: off
from tests.e2e.nightly.multi_node.scripts.utils import (
CONFIG_BASE_PATH, DEFAULT_SERVER_PORT, get_all_ipv4, get_cluster_ips,
-get_net_interface, setup_logger, get_avaliable_port)
+get_net_interface, setup_logger, get_available_port)
# isort: on
setup_logger()
logger = logging.getLogger(__name__)
@@ -202,7 +202,7 @@ def __init__(
master_ip = (self.disagg_cfg.master_ip_for_node(
self.cur_index, self.nodes)
if self.disagg_cfg else self.nodes[0].ip)
-self.proxy_port = get_avaliable_port()
+self.proxy_port = get_available_port()

self.envs = DistEnvBuilder(
cur_node=self.cur_node,
2 changes: 1 addition & 1 deletion tests/e2e/nightly/multi_node/scripts/utils.py
@@ -75,7 +75,7 @@ def get_cluster_ips(word_size: int = 2) -> list[str]:
return [resolver(dns) for dns in get_cluster_dns_list(word_size)]


-def get_avaliable_port(start_port: int = 6000, end_port: int = 7000) -> int:
+def get_available_port(start_port: int = 6000, end_port: int = 7000) -> int:
import socket
for port in range(start_port, end_port):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
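The hunk cuts off before the loop body of `get_available_port`; a plausible completion is sketched below, assuming the function bind-tests each candidate port and returns the first one that is free (the error raised at exhaustion is likewise an assumption):

```python
import socket


def get_available_port(start_port: int = 6000, end_port: int = 7000) -> int:
    # Try to bind each port in turn; the first successful bind is free.
    for port in range(start_port, end_port):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
                return port
            except OSError:
                continue
    raise RuntimeError(f"no available port in [{start_port}, {end_port})")
```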
@@ -11,7 +11,7 @@
enable_custom_op()


-class TestDisptachFFNCombine:
+class TestDispatchFFNCombine:

def __init__(self, rank, world_size, port):
self.rank = rank
@@ -208,7 +208,7 @@ def generate_random_tensor(self, size, dtype):


def worker(rank: int, world_size: int, port: int, q: mp.SimpleQueue):
-op = TestDisptachFFNCombine(rank, world_size, port)
+op = TestDispatchFFNCombine(rank, world_size, port)
op.generate_hcom()
out1 = op.run_tensor_list()
q.put(out1)
@@ -11,7 +11,7 @@
enable_custom_op()


-class TestDisptachFFNCombine:
+class TestDispatchFFNCombine:

def __init__(self, rank, world_size, port):
self.rank = rank
@@ -208,7 +208,7 @@ def generate_random_tensor(self, size, dtype):


def worker(rank: int, world_size: int, port: int, q: mp.SimpleQueue):
-op = TestDisptachFFNCombine(rank, world_size, port)
+op = TestDispatchFFNCombine(rank, world_size, port)
op.generate_hcom()
out1 = op.run_tensor_list()
q.put(out1)
@@ -124,10 +124,10 @@ def create_test_data(

logits = torch.randn(num_reqs, vocab_size, device=device, dtype=dtype)

-repetiton_penalty = torch.ones(num_reqs, device=device, dtype=torch.float32)
+repetition_penalty = torch.ones(num_reqs, device=device, dtype=torch.float32)
for i in range(num_reqs):
if torch.rand(1) > 0.3:
-repetiton_penalty[i] = torch.rand(1, device=device).item() * 0.8 + 0.6
+repetition_penalty[i] = torch.rand(1, device=device).item() * 0.8 + 0.6

frequency_penalty = torch.zeros(num_reqs, device=device, dtype=torch.float32)
for i in range(num_reqs):
@@ -168,7 +168,7 @@ def create_test_data(
output_bin_counts[state_idx, token] = count

sampling_metadata = SamplingMetadata(
-repetition_penalty=repetiton_penalty,
+repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
presence_penalty=presence_penalty,
temperature=temperature,
@@ -217,4 +217,3 @@ def test_apply_penalties_and_temperature(
atol = 1e-02
rtol = 1e-02
assert torch.allclose(logits_triton, logits_pytorch_result, atol=atol, rtol=rtol)
-
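For context on the renamed tensor: the test draws per-request repetition penalties uniformly from [0.6, 1.4) via `torch.rand(1) * 0.8 + 0.6`. The sketch below shows the conventional way such a penalty is applied to logits; it illustrates the technique only, not the Triton and PyTorch kernels this test actually compares.

```python
import torch


def apply_repetition_penalty(logits: torch.Tensor, seen_mask: torch.Tensor,
                             penalty: torch.Tensor) -> torch.Tensor:
    # Conventional rule: for tokens already generated, divide positive logits
    # by the penalty and multiply negative logits by it; leave the rest alone.
    p = penalty.unsqueeze(1)
    penalized = torch.where(logits > 0, logits / p, logits * p)
    return torch.where(seen_mask, penalized, logits)


logits = torch.randn(2, 8)
seen = torch.zeros(2, 8, dtype=torch.bool)
seen[0, 3] = True  # pretend request 0 already emitted token 3
penalty = torch.rand(2) * 0.8 + 0.6  # same [0.6, 1.4) range as the test data
out = apply_repetition_penalty(logits, seen, penalty)
assert out.shape == logits.shape
```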
4 changes: 2 additions & 2 deletions tests/ut/ops/test_moe_mlp.py
@@ -18,7 +18,7 @@ def setUpClass(cls):
}

support_combine = [(0, 0), (1, 0), (0, 1)]
-unsupport_combine = [(0, 2), (2, 1), (1, 2)]
+unsupported_combine = [(0, 2), (2, 1), (1, 2)]

def test_cumsum_group_list_supported_conversion(self):
for src_list_type, dst_list_type in self.support_combine:
@@ -38,7 +38,7 @@ def test_cumsum_group_list_invalid_type_valueerror(self):

def test_cumsum_group_list_unsupported_conversion_notimplementederror(
self):
-for src_list_type, dst_list_type in self.unsupport_combine:
+for src_list_type, dst_list_type in self.unsupported_combine:
with self.subTest(src=src_list_type, dst=dst_list_type):
with self.assertRaises(NotImplementedError) as excinfo:
cumsum_group_list(self.glist_dict[0], src_list_type,
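The renamed `unsupported_combine` list drives a parametrized negative test. Below is a self-contained sketch of the same `subTest` + `assertRaises` pattern, using a stand-in `convert` function since `cumsum_group_list` is not shown in full here:

```python
import unittest


def convert(src_list_type: int, dst_list_type: int) -> int:
    # Stand-in for cumsum_group_list's conversion dispatch; only the
    # combinations the real code supports succeed here.
    if (src_list_type, dst_list_type) not in {(0, 0), (1, 0), (0, 1)}:
        raise NotImplementedError(
            f"unsupported conversion {src_list_type} -> {dst_list_type}")
    return dst_list_type


class TestConversion(unittest.TestCase):
    unsupported_combine = [(0, 2), (2, 1), (1, 2)]

    def test_unsupported_conversion_raises(self):
        # subTest reports every failing pair instead of stopping at the first.
        for src, dst in self.unsupported_combine:
            with self.subTest(src=src, dst=dst):
                with self.assertRaises(NotImplementedError):
                    convert(src, dst)


if __name__ == "__main__":
    unittest.main()
```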
6 changes: 3 additions & 3 deletions typos.toml
@@ -1,6 +1,6 @@
[files]
# these files may be written in non english words
-extend-exclude = []
+extend-exclude = [".pre-commit-config.yaml",]
ignore-hidden = true
ignore-files = true
ignore-dot = true
@@ -17,9 +17,9 @@ ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
".*UE8M0.*", ".*(UE4M3|ue4m3).*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
-extend-ignore-words-re = ["CANN", "cann","ND","alog"]
+extend-ignore-words-re = ["CANN", "cann","ND","alog","nd","BA","datas","ful","udo",]
extend-ignore-re = []

[default.extend-identifiers]
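The bracket-to-parenthesis change in `extend-ignore-identifiers-re` is a real regex fix, not a spelling correction: `[UE4M3|ue4m3]` is a character class matching any single one of those characters (including the literal `|`), while `(UE4M3|ue4m3)` is the intended alternation of the two tokens. A quick Python check of the difference (typos uses Rust regexes, but the semantics are the same here):

```python
import re

over_broad = re.compile(r".*[UE4M3|ue4m3].*")  # class: U, E, 4, M, 3, |, u, e, m
intended = re.compile(r".*(UE4M3|ue4m3).*")    # alternation of the two tokens

assert over_broad.match("pipe|delimited")      # matches on a stray '|' or 'e'
assert not intended.match("pipe|delimited")    # no longer over-matches
assert intended.match("cast_to_ue4m3_format")  # still catches the intended token
```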
6 changes: 3 additions & 3 deletions vllm_ascend/ascend_config.py
@@ -144,14 +144,14 @@ def _construct_weight_prefetch_config(self, additional_config):
if os.getenv("VLLM_ASCEND_ENABLE_PREFETCH_MLP", "0") == "1":
MAX_PREFETCH_WEIGHT_SIZE: int = 18 * 1024 * 1024
gate_up_prefetch_size = int(os.getenv("VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE))
-down_prefetch_szie = int(os.getenv("VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE))
+down_prefetch_size = int(os.getenv("VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE))
self.weight_prefetch_config.set_mlp_pre_version_compatibale_config(
-gate_up_prefetch_size, down_prefetch_szie
+gate_up_prefetch_size, down_prefetch_size
)
logger.info_once(
f"MLP weight prefetch enabled from env variable VLLM_ASCEND_ENABLE_PREFETCH_MLP."
f"gate_up_prefetch_size={gate_up_prefetch_size}, "
f"down_prefetch_szie={down_prefetch_szie}."
f"down_prefetch_size={down_prefetch_size}."
)
warnings.warn(
"VLLM_ASCEND_ENABLE_PREFETCH_MLP is deprecated and will be removed in a v0.16.0 version. "
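A small aside on the corrected lines: `os.getenv` returns its default unchanged when the variable is unset, so `int(...)` receives either the `int` default or the string from the environment, and both convert cleanly. A minimal reproduction, with a hypothetical variable name standing in for the real one:

```python
import os

MAX_PREFETCH_WEIGHT_SIZE = 18 * 1024 * 1024  # 18 MiB, matching the diff

# Unset: os.getenv passes the int default through; int() is a no-op conversion.
assert int(os.getenv("DEMO_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE)) == MAX_PREFETCH_WEIGHT_SIZE

# Set: environment values are always strings, which int() parses.
os.environ["DEMO_PREFETCH_SIZE"] = str(1 << 20)
assert int(os.getenv("DEMO_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE)) == 1 << 20
```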
@@ -34,13 +34,13 @@
from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass

# computation-communication tiling block is 512
-ALLREDUCE_NORM_FUSE_THREHOLD = 512
+ALLREDUCE_NORM_FUSE_THRESHOLD = 512


def get_compile_range_and_extra_stream_check():
def check_func(match: Match) -> bool:
compile_range = get_pass_context().compile_range
-return extra_stream_scope_check(match) and compile_range.start > ALLREDUCE_NORM_FUSE_THREHOLD
+return extra_stream_scope_check(match) and compile_range.start > ALLREDUCE_NORM_FUSE_THRESHOLD

return check_func

@@ -176,5 +176,5 @@ def is_applicable_for_range(self, compile_range: Range) -> bool:
"""
Check if the pass is applicable for the current configuration.
"""
-applicable = compile_range.start > ALLREDUCE_NORM_FUSE_THREHOLD
+applicable = compile_range.start > ALLREDUCE_NORM_FUSE_THRESHOLD
return applicable
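The corrected constant gates whether the fusion pass applies: fusion is only worthwhile when the compile range starts above the 512-element computation-communication tiling block. A reduced sketch of that predicate, with `Range` simplified to a start/end pair (an assumption; the real class comes from vLLM's compilation machinery):

```python
from dataclasses import dataclass

# Computation-communication tiling block is 512, as in the module above.
ALLREDUCE_NORM_FUSE_THRESHOLD = 512


@dataclass
class Range:
    start: int
    end: int


def is_applicable_for_range(compile_range: Range) -> bool:
    # Fusing allreduce + norm only pays off once the compiled shape range
    # starts beyond the tiling block size.
    return compile_range.start > ALLREDUCE_NORM_FUSE_THRESHOLD


assert not is_applicable_for_range(Range(512, 1024))
assert is_applicable_for_range(Range(513, 1024))
```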