Merged

Changes from all commits (32 commits)
856b728 support platform and remove kernel copy (ILikeIneine, Oct 27, 2025)
a52233f update pre-commit (ILikeIneine, Oct 27, 2025)
9e86f3e update version and requirements (ILikeIneine, Oct 27, 2025)
6b0b8e6 update flashinfer (ILikeIneine, Oct 28, 2025)
9ec7b23 update build requirements (ILikeIneine, Nov 4, 2025)
ec972a6 update attention backends (ILikeIneine, Nov 4, 2025)
633ff80 update patch (ILikeIneine, Nov 4, 2025)
19c876b update quant_method (ILikeIneine, Nov 4, 2025)
53017fa update fuse_moe (todo: fix mypy) (ILikeIneine, Nov 4, 2025)
2a3936c update `deepseek_v2.py` (todo: fix indexer kernel) (ILikeIneine, Nov 4, 2025)
fbf5235 [feat] support bf16 cp_gather_indexer_k_cache kernel (Nov 4, 2025)
13a6e97 [fix] fix type error in bf16_paged_mqa_logits (leex404, Nov 5, 2025)
7dc236d [feat] add topk logits ops (leex404, Nov 5, 2025)
63aa6da [fix] private memory size too large in `sample_recovered_tokens_kerne… (leex404, Oct 29, 2025)
1d9c4d4 [fix] fix missing topk logits custom ops definition (leex404, Nov 5, 2025)
0a459f2 [fix] add custom gptq_shuffle ops (leex404, Nov 6, 2025)
3a2cfb0 [fix] fix compile error (leex404, Nov 6, 2025)
32d2d83 platform config update (ILikeIneine, Nov 7, 2025)
34c03c6 update qwen2.5_vl model (ILikeIneine, Nov 7, 2025)
c9bd90a [fix] fix torch not found maca device (leex404, Nov 11, 2025)
47baaef remove hotfixes patch for torch2.8 (ILikeIneine, Nov 11, 2025)
bbcc778 remove needless patch (ILikeIneine, Nov 11, 2025)
6ecac1e [feat] topk_softmax support renormalize and bf16 (leex404, Nov 12, 2025)
5317d66 [fix] update fused_moe to fit v0.11.1 (leex404, Nov 12, 2025)
b870702 [fix] fix fused moe config log missing (leex404, Nov 12, 2025)
dc0fad9 use flash_attn as vit attn backend on qwen_vl (ILikeIneine, Nov 12, 2025)
678dd1a update quant_conf registry (ILikeIneine, Nov 12, 2025)
e6ddd33 fix and apply latest pre-commit of v0.11.1 (ILikeIneine, Nov 12, 2025)
be4945a [feat] Keep all AITER kernels in _aiter_ops (leex404, Nov 12, 2025)
a6d8b1f fix pre-commit on type casting (ILikeIneine, Nov 12, 2025)
0879783 [fix] fix DeepSeek import error (leex404, Nov 13, 2025)
b557d27 [feat] update deepseek_v2 to fit v0.11.1 (leex404, Nov 13, 2025)
12 changes: 12 additions & 0 deletions .markdownlint.yaml
@@ -0,0 +1,12 @@
MD007:
indent: 4
MD013: false
MD024:
siblings_only: true
MD033: false
MD045: false
MD046: false
MD051: false
MD052: false
MD053: false
MD059: false
57 changes: 21 additions & 36 deletions .pre-commit-config.yaml
@@ -6,30 +6,19 @@ default_stages:
   - manual # Run in CI
 exclude: 'vllm/third_party/.*'
 repos:
-- repo: https://github.com/google/yapf
-  rev: v0.43.0
-  hooks:
-  - id: yapf
-    args: [--in-place, --verbose]
-    # Keep the same list from yapfignore here to avoid yapf failing without any inputs
-    exclude: '(.buildkite|benchmarks|build|examples)/.*'
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.7
+  rev: v0.14.0
   hooks:
-  - id: ruff
+  - id: ruff-check
     args: [--output-format, github, --fix]
+  - id: ruff-format
+    files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
-  rev: v1.34.0
+  rev: v1.38.1
   hooks:
   - id: typos
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-  - id: isort
-    args: [--force-exclude]
 - repo: https://github.com/pre-commit/mirrors-clang-format
-  rev: v20.1.3
+  rev: v21.1.2
   hooks:
   - id: clang-format
     exclude: 'csrc/(moe/topk_softmax_kernels.cu|quantization/gguf/(ggml-common.h|dequantize.cuh|vecdotq.cuh|mmq.cuh|mmvq.cuh))|vllm/third_party/.*'
@@ -40,44 +29,40 @@ repos:
   hooks:
   - id: actionlint
 - repo: https://github.com/astral-sh/uv-pre-commit
-  rev: 0.6.17
+  rev: 0.9.1
   hooks:
   - id: pip-compile
     args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cpu]
     files: ^requirements/test\.(in|txt)$
 - repo: local
   hooks:
   - id: mypy-local
-    name: Run mypy for local Python installation
-    entry: tools/mypy.sh 0 "local"
-    language: python
-    types: [python]
-    additional_dependencies: &mypy_deps [mypy==1.11.1, types-cachetools, types-setuptools, types-PyYAML, types-requests, pydantic]
+    name: Run mypy locally for lowest supported Python version
+    entry: python tools/pre_commit/mypy.py 0 "3.10"
     stages: [pre-commit] # Don't run in CI
+    <<: &mypy_common
+      language: python
+      types_or: [python, pyi]
+      require_serial: true
+      additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
   - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.10
-    entry: tools/mypy.sh 1 "3.10"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.10"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.11
-    entry: tools/mypy.sh 1 "3.11"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.11"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.12
-    entry: tools/mypy.sh 1 "3.12"
-    language: python
-    types: [python]
-    additional_dependencies: *mypy_deps
+    entry: python tools/pre_commit/mypy.py 1 "3.12"
+    <<: *mypy_common
     stages: [manual] # Only run in CI
   - id: shellcheck
     name: Lint shell scripts
-    entry: tools/shellcheck.sh
+    entry: tools/pre_commit/shellcheck.sh
     language: script
     types: [shell]
- id: png-lint
@@ -116,7 +101,7 @@ repos:
     pass_filenames: false
   - id: enforce-import-regex-instead-of-re
     name: Enforce import regex as re
-    entry: python tools/enforce_regex_import.py
+    entry: python tools/pre_commit/enforce_regex_import.py
     language: python
     types: [python]
     pass_filenames: false
9 changes: 9 additions & 0 deletions .shellcheckrc
@@ -0,0 +1,9 @@
# rules currently disabled:
#
# SC1091 (info): Not following: <sourced file> was not specified as input (see shellcheck -x)
# SC2004 (style): $/${} is unnecessary on arithmetic variables.
# SC2129 (style): Consider using { cmd1; cmd2; } >> file instead of individual redirects.
# SC2155 (warning): Declare and assign separately to avoid masking return values.
# SC2164 (warning): Use 'cd ... || exit' or 'cd ... || return' in case cd fails.
#
disable=SC1091,SC2004,SC2129,SC2155,SC2164
1 change: 1 addition & 0 deletions .yapfignore
@@ -1 +1,2 @@
 collect_env.py
+vllm/model_executor/layers/fla/ops/*.py
43 changes: 24 additions & 19 deletions cmake/hipify.py
@@ -15,7 +15,7 @@
 
 from torch.utils.hipify.hipify_python import hipify
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
     # Project directory where all the source + include files live.
@@ -33,15 +33,14 @@
     )
 
     # Source files to convert.
-    parser.add_argument("sources",
-                        help="Source files to hipify.",
-                        nargs="*",
-                        default=[])
+    parser.add_argument(
+        "sources", help="Source files to hipify.", nargs="*", default=[]
+    )
 
     args = parser.parse_args()
 
     # Limit include scope to project_dir only
-    includes = [os.path.join(args.project_dir, '*')]
+    includes = [os.path.join(args.project_dir, "*")]
 
     # Get absolute path for all source files.
     extra_files = [os.path.abspath(s) for s in args.sources]
@@ -50,25 +49,31 @@
     # The directory might already exist to hold object files so we ignore that.
     shutil.copytree(args.project_dir, args.output_dir, dirs_exist_ok=True)
 
-    hipify_result = hipify(project_directory=args.project_dir,
-                           output_directory=args.output_dir,
-                           header_include_dirs=[],
-                           includes=includes,
-                           extra_files=extra_files,
-                           show_detailed=True,
-                           is_pytorch_extension=True,
-                           hipify_extra_files_only=True)
+    hipify_result = hipify(
+        project_directory=args.project_dir,
+        output_directory=args.output_dir,
+        header_include_dirs=[],
+        includes=includes,
+        extra_files=extra_files,
+        show_detailed=True,
+        is_pytorch_extension=True,
+        hipify_extra_files_only=True,
+    )
 
     hipified_sources = []
     for source in args.sources:
         s_abs = os.path.abspath(source)
-        hipified_s_abs = (hipify_result[s_abs].hipified_path if
-                          (s_abs in hipify_result
-                           and hipify_result[s_abs].hipified_path is not None)
-                          else s_abs)
+        hipified_s_abs = (
+            hipify_result[s_abs].hipified_path
+            if (
+                s_abs in hipify_result
+                and hipify_result[s_abs].hipified_path is not None
+            )
+            else s_abs
+        )
         hipified_sources.append(hipified_s_abs)
 
-    assert (len(hipified_sources) == len(args.sources))
+    assert len(hipified_sources) == len(args.sources)
 
     # Print hipified source files.
     print("\n".join(hipified_sources))
15 changes: 15 additions & 0 deletions csrc/cache.h
@@ -70,3 +70,18 @@ void indexer_k_quant_and_cache(
     torch::Tensor& slot_mapping,    // [num_tokens]
     int64_t quant_block_size,       // quantization block size
     const std::string& scale_fmt);
+
+// Gather K cache entries for the indexer into a contiguous buffer
+void cp_gather_indexer_k_cache(
+    const torch::Tensor& kv_cache,     // [num_blocks, block_size, cache_stride]
+    torch::Tensor& dst_k,              // [num_tokens, head_dim]
+    const torch::Tensor& block_table,  // [batch_size, num_blocks]
+    const torch::Tensor& cu_seq_lens); // [batch_size + 1]
+
+// Gather quantized K cache entries and their scales for the indexer
+void cp_gather_indexer_k_quant_cache(
+    const torch::Tensor& kv_cache,     // [num_blocks, block_size, cache_stride]
+    torch::Tensor& dst_k,              // [num_tokens, head_dim]
+    torch::Tensor& dst_scale,          // [num_tokens, head_dim / quant_block_size * 4]
+    const torch::Tensor& block_table,  // [batch_size, num_blocks]
+    const torch::Tensor& cu_seq_lens); // [batch_size + 1]
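The gather semantics implied by these signatures — walk each sequence's block table and copy per-token K rows into a contiguous destination — can be sketched in NumPy. The row-major paged layout, the `block_size`/`head_dim` parameters, and the function name below are assumptions drawn from the shape comments, not the actual kernel:

```python
import numpy as np


def gather_indexer_k_cache(kv_cache, block_table, cu_seq_lens, block_size, head_dim):
    """Reference sketch of cp_gather_indexer_k_cache: gather per-token K
    vectors from a paged cache into a dense [num_tokens, head_dim] buffer."""
    batch_size = cu_seq_lens.shape[0] - 1
    num_tokens = int(cu_seq_lens[-1])
    dst_k = np.empty((num_tokens, head_dim), dtype=kv_cache.dtype)
    for b in range(batch_size):
        seq_len = int(cu_seq_lens[b + 1] - cu_seq_lens[b])
        for tok in range(seq_len):
            # Locate the physical block for this token and the offset inside it.
            block = block_table[b, tok // block_size]
            dst_k[cu_seq_lens[b] + tok] = kv_cache[block, tok % block_size, :head_dim]
    return dst_k
```

The quantized variant would additionally copy per-block scale factors into `dst_scale`; the CUDA kernel presumably parallelizes this double loop over tokens rather than iterating serially.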