From e7f9ffd28e910c4a1842e4c8eec236f2b62c8219 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Tue, 19 May 2026 00:02:14 -0700 Subject: [PATCH 1/5] upd --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index d5c2f3d6b703..61d91db4af5d 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -58,7 +58,7 @@ dependencies = [ "scipy", "sentencepiece", "setproctitle", - "flash-attn-4>=4.0.0b9", + "flash-attn-4[cu13]", "sgl-deep-gemm==0.1.0", "sglang-kernel==0.4.2.post2", "soundfile==0.13.1", From 910ae3b1cfa01ffaed26b0d745c2130ea5d49033 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Tue, 19 May 2026 01:38:08 -0700 Subject: [PATCH 2/5] upd --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 61d91db4af5d..d5c2f3d6b703 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -58,7 +58,7 @@ dependencies = [ "scipy", "sentencepiece", "setproctitle", - "flash-attn-4[cu13]", + "flash-attn-4>=4.0.0b9", "sgl-deep-gemm==0.1.0", "sglang-kernel==0.4.2.post2", "soundfile==0.13.1", From 56ff529e44d6f6971efe4210369fd3fb62721c50 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Tue, 19 May 2026 01:47:01 -0700 Subject: [PATCH 3/5] Revert "[Fix] Try to fix error caused by latest cutedsl packages (#25690)" This reverts commit b79e4b1e687baa4cd36554856665b53acd9ce9b5. --- python/pyproject.toml | 4 ++-- scripts/ci/cuda/ci_install_dependency.sh | 21 ++------------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index d5c2f3d6b703..7ec785be51d8 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "datasets", "einops", "fastapi", - "flashinfer_python[cu13]==0.6.11.post1", # keep it aligned with jit-cache version in Dockerfile + "flashinfer_python==0.6.11.post1", # keep it aligned with jit-cache version in Dockerfile "flashinfer_cubin==0.6.11.post1", "gguf", "interegular", @@ -37,7 +37,7 @@ dependencies = [ "ninja", "easydict", # Required by remote model code (e.g. DeepSeek-OCR) loaded via trust_remote_code; validated by transformers 5.4+ check_imports "numpy", - "nvidia-cutlass-dsl[cu13]==4.5.0", + "nvidia-cutlass-dsl==4.5.0", "nvidia-ml-py", "openai-harmony==0.0.4", "openai==2.6.1", diff --git a/scripts/ci/cuda/ci_install_dependency.sh b/scripts/ci/cuda/ci_install_dependency.sh index 0f2146d649e3..398ba39931c4 100755 --- a/scripts/ci/cuda/ci_install_dependency.sh +++ b/scripts/ci/cuda/ci_install_dependency.sh @@ -186,8 +186,8 @@ uninstall_stale_flashinfer() { # Keep flashinfer packages if version matches to avoid re-downloading: # - flashinfer-cubin: 150+ MB # - flashinfer-jit-cache: 1.2+ GB - FLASHINFER_PYTHON_REQUIRED=$(grep -Po -m1 'flashinfer_python(\[[^]]+\])?==\K[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") - FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 'flashinfer_cubin(\[[^]]+\])?==\K[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") + FLASHINFER_PYTHON_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_python==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") + FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_cubin==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") FLASHINFER_CUBIN_INSTALLED=$(pip show flashinfer-cubin 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "") FLASHINFER_JIT_INSTALLED=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "") FLASHINFER_JIT_CU_VERSION=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed -n 's/.*+//p' || echo "") @@ -335,22 +335,6 @@ download_flashinfer_cache() { mark_step_done "${FUNCNAME[0]}" } -purge_cutlass_libs_base() { - # nvidia-cutlass-dsl[cu13] extras are additive on PyPI: requires_dist always - # pulls -libs-base AND -libs-cu13 when [cu13] is requested. Both wheels write - # to the same site-packages paths with different content, leaving the wrapper - # (cutlass.py, cu13 style) mismatched with the binding (_gpu_ops_gen.py, base - # style) -> GPUModuleOp signature TypeError. See vllm-project/vllm#40082. - # Uninstall -libs-base, then force-reinstall -libs-cu13 so its files win. - $PIP_UNINSTALL_CMD nvidia-cutlass-dsl-libs-base $PIP_UNINSTALL_SUFFIX || true - CUTLASS_DSL_VERSION=$(grep -Po -m1 'nvidia-cutlass-dsl(\[[^]]+\])?==\K[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") - if [ -n "$CUTLASS_DSL_VERSION" ]; then - $PIP_CMD install --force-reinstall --no-deps "nvidia-cutlass-dsl-libs-cu13==${CUTLASS_DSL_VERSION}" $PIP_INSTALL_SUFFIX - fi - - mark_step_done "${FUNCNAME[0]}" -} - stabilize_flashinfer_jit_paths() { # In venv mode, FlashInfer JIT writes build.ninja with hardcoded -isystem # paths. Per-job venvs get unique paths, but the JIT cache is shared on the @@ -504,7 +488,6 @@ main() { install_sglang_kernel install_sglang_router download_flashinfer_cache - purge_cutlass_libs_base stabilize_flashinfer_jit_paths install_extra_deps install_test_tools From f040d0ba056b8caab9db8b2d69f324dec3a9551b Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Tue, 19 May 2026 01:48:14 -0700 Subject: [PATCH 4/5] upd --- python/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7ec785be51d8..d5c2f3d6b703 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "datasets", "einops", "fastapi", - "flashinfer_python==0.6.11.post1", # keep it aligned with jit-cache version in Dockerfile + "flashinfer_python[cu13]==0.6.11.post1", # keep it aligned with jit-cache version in Dockerfile "flashinfer_cubin==0.6.11.post1", "gguf", "interegular", @@ -37,7 +37,7 @@ dependencies = [ "ninja", "easydict", # Required by remote model code (e.g. DeepSeek-OCR) loaded via trust_remote_code; validated by transformers 5.4+ check_imports "numpy", - "nvidia-cutlass-dsl==4.5.0", + "nvidia-cutlass-dsl[cu13]==4.5.0", "nvidia-ml-py", "openai-harmony==0.0.4", "openai==2.6.1", From 70683f5a0c3ec1f6f01281db82ac2fec5d82000f Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Tue, 19 May 2026 01:58:24 -0700 Subject: [PATCH 5/5] add --- scripts/ci/cuda/ci_install_dependency.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ci/cuda/ci_install_dependency.sh b/scripts/ci/cuda/ci_install_dependency.sh index 398ba39931c4..485d5b63c8cd 100755 --- a/scripts/ci/cuda/ci_install_dependency.sh +++ b/scripts/ci/cuda/ci_install_dependency.sh @@ -186,8 +186,8 @@ uninstall_stale_flashinfer() { # Keep flashinfer packages if version matches to avoid re-downloading: # - flashinfer-cubin: 150+ MB # - flashinfer-jit-cache: 1.2+ GB - FLASHINFER_PYTHON_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_python==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") - FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_cubin==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") + FLASHINFER_PYTHON_REQUIRED=$(grep -Po -m1 'flashinfer_python(\[[^]]+\])?==\K[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") + FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 'flashinfer_cubin(\[[^]]+\])?==\K[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "") FLASHINFER_CUBIN_INSTALLED=$(pip show flashinfer-cubin 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "") FLASHINFER_JIT_INSTALLED=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "") FLASHINFER_JIT_CU_VERSION=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed -n 's/.*+//p' || echo "")