From 5f206c388bdf7aa4cd4a01a3f832799fbd83f678 Mon Sep 17 00:00:00 2001 From: hongchao Date: Fri, 29 Aug 2025 10:12:13 +0000 Subject: [PATCH 1/2] fix undefined silu_and_mul_nvfp4_quant Signed-off-by: hongchao --- csrc/ops.h | 4 ++-- csrc/torch_bindings.cpp | 3 ++- vllm/compilation/fix_functionalization.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/csrc/ops.h b/csrc/ops.h index 78a487201bdd..7a176a5c0032 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -130,8 +130,8 @@ void silu_and_mul(torch::Tensor& out, torch::Tensor& input); void silu_and_mul_quant(torch::Tensor& out, torch::Tensor& input, torch::Tensor& scale); -#ifndef USE_ROCM - +#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ + (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) void silu_and_mul_nvfp4_quant(torch::Tensor& out, torch::Tensor& output_block_scale, torch::Tensor& input, diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index b769c09adc0f..56626a02c027 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -115,7 +115,8 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { "silu_and_mul_quant(Tensor! result, Tensor input, Tensor scale) -> ()"); ops.impl("silu_and_mul_quant", torch::kCUDA, &silu_and_mul_quant); -#ifndef USE_ROCM +#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ + (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) ops.def( "silu_and_mul_nvfp4_quant(Tensor! result, Tensor! result_block_scale, " "Tensor input, Tensor input_global_scale) -> ()"); diff --git a/vllm/compilation/fix_functionalization.py b/vllm/compilation/fix_functionalization.py index a36dd8b845f1..c49787bbd085 100644 --- a/vllm/compilation/fix_functionalization.py +++ b/vllm/compilation/fix_functionalization.py @@ -97,7 +97,9 @@ def __call__(self, graph: torch.fx.Graph): node, mutated_args, args=('result', 'input', 'scale')) - elif at_target == torch.ops._C.silu_and_mul_nvfp4_quant.default: + elif current_platform.has_device_capability( + 100 + ) and at_target == torch.ops._C.silu_and_mul_nvfp4_quant.default: mutated_args = {1: 'result', 2: 'result_block_scale'} self.defunctionalize(graph, node, From 551ea6567f11ef17063f6cb1fc2ab8cf91d8c928 Mon Sep 17 00:00:00 2001 From: Richard Zou Date: Fri, 29 Aug 2025 09:13:34 -0700 Subject: [PATCH 2/2] update Signed-off-by: Richard Zou --- vllm/compilation/fix_functionalization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/compilation/fix_functionalization.py b/vllm/compilation/fix_functionalization.py index c49787bbd085..6bc721eec3d4 100644 --- a/vllm/compilation/fix_functionalization.py +++ b/vllm/compilation/fix_functionalization.py @@ -97,8 +97,8 @@ def __call__(self, graph: torch.fx.Graph): node, mutated_args, args=('result', 'input', 'scale')) - elif current_platform.has_device_capability( - 100 + elif hasattr( + torch.ops._C, "silu_and_mul_nvfp4_quant" ) and at_target == torch.ops._C.silu_and_mul_nvfp4_quant.default: mutated_args = {1: 'result', 2: 'result_block_scale'} self.defunctionalize(graph,