From 628c9d2a7c53acbe4674590a93c5115f9335189b Mon Sep 17 00:00:00 2001 From: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> Date: Wed, 16 Apr 2025 05:17:14 -0700 Subject: [PATCH 1/2] Fix: nvbugs/5222698 variable not defined Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> --- tensorrt_llm/_torch/models/modeling_deepseekv3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorrt_llm/_torch/models/modeling_deepseekv3.py b/tensorrt_llm/_torch/models/modeling_deepseekv3.py index fdbe1cba5a9..ee30574d3ad 100644 --- a/tensorrt_llm/_torch/models/modeling_deepseekv3.py +++ b/tensorrt_llm/_torch/models/modeling_deepseekv3.py @@ -491,7 +491,7 @@ def forward( min_latency_mode = True if hidden_states.size( 0 - ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 else False + ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 and not using_prev_fusion else False if residual is None: residual = hidden_states @@ -510,6 +510,7 @@ def forward( ) if self.fusion_config.PRE_MOE_FUSION: + hidden_states_fp4 = None # Custom AR Fusion for DeepseekV3 if using_prev_fusion: # Custom AR Fusion for DeepseekV3 From 96d32bc7fc362da1d0bbc95991f1242f90a6cb9c Mon Sep 17 00:00:00 2001 From: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> Date: Thu, 17 Apr 2025 20:24:21 -0700 Subject: [PATCH 2/2] Tidy code Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> --- tensorrt_llm/_torch/models/modeling_deepseekv3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorrt_llm/_torch/models/modeling_deepseekv3.py b/tensorrt_llm/_torch/models/modeling_deepseekv3.py index ee30574d3ad..0993daf3ffc 100644 --- a/tensorrt_llm/_torch/models/modeling_deepseekv3.py +++ b/tensorrt_llm/_torch/models/modeling_deepseekv3.py @@ -489,9 +489,9 @@ def forward( using_prev_fusion = self.deepseek_allreduce_disabled or hidden_states.size( 0) > 128 - min_latency_mode = True if hidden_states.size( + min_latency_mode = hidden_states.size( 0 - ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 and not using_prev_fusion else False + ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 and not using_prev_fusion if residual is None: residual = hidden_states @@ -509,8 +509,8 @@ def forward( **kwargs, ) + hidden_states_fp4 = None if self.fusion_config.PRE_MOE_FUSION: - hidden_states_fp4 = None # Custom AR Fusion for DeepseekV3 if using_prev_fusion: # Custom AR Fusion for DeepseekV3