From 6ed8cfeb36ec42af4cc7b405b555522575b4a530 Mon Sep 17 00:00:00 2001 From: Pengbo Wang <221450789+pengbowang-nv@users.noreply.github.com> Date: Wed, 13 Aug 2025 08:31:34 +0000 Subject: [PATCH] fix deepseek r1 hang by not enabling mnnvl by default Signed-off-by: Pengbo Wang <221450789+pengbowang-nv@users.noreply.github.com> --- tensorrt_llm/_torch/distributed/ops.py | 3 +-- tests/integration/test_lists/waives.txt | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorrt_llm/_torch/distributed/ops.py b/tensorrt_llm/_torch/distributed/ops.py index c49d7806ee2..74ac9590a38 100644 --- a/tensorrt_llm/_torch/distributed/ops.py +++ b/tensorrt_llm/_torch/distributed/ops.py @@ -455,8 +455,7 @@ def __init__(self, self.workspace = get_allreduce_workspace(self.mapping) # Initialize MNNVL AllReduce if needed - if self.strategy in (AllReduceStrategy.AUTO, - AllReduceStrategy.MNNVL): + if self.strategy == AllReduceStrategy.MNNVL: if MNNVLAllReduce.is_mnnvl(self.mapping, dtype): try: self.mnnvl_allreduce = MNNVLAllReduce( diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index cb2d68fe503..3a4f5af16e5 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -288,7 +288,5 @@ examples/test_nemotron_nas.py::test_nemotron_nas_summary_1gpu[DeciLM-7B] SKIP (h accuracy/test_cli_flow.py::TestLongAlpaca7B::test_multiblock_aggressive SKIP (https://nvbugs/5444627) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687) -accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] SKIP (https://nvbugs/5445466) -accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] SKIP (https://nvbugs/5445466) examples/test_qwen2audio.py::test_llm_qwen2audio_single_gpu[qwen2_audio_7b_instruct] SKIP (https://nvbugs/5447530) examples/test_nemotron_nas.py::test_nemotron_nas_summary_2gpu[DeciLM-7B] SKIP (https://nvbugs/5444636)