diff --git a/tensorrt_llm/_torch/modules/fused_moe/quantization.py b/tensorrt_llm/_torch/modules/fused_moe/quantization.py
index 388b1a51d6f..150a1081a27 100644
--- a/tensorrt_llm/_torch/modules/fused_moe/quantization.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/quantization.py
@@ -479,10 +479,12 @@ def load_expert_w3_w1_weight(self,
         dst_w3_weight, dst_w1_weight = dst_w3_w1_weight.chunk(2, dim=0)
         if w1_weight is not None:
-            dst_w1_weight.copy_(w1_weight_shard.view(dst_w3_w1_weight.dtype),
+            dst_w1_weight.copy_(w1_weight_shard.contiguous().view(
+                dst_w3_w1_weight.dtype),
                                 non_blocking=True)
         if w3_weight is not None:
-            dst_w3_weight.copy_(w3_weight_shard.view(dst_w3_w1_weight.dtype),
+            dst_w3_weight.copy_(w3_weight_shard.contiguous().view(
+                dst_w3_w1_weight.dtype),
                                 non_blocking=True)
 
     # Helper function
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 2d7f718bb56..fe5229da3a3 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -411,6 +411,4 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mt
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5698897)
 unittest/_torch/modules/tests_lora_modules/test_lora_attention_pytorch_flow_vs_trt.py::TestLoraAttentionPytorchFlowVsTRT::test_lora_attention SKIP (https://nvbugs/5701421)
 unittest/llmapi/test_llm_pytorch.py::test_embedding_bias_with_torch_sampler_strategies SKIP (https://nvbugs/5702791)
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp4-cuda_graph=True] SKIP (https://nvbugs/5702793)
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_chunked_prefill[tp4ep4-cuda_graph=True] SKIP (https://nvbugs/5702793)
 accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[no_cuda_graph_overlap-cutlass] SKIP (https://nvbugs/5702795)
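
Note: the quantization.py change inserts .contiguous() before the dtype-reinterpreting view because torch.Tensor.view(dtype) places stride and size constraints on the input when the element sizes differ, and a tensor-parallel weight shard produced by slicing can violate them. The snippet below is a minimal standalone sketch of that failure mode and fix; the shapes, dtypes, and the "packed"/"shard" names are illustrative assumptions, not the module's real ones.

import torch

# Pretend this is a raw byte buffer holding packed weights; slicing along
# the last dimension leaves an odd row stride, as a TP shard can.
packed = torch.zeros(4, 9, dtype=torch.uint8)
shard = packed[:, :6]  # shape (4, 6), strides (9, 1): not contiguous

try:
    # Viewing uint8 as a 2-byte dtype requires every stride except the
    # last to be divisible by the element-size ratio; stride 9 is not.
    shard.view(torch.float16)
except RuntimeError as err:
    print("plain view(dtype) fails:", err)

# .contiguous() copies the shard into a densely strided buffer
# (strides (6, 1)), after which the reinterpreting view is legal.
ok = shard.contiguous().view(torch.float16)
print(ok.shape)  # torch.Size([4, 3])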