NVIDIA · shuyixiong · Dec 2, 2025 · Dec 1, 2025 · Dec 1, 2025
@@ -479,10 +479,12 @@ def load_expert_w3_w1_weight(self,
 
         dst_w3_weight, dst_w1_weight = dst_w3_w1_weight.chunk(2, dim=0)
         if w1_weight is not None:
-            dst_w1_weight.copy_(w1_weight_shard.view(dst_w3_w1_weight.dtype),
+            dst_w1_weight.copy_(w1_weight_shard.contiguous().view(
+                dst_w3_w1_weight.dtype),
                                 non_blocking=True)
         if w3_weight is not None:
-            dst_w3_weight.copy_(w3_weight_shard.view(dst_w3_w1_weight.dtype),
+            dst_w3_weight.copy_(w3_weight_shard.contiguous().view(
+                dst_w3_w1_weight.dtype),
                                 non_blocking=True)
 
     # Helper function

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -411,6 +411,4 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mt
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5698897)
 unittest/_torch/modules/tests_lora_modules/test_lora_attention_pytorch_flow_vs_trt.py::TestLoraAttentionPytorchFlowVsTRT::test_lora_attention SKIP (https://nvbugs/5701421)
 unittest/llmapi/test_llm_pytorch.py::test_embedding_bias_with_torch_sampler_strategies SKIP (https://nvbugs/5702791)
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp4-cuda_graph=True] SKIP (https://nvbugs/5702793)
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_chunked_prefill[tp4ep4-cuda_graph=True] SKIP (https://nvbugs/5702793)
 accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[no_cuda_graph_overlap-cutlass] SKIP (https://nvbugs/5702795)