@@ -1338,6 +1338,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
     @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM"])
     def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
                    torch_compile, mtp_nextn, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
@@ -1385,8 +1390,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                          torch_compile, mtp_nextn, moe_backend):
         if torch_compile and pp_size > 1:
             pytest.skip("PP with torch.compile is not supported yet.")
-        if moe_backend == "TRTLLM" and get_sm_version() == 120:
-            pytest.skip("MOE TRTLLM backend does not support SM version 120")
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         # Picewise Cuda Graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = TorchCompileConfig(
@@ -1601,6 +1608,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -2157,6 +2169,11 @@ def test_nvfp4(
         torch_compile,
     ):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
             enable_piecewise_cuda_graph=cuda_graph and not attention_dp,
@@ -2277,6 +2294,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, moe_backend, eagle3):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
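
Side note: the same SM 120/121 guard now appears five times in this diff. A minimal sketch of a shared helper the tests could call instead — the helper name is hypothetical, and the import path for get_sm_version() is an assumption based on how the test module already uses it:

    import pytest

    from tensorrt_llm._utils import get_sm_version  # assumed import path


    def skip_unsupported_trtllm_moe(moe_backend):
        # Skip the current test when the TRTLLM MoE backend does not
        # support the detected SM version (SM 120 and 121 as of this diff).
        if moe_backend == "TRTLLM" and get_sm_version() in (120, 121):
            pytest.skip(
                "MOE TRTLLM backend does not support SM version 120 or 121")

Each test body would then open with skip_unsupported_trtllm_moe(moe_backend) rather than repeating the inline check, so future SM exclusions only need to be updated in one place.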