@@ -1328,6 +1328,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
     @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM"])
     def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
                    torch_compile, mtp_nextn, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
@@ -1375,8 +1380,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                          torch_compile, mtp_nextn, moe_backend):
         if torch_compile and pp_size > 1:
             pytest.skip("PP with torch.compile is not supported yet.")
-        if moe_backend == "TRTLLM" and get_sm_version() == 120:
-            pytest.skip("MOE TRTLLM backend does not support SM version 120")
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         # Picewise Cuda Graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = TorchCompileConfig(
@@ -1591,6 +1598,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -2148,6 +2160,11 @@ def test_nvfp4(
         torch_compile,
     ):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
             enable_piecewise_cuda_graph=cuda_graph and not attention_dp,
@@ -2268,6 +2285,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, moe_backend, eagle3):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
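
Note: the same SM-version guard now appears in five tests. A minimal sketch of how it could be consolidated into a shared helper; the helper name skip_trtllm_moe_on_unsupported_sm is hypothetical and not part of this diff, while pytest.skip and get_sm_version are the calls already used above (the import path for get_sm_version is assumed):

import pytest

from tensorrt_llm._utils import get_sm_version  # assumed import path


def skip_trtllm_moe_on_unsupported_sm(moe_backend: str) -> None:
    # Hypothetical helper: centralizes the repeated TRTLLM-on-SM-120/121
    # guard so each parametrized test calls it once at the top.
    if moe_backend == "TRTLLM" and get_sm_version() in (120, 121):
        pytest.skip(
            "MOE TRTLLM backend does not support SM version 120 or 121")

Each test would then start with skip_trtllm_moe_on_unsupported_sm(moe_backend), keeping the skip message in one place if more SM versions need to be excluded later.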