@@ -1338,6 +1338,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
     @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM"])
     def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
                    torch_compile, mtp_nextn, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
@@ -1385,8 +1390,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                          torch_compile, mtp_nextn, moe_backend):
         if torch_compile and pp_size > 1:
             pytest.skip("PP with torch.compile is not supported yet.")
-        if moe_backend == "TRTLLM" and get_sm_version() == 120:
-            pytest.skip("MOE TRTLLM backend does not support SM version 120")
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         # Piecewise CUDA graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = TorchCompileConfig(
@@ -1601,6 +1608,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -2157,6 +2169,11 @@ def test_nvfp4(
         torch_compile,
     ):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
             enable_piecewise_cuda_graph=cuda_graph and not attention_dp,
@@ -2277,6 +2294,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, moe_backend, eagle3):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
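The patch inlines the same SM 120/121 guard verbatim into five test bodies. A minimal sketch of a shared helper that would deduplicate it, assuming the test module's existing `pytest` and `get_sm_version` imports; the helper name `skip_if_moe_trtllm_unsupported` is hypothetical and not part of this patch:

import pytest

# Import path assumed; the test module already imports get_sm_version.
from tensorrt_llm._utils import get_sm_version


def skip_if_moe_trtllm_unsupported(moe_backend):
    # Hypothetical helper: one place for the guard each test currently
    # inlines. Per the skip messages added in this commit, the MOE TRTLLM
    # backend does not support SM versions 120 and 121.
    if moe_backend == "TRTLLM" and get_sm_version() in (120, 121):
        pytest.skip(
            "MOE TRTLLM backend does not support SM version 120 or 121")

Each test body would then open with `skip_if_moe_trtllm_unsupported(moe_backend)`, so adding another unsupported SM version means touching one function instead of five tests.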