@@ -1565,6 +1565,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
     @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM"])
     def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
                    torch_compile, mtp_nextn, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
@@ -1613,8 +1618,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                          torch_compile, mtp_nextn, moe_backend):
         if torch_compile and pp_size > 1:
             pytest.skip("PP with torch.compile is not supported yet.")
-        if moe_backend == "TRTLLM" and get_sm_version() == 120:
-            pytest.skip("MOE TRTLLM backend does not support SM version 120")
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         # Piecewise Cuda Graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = TorchCompileConfig(
@@ -1885,6 +1892,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -2509,6 +2521,11 @@ def test_nvfp4(
             torch_compile,
     ):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
             enable_piecewise_cuda_graph=cuda_graph,
@@ -2700,6 +2717,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, moe_backend, eagle3):

+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
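Note: the same TRTLLM/SM 120/121 guard is now repeated in five tests. A minimal sketch of one way to factor it into a shared helper; the helper name skip_trtllm_moe_on_sm120 and the import path shown for get_sm_version are assumptions for illustration, not part of this diff:

import pytest

from tensorrt_llm._utils import get_sm_version  # assumed import path


def skip_trtllm_moe_on_sm120(moe_backend):
    # Hypothetical helper mirroring the guard added above: skip any test
    # that selects the MOE TRTLLM backend on SM 120/121, where it is
    # unsupported. Each test body could then call
    # skip_trtllm_moe_on_sm120(moe_backend) instead of repeating the check.
    if moe_backend == "TRTLLM" and get_sm_version() in (120, 121):
        pytest.skip(
            "MOE TRTLLM backend does not support SM version 120 or 121")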