@@ -1328,6 +1328,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
13281328 @parametrize_with_ids ("moe_backend" , ["CUTLASS" , "TRTLLM" ])
13291329 def test_nvfp4 (self , fp8kv , attention_dp , cuda_graph , overlap_scheduler ,
13301330 torch_compile , mtp_nextn , moe_backend ):
1331+ if moe_backend == "TRTLLM" and (get_sm_version () == 120
1332+ or get_sm_version () == 121 ):
1333+ pytest .skip (
1334+ "MOE TRTLLM backend does not support SM version 120 or 121" )
1335+
13311336 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.75 )
13321337 torch_compile_config = TorchCompileConfig (
13331338 enable_fullgraph = True ,
@@ -1375,8 +1380,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
13751380 torch_compile , mtp_nextn , moe_backend ):
13761381 if torch_compile and pp_size > 1 :
13771382 pytest .skip ("PP with torch.compile is not supported yet." )
1378- if moe_backend == "TRTLLM" and get_sm_version () == 120 :
1379- pytest .skip ("MOE TRTLLM backend does not support SM version 120" )
1383+ if moe_backend == "TRTLLM" and (get_sm_version () == 120
1384+ or get_sm_version () == 121 ):
1385+ pytest .skip (
1386+ "MOE TRTLLM backend does not support SM version 120 or 121" )
13801387 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.75 )
13811388 # Piecewise Cuda Graph cannot be enabled for nvfp4 attention dp.
13821389 torch_compile_config = TorchCompileConfig (
@@ -1591,6 +1598,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
15911598 def test_nvfp4_multi_gpus (self , tp_size , pp_size , ep_size , mtp_nextn , fp8kv ,
15921599 attention_dp , cuda_graph , overlap_scheduler ,
15931600 max_batch_size , moe_backend ):
1601+ if moe_backend == "TRTLLM" and (get_sm_version () == 120
1602+ or get_sm_version () == 121 ):
1603+ pytest .skip (
1604+ "MOE TRTLLM backend does not support SM version 120 or 121" )
1605+
15941606 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.70 )
15951607 pytorch_config = dict (
15961608 disable_overlap_scheduler = not overlap_scheduler ,
@@ -2148,6 +2160,11 @@ def test_nvfp4(
21482160 torch_compile ,
21492161 ):
21502162
2163+ if moe_backend == "TRTLLM" and (get_sm_version () == 120
2164+ or get_sm_version () == 121 ):
2165+ pytest .skip (
2166+ "MOE TRTLLM backend does not support SM version 120 or 121" )
2167+
21512168 torch_compile_config = TorchCompileConfig (
21522169 enable_fullgraph = True ,
21532170 enable_piecewise_cuda_graph = cuda_graph and not attention_dp ,
@@ -2268,6 +2285,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
22682285 def test_nvfp4 (self , tp_size , pp_size , ep_size , attention_dp , cuda_graph ,
22692286 overlap_scheduler , moe_backend , eagle3 ):
22702287
2288+ if moe_backend == "TRTLLM" and (get_sm_version () == 120
2289+ or get_sm_version () == 121 ):
2290+ pytest .skip (
2291+ "MOE TRTLLM backend does not support SM version 120 or 121" )
2292+
22712293 pytorch_config = dict (
22722294 disable_overlap_scheduler = not overlap_scheduler ,
22732295 cuda_graph_config = CudaGraphConfig () if cuda_graph else None ,
0 commit comments