
Commit 7e4022a (parent: 6fda8dd)

skip trtllm moe backend for sm120

Signed-off-by: Pamela <[email protected]>

1 file changed: 24 additions, 2 deletions

tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -1328,6 +1328,11 @@ def test_nvfp4_4gpus_online_eplb(self, fp8kv):
     @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM"])
     def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
                    torch_compile, mtp_nextn, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
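
For context: get_sm_version(), used by each of these guards, folds the GPU's compute capability into a single integer, so SM 120 and SM 121 denote compute capability 12.0 and 12.1. A minimal sketch of that mapping, assuming a torch-based implementation rather than the repo's actual helper:

import torch

def get_sm_version() -> int:
    # Fold compute capability (major, minor) into one integer:
    # (12, 0) -> 120, (12, 1) -> 121.
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor
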
@@ -1375,8 +1380,10 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                          torch_compile, mtp_nextn, moe_backend):
         if torch_compile and pp_size > 1:
             pytest.skip("PP with torch.compile is not supported yet.")
-        if moe_backend == "TRTLLM" and get_sm_version() == 120:
-            pytest.skip("MOE TRTLLM backend does not support SM version 120")
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
         # Picewise Cuda Graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = TorchCompileConfig(
@@ -1591,6 +1598,11 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -2148,6 +2160,11 @@ def test_nvfp4(
         torch_compile,
     ):
 
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         torch_compile_config = TorchCompileConfig(
             enable_fullgraph=True,
             enable_piecewise_cuda_graph=cuda_graph and not attention_dp,
@@ -2268,6 +2285,11 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, moe_backend, eagle3):
 
+        if moe_backend == "TRTLLM" and (get_sm_version() == 120
+                                        or get_sm_version() == 121):
+            pytest.skip(
+                "MOE TRTLLM backend does not support SM version 120 or 121")
+
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
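
The same four-line guard now appears in five tests. A hedged sketch of how it could be factored into one shared helper; maybe_skip_trtllm_moe, the constant name, and the get_sm_version import path are illustrative assumptions, not part of this commit:

import pytest

# Assumed import path for the helper the tests already call.
from tensorrt_llm._utils import get_sm_version

# SM versions the TRTLLM MoE backend currently lacks kernels for,
# per the skip messages added in this commit.
UNSUPPORTED_TRTLLM_MOE_SMS = (120, 121)

def maybe_skip_trtllm_moe(moe_backend: str) -> None:
    # Skip the calling test when the TRTLLM MoE backend cannot run
    # on the current GPU architecture.
    sm = get_sm_version()
    if moe_backend == "TRTLLM" and sm in UNSUPPORTED_TRTLLM_MOE_SMS:
        pytest.skip(
            f"MOE TRTLLM backend does not support SM version {sm}")

Each test body would then open with maybe_skip_trtllm_moe(moe_backend), and adding a future unsupported SM version becomes a one-line change instead of five.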
