diff --git a/tests/attention/test_trtllm_gen_attention.py b/tests/attention/test_trtllm_gen_attention.py index 6bd2065b3d..7ce086a6ac 100755 --- a/tests/attention/test_trtllm_gen_attention.py +++ b/tests/attention/test_trtllm_gen_attention.py @@ -349,8 +349,8 @@ def test_trtllm_batch_prefill( max_kv_len, ): compute_capability = get_compute_capability(torch.device(device="cuda")) - if compute_capability[0] in [11, 12]: - pytest.skip("trtllm-gen does not support SM110/SM120/SM121 GPUs.") + if compute_capability[0] != 10: + pytest.skip("These tests are only guaranteed to work on SM100 and SM103 GPUs.") # Set up test parameters torch.manual_seed(0) head_dim = 128 @@ -918,8 +918,8 @@ def test_trtllm_gen_prefill_deepseek( batch_size, s_qo, s_kv, num_kv_heads, head_grp_size, causal ): compute_capability = get_compute_capability(torch.device(device="cuda")) - if compute_capability[0] in [11, 12]: - pytest.skip("trtllm-gen does not support SM110/SM120/SM121 GPUs.") + if compute_capability[0] != 10: + pytest.skip("These tests are only guaranteed to work on SM100 and SM103 GPUs.") if s_qo > s_kv: pytest.skip("s_qo > s_kv, skipping test as causal") diff --git a/tests/attention/test_trtllm_gen_mla.py b/tests/attention/test_trtllm_gen_mla.py index 72a4a61cdf..db6d827d67 100644 --- a/tests/attention/test_trtllm_gen_mla.py +++ b/tests/attention/test_trtllm_gen_mla.py @@ -33,8 +33,8 @@ def test_trtllm_batch_decode_mla( enable_pdl: bool, ): compute_capability = get_compute_capability(torch.device(device="cuda")) - if compute_capability[0] in [11, 12]: - pytest.skip("trtllm-gen does not support SM110/SM120/SM121 GPUs.") + if compute_capability[0] != 10: + pytest.skip("These tests are only guaranteed to work on SM100 and SM103 GPUs.") if dynamic_scale and dtype != torch.float8_e4m3fn: pytest.skip("Dynamic scale is not supported for non-fp8 dtype") diff --git a/tests/gemm/test_bmm_fp8.py b/tests/gemm/test_bmm_fp8.py index f750edddc2..326fd2ab9c 100644 --- a/tests/gemm/test_bmm_fp8.py +++ b/tests/gemm/test_bmm_fp8.py @@ -17,7 +17,8 @@ @pytest.mark.parametrize("backend", ["cudnn", "cublas", "cutlass", "auto"]) @pytest.mark.parametrize("auto_tuning", [True, False]) def test_bmm_fp8(b, m, n, k, input_dtype, mat2_dtype, res_dtype, backend, auto_tuning): - if get_compute_capability(torch.device("cuda"))[0] == 12 and backend in [ + compute_capability = get_compute_capability(torch.device("cuda")) + if compute_capability[0] == 12 and backend in [ "cutlass", "auto", ]: @@ -25,6 +26,10 @@ def test_bmm_fp8(b, m, n, k, input_dtype, mat2_dtype, res_dtype, backend, auto_t pytest.xfail( "Not all test cases for CUTLASS bmm_fp8 on SM120/121 are passing at this moment" ) + if backend == "cutlass" and compute_capability[0] not in [10, 11, 12]: + pytest.skip( + "bmm_fp8 with cutlass backend is only supported on SM100, SM110, and SM120/121 GPUs." + ) if input_dtype == torch.float8_e5m2 and mat2_dtype == torch.float8_e5m2: pytest.skip("Invalid combination: both input and mat2 are e5m2") if input_dtype == torch.float8_e5m2 or mat2_dtype == torch.float8_e5m2: diff --git a/tests/gemm/test_groupwise_scaled_gemm_fp8.py b/tests/gemm/test_groupwise_scaled_gemm_fp8.py index e42f35dc65..641f2fdeee 100755 --- a/tests/gemm/test_groupwise_scaled_gemm_fp8.py +++ b/tests/gemm/test_groupwise_scaled_gemm_fp8.py @@ -43,6 +43,11 @@ def test_fp8_blockscale_gemm( scale_major_mode, out_dtype, ): + compute_capability = get_compute_capability(torch.device(device="cuda")) + if compute_capability[0] not in [10, 11, 12]: + pytest.skip( + "gemm_fp8_nt_blockscaled is only supported on SM100/103, SM110, and SM120/121 GPUs." + ) torch.random.manual_seed(0) tile_size = 128 @@ -83,8 +88,8 @@ def test_fp8_groupwise_gemm( scale_major_mode, backend, ): + compute_capability = get_compute_capability(torch.device(device="cuda")) if backend == "trtllm": - compute_capability = get_compute_capability(torch.device(device="cuda")) if compute_capability[0] != 10: pytest.skip( "gemm_fp8_nt_groupwise is only supported on SM100, SM103 in trtllm backend." @@ -93,7 +98,10 @@ def test_fp8_groupwise_gemm( pytest.skip("trtllm only supports MN scale_major_mode") if k < 256: pytest.skip("k < 256") - + if backend == "cutlass" and compute_capability[0] not in [10, 11, 12]: + pytest.skip( + "gemm_fp8_nt_groupwise with cutlass backend is only supported on SM100/103, SM110, and SM120/121 GPUs." + ) torch.random.manual_seed(0) tile_size = 128 out_dtype = torch.bfloat16 @@ -146,12 +154,17 @@ def test_fp8_groupwise_group_gemm( scale_major_mode, out_dtype, ): - if group_size > 1 and torch.cuda.get_device_capability()[0] in [ + compute_capability = get_compute_capability(torch.device(device="cuda")) + if group_size > 1 and compute_capability[0] in [ 12, ]: pytest.skip( "group_gemm_fp8_nt_groupwise has correctness issues for num_groups > 1 on SM120/121" ) + if compute_capability[0] not in [10, 12]: + pytest.skip( + "group_gemm_fp8_nt_groupwise is only supported on SM100/103, and SM120/121 GPUs." + ) torch.random.manual_seed(0) tile_size = 128 diff --git a/tests/gemm/test_groupwise_scaled_gemm_mxfp4.py b/tests/gemm/test_groupwise_scaled_gemm_mxfp4.py index fefe3f3e28..4b37cde14c 100644 --- a/tests/gemm/test_groupwise_scaled_gemm_mxfp4.py +++ b/tests/gemm/test_groupwise_scaled_gemm_mxfp4.py @@ -256,8 +256,10 @@ def test_mxfp8_mxfp4_groupwise_group_gemm( ): compute_capability = get_compute_capability(torch.device(device="cuda")) # TODO: We need to add gemm_mxfp4_nt_groupwise support for sm120/121 at some point. - if compute_capability[0] == 12: - pytest.skip("gemm_mxfp4_nt_groupwise is not supported in SM120/SM121.") + if compute_capability[0] not in [10]: + pytest.skip( + "gemm_mxfp4_nt_groupwise is only supported on SM100 and SM103 GPUs." + ) torch.random.manual_seed(0) tile_size = 32 alignment_n = 8 diff --git a/tests/gemm/test_mm_fp4.py b/tests/gemm/test_mm_fp4.py index 95975f6031..4c90bf3fe9 100644 --- a/tests/gemm/test_mm_fp4.py +++ b/tests/gemm/test_mm_fp4.py @@ -27,6 +27,12 @@ def test_mm_fp4( use_nvfp4 = fp4_type == "nvfp4" compute_capability = get_compute_capability(torch.device(device="cuda")) + compute_capability_number = compute_capability[0] * 10 + compute_capability[1] + if not mm_fp4.is_backend_supported(backend, compute_capability_number): + pytest.skip( + f"Skipping test for {backend} because it is not supported on compute capability {compute_capability_number}." + ) + if backend == "trtllm": if res_dtype == torch.float16: pytest.skip("Skipping test for trtllm fp4 with float16") diff --git a/tests/moe/test_trtllm_gen_fused_moe.py b/tests/moe/test_trtllm_gen_fused_moe.py index 03ef12d31c..a093d4c0aa 100644 --- a/tests/moe/test_trtllm_gen_fused_moe.py +++ b/tests/moe/test_trtllm_gen_fused_moe.py @@ -2035,8 +2035,8 @@ def test_moe_quantization_classes( Each quantization class clearly shows which precision is being used. """ compute_capability = get_compute_capability(torch.device(device="cuda")) - if compute_capability[0] in [11, 12]: - pytest.skip("trtllm-gen does not support SM110/SM120/SM121 GPUs.") + if compute_capability[0] not in [10]: + pytest.skip("These tests are only guaranteed to work on SM100 and SM103 GPUs.") # Skip incompatible combinations if gated_act_type == GatedActType.GeGlu and ( type(moe_impl) is not FP4Moe