diff --git a/tests/kernels/moe/test_unquantized_backend_selection.py b/tests/kernels/moe/test_unquantized_backend_selection.py index fcb79ee8f296..bf5a547fe3df 100644 --- a/tests/kernels/moe/test_unquantized_backend_selection.py +++ b/tests/kernels/moe/test_unquantized_backend_selection.py @@ -9,6 +9,7 @@ UnquantizedMoeBackend, select_unquantized_moe_backend, ) +from vllm.platforms import current_platform @pytest.mark.parametrize( @@ -65,6 +66,9 @@ def test_select_default_backend_by_platform( "vllm.model_executor.layers.fused_moe.oracle.unquantized.is_supported_config_trtllm_bf16", return_value=(True, None), ) +@pytest.mark.skipif( + not current_platform.is_cuda(), reason="Only supported on NVIDIA platforms." +) def test_select_cuda_flashinfer_trtllm_backend( mock_has_flashinfer, mock_is_supported_trtllm, monkeypatch ): @@ -101,6 +105,9 @@ def test_select_cuda_flashinfer_trtllm_backend( "vllm.model_executor.layers.fused_moe.oracle.unquantized.is_supported_config_trtllm_bf16", return_value=(False, None), ) +@pytest.mark.skipif( + not current_platform.is_cuda(), reason="Only supported on NVIDIA platforms." +) def test_select_cuda_flashinfer_cutlass_backend( mock_has_flashinfer, mock_is_supported_trtllm, monkeypatch ):