diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py index 70f83667679..043f822c0da 100644 --- a/tests/integration/defs/conftest.py +++ b/tests/integration/defs/conftest.py @@ -1912,6 +1912,9 @@ def get_sm_version(): skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(), reason="NVLS is not supported") +skip_no_hopper = pytest.mark.skipif( + get_sm_version() != 90, + reason="This test is only supported in Hopper architecture") def skip_fp8_pre_ada(use_fp8): diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 4ca1d0104a4..10b950f4faa 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -16,6 +16,7 @@ import os import pytest +from defs.conftest import skip_no_hopper from defs.trt_test_alternative import check_call @@ -140,6 +141,7 @@ def test_disaggregated_overlap(disaggregated_test_root, llm_venv, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, @@ -161,6 +163,7 @@ def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( @@ -182,6 +185,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( @@ -203,12 +207,14 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -224,6 +230,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( @@ -244,6 +251,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( @@ -266,6 +274,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( @@ -287,6 +296,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( @@ -309,6 +319,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( @@ -331,6 +342,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( @@ -353,6 +365,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( @@ -375,11 +388,13 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_attention_dp_overlap_one_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 5280d2c134c..dc9ae6376df 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -5,6 +5,7 @@ import cloudpickle import pytest +from defs.conftest import skip_no_hopper from mpi4py import MPI from mpi4py.futures import MPIPoolExecutor @@ -199,6 +200,7 @@ def test_disaggregated_simple_llama(model, generation_overlap, ]) +@skip_no_hopper @pytest.mark.parametrize("model", ["DeepSeek-V3-Lite-fp8/fp8"]) @pytest.mark.parametrize("generation_overlap", [False, True]) @pytest.mark.parametrize("enable_cuda_graph", [False, True])