From a3086ad7056dc5fbae742c2e87a11c93c52359aa Mon Sep 17 00:00:00 2001 From: Chuang Zhu <111838961+chuangz0@users.noreply.github.com> Date: Mon, 21 Apr 2025 06:34:15 +0000 Subject: [PATCH 1/2] skip disagg deepseek test if sm!=90 Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com> --- .../defs/disaggregated/test_disaggregated.py | 50 +++++++++++++++++++ .../test_disaggregated_single_gpu.py | 6 ++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 4ca1d0104a4..5c8a126cf02 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -18,6 +18,8 @@ import pytest from defs.trt_test_alternative import check_call +from tensorrt_llm._utils import get_sm_version + @pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'], indirect=True) @@ -145,6 +147,10 @@ def test_disaggregated_overlap(disaggregated_test_root, llm_venv, def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -166,6 +172,10 @@ def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -187,6 +197,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -209,6 +223,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -229,6 +247,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -249,6 +271,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -271,6 +297,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( disaggregated_test_root, llm_venv, disaggregated_example_root, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -292,6 +322,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -314,6 +348,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -336,6 +374,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -358,6 +400,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -380,6 +426,10 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( def test_disaggregated_deepseek_v3_lite_fp8_tp1_attention_dp_overlap_one_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 5280d2c134c..3d11689f143 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -11,7 +11,7 @@ from tensorrt_llm import DisaggregatedParams, SamplingParams from tensorrt_llm._torch import LLM from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig -from tensorrt_llm._utils import set_mpi_comm +from tensorrt_llm._utils import get_sm_version, set_mpi_comm from tensorrt_llm.llmapi import KvCacheConfig, MpiCommSession cloudpickle.register_pickle_by_value(sys.modules[__name__]) @@ -204,6 +204,10 @@ def test_disaggregated_simple_llama(model, generation_overlap, @pytest.mark.parametrize("enable_cuda_graph", [False, True]) def test_disaggregated_simple_deepseek(model, generation_overlap, enable_cuda_graph): + if (get_sm_version() != 90): + pytest.skip( + f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" + ) verify_disaggregated( model, generation_overlap, enable_cuda_graph, "What is the capital of Germany?", From 141127c43129f81a11ffee8d6ab369659d73d520 Mon Sep 17 00:00:00 2001 From: Chuang Zhu <111838961+chuangz0@users.noreply.github.com> Date: Tue, 22 Apr 2025 10:10:28 +0000 Subject: [PATCH 2/2] skip_no_hopper Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com> --- tests/integration/defs/conftest.py | 3 + .../defs/disaggregated/test_disaggregated.py | 65 +++++-------------- .../test_disaggregated_single_gpu.py | 8 +-- 3 files changed, 21 insertions(+), 55 deletions(-) diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py index 70f83667679..043f822c0da 100644 --- a/tests/integration/defs/conftest.py +++ b/tests/integration/defs/conftest.py @@ -1912,6 +1912,9 @@ def get_sm_version(): skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(), reason="NVLS is not supported") +skip_no_hopper = pytest.mark.skipif( + get_sm_version() != 90, + reason="This test is only supported in Hopper architecture") def skip_fp8_pre_ada(use_fp8): diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 5c8a126cf02..10b950f4faa 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -16,10 +16,9 @@ import os import pytest +from defs.conftest import skip_no_hopper from defs.trt_test_alternative import check_call -from tensorrt_llm._utils import get_sm_version - @pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'], indirect=True) @@ -142,15 +141,12 @@ def test_disaggregated_overlap(disaggregated_test_root, llm_venv, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -167,15 +163,12 @@ def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -192,15 +185,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -217,16 +207,14 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) + src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -242,15 +230,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -266,15 +251,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -292,15 +274,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( disaggregated_test_root, llm_venv, disaggregated_example_root, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -317,15 +296,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -343,15 +319,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -369,15 +342,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -395,15 +365,12 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", @@ -421,15 +388,13 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp( cwd=llm_venv.get_working_directory()) +@skip_no_hopper @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_tp1_attention_dp_overlap_one_mtp( disaggregated_test_root, disaggregated_example_root, llm_venv, deepseek_v3_model_root): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) + src_dst_dict = { deepseek_v3_model_root: f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8", diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 3d11689f143..dc9ae6376df 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -5,13 +5,14 @@ import cloudpickle import pytest +from defs.conftest import skip_no_hopper from mpi4py import MPI from mpi4py.futures import MPIPoolExecutor from tensorrt_llm import DisaggregatedParams, SamplingParams from tensorrt_llm._torch import LLM from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig -from tensorrt_llm._utils import get_sm_version, set_mpi_comm +from tensorrt_llm._utils import set_mpi_comm from tensorrt_llm.llmapi import KvCacheConfig, MpiCommSession cloudpickle.register_pickle_by_value(sys.modules[__name__]) @@ -199,15 +200,12 @@ def test_disaggregated_simple_llama(model, generation_overlap, ]) +@skip_no_hopper @pytest.mark.parametrize("model", ["DeepSeek-V3-Lite-fp8/fp8"]) @pytest.mark.parametrize("generation_overlap", [False, True]) @pytest.mark.parametrize("enable_cuda_graph", [False, True]) def test_disaggregated_simple_deepseek(model, generation_overlap, enable_cuda_graph): - if (get_sm_version() != 90): - pytest.skip( - f"DeepSeek FP8 is not supported in this SM version {get_sm_version()}" - ) verify_disaggregated( model, generation_overlap, enable_cuda_graph, "What is the capital of Germany?",