Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tests/integration/defs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1912,6 +1912,9 @@ def get_sm_version():

# Reusable skip markers. Each condition below is an ordinary call expression
# in a module-level assignment, so it is evaluated once when this module is
# imported, not separately for every decorated test.

# Skip the decorated test when ipc_nvls_supported() returns a falsy value.
skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(),
reason="NVLS is not supported")
# Skip the decorated test unless get_sm_version() returns exactly 90.
# NOTE(review): 90 is presumably the SM version of Hopper GPUs, per the
# reason string below — confirm against get_sm_version().
skip_no_hopper = pytest.mark.skipif(
get_sm_version() != 90,
reason="This test is only supported in Hopper architecture")


def skip_fp8_pre_ada(use_fp8):
Expand Down
15 changes: 15 additions & 0 deletions tests/integration/defs/disaggregated/test_disaggregated.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os

import pytest
from defs.conftest import skip_no_hopper
from defs.trt_test_alternative import check_call


Expand Down Expand Up @@ -140,6 +141,7 @@ def test_disaggregated_overlap(disaggregated_test_root, llm_venv,
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root,
Expand All @@ -161,6 +163,7 @@ def test_disaggregated_deepseek_v3_lite_fp8(disaggregated_test_root,
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu(
Expand All @@ -182,6 +185,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp(
Expand All @@ -203,12 +207,14 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root,
disaggregated_example_root,
llm_venv,
deepseek_v3_model_root):

src_dst_dict = {
deepseek_v3_model_root:
f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8",
Expand All @@ -224,6 +230,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root,
check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu(
Expand All @@ -244,6 +251,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu(
check_call(cmd, shell=True, env=env, cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_attention_dp(
Expand All @@ -266,6 +274,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap(
Expand All @@ -287,6 +296,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph(
Expand All @@ -309,6 +319,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_overlap_cuda_graph(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph(
Expand All @@ -331,6 +342,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one(
Expand All @@ -353,6 +365,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp(
Expand All @@ -375,11 +388,13 @@ def test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp(
cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_tp1_attention_dp_overlap_one_mtp(
disaggregated_test_root, disaggregated_example_root, llm_venv,
deepseek_v3_model_root):

src_dst_dict = {
deepseek_v3_model_root:
f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import cloudpickle
import pytest
from defs.conftest import skip_no_hopper
from mpi4py import MPI
from mpi4py.futures import MPIPoolExecutor

Expand Down Expand Up @@ -199,6 +200,7 @@ def test_disaggregated_simple_llama(model, generation_overlap,
])


@skip_no_hopper
@pytest.mark.parametrize("model", ["DeepSeek-V3-Lite-fp8/fp8"])
@pytest.mark.parametrize("generation_overlap", [False, True])
@pytest.mark.parametrize("enable_cuda_graph", [False, True])
Expand Down