Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions tests/integration/defs/cpp_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
include_test_map = {
"gpt": ("Gpt[^j]", ),
"gpt_executor": ("GptExecutor", ),
"gpt_session": ("GptSession", ),
"gpt_tests": ("GptTests", ),
"gptj": ("Gptj", ),
"llama": ("Llama", ),
"chatglm": ("ChatGlm", ),
Expand Down Expand Up @@ -70,6 +72,16 @@ def generate_result_file_name(test_list: List[str],
yield "fp8"


def generate_excluded_test_list(test_list):
    """Yield ctest name patterns that must be excluded for this test list.

    When the generic "gpt" model is selected, the more specific gpt variants
    each run as their own entry; any variant NOT explicitly requested is
    excluded so its tests do not run twice under the generic "gpt" pass.
    """
    if "gpt" not in test_list:
        return
    # (model key, ctest exclusion pattern) pairs, in the order they are emitted.
    variant_patterns = (
        ("gpt_session", "GptSession"),
        ("gpt_executor", "GptExecutor"),
        ("gpt_tests", "GptTests"),
    )
    for model, pattern in variant_patterns:
        if model not in test_list:
            yield pattern


def find_dir_containing(files: Sequence[str],
start_dir: Optional[_pl.Path] = None) -> _pl.Path:
if start_dir is None:
Expand Down Expand Up @@ -621,8 +633,8 @@ def prepare_model_tests(model_name: str,
beams_arg = ['--beams', '1,2']
model_name = 'enc_dec'

# share the same script for gpt and gpt_executor
if model_name == 'gpt_executor':
# share the same script for gpt related tests
if model_name == 'gpt_executor' or model_name == 'gpt_session' or model_name == 'gpt_tests':
model_name = 'gpt'

build_engines = [
Expand Down Expand Up @@ -716,8 +728,7 @@ def run_single_gpu_tests(build_dir: _pl.Path,

excluded_tests = ["FP8"] if not run_fp8 else []

if "gpt" in test_list and "gpt_executor" not in test_list:
excluded_tests.append("GptExecutor")
excluded_tests.extend(list(generate_excluded_test_list(test_list)))

ctest = ["ctest", "--output-on-failure", "--output-junit", resultFileName]

Expand All @@ -726,7 +737,15 @@ def run_single_gpu_tests(build_dir: _pl.Path,
if excluded_tests:
ctest.extend(["-E", "|".join(excluded_tests)])

parallel = default_test_parallel
gpt_tests = {"gpt", "gpt_session", "gpt_tests", "gpt_executor"}

# gpt* tests are not parallelized because the kv cache memory allocations
# of multiple concurrently running tests would cause OOM
if gpt_tests.intersection(test_list):
parallel = 1
else:
parallel = default_test_parallel

if parallel_override := _os.environ.get("LLM_TEST_PARALLEL_OVERRIDE",
None):
parallel = int(parallel_override)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/defs/test_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,8 @@ def test_unit_tests(build_google_tests, build_dir, lora_setup):
indirect=True)
@pytest.mark.parametrize("model", [
"bart", "chatglm", "eagle", "encoder", "enc_dec_language_adapter", "gpt",
"gpt_executor", "llama", "mamba", "medusa", "recurrentgemma", "redrafter",
"t5"
"gpt_executor", "gpt_session", "gpt_tests", "llama", "mamba", "medusa",
"recurrentgemma", "redrafter", "t5"
])
@pytest.mark.parametrize("run_fp8", [False, True], ids=["", "fp8"])
def test_model(build_google_tests, model, prepare_model, run_model_tests,
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_lists/test-db/l0_a30.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ l0_a30:
- test_cpp.py::test_unit_tests[80]
- test_cpp.py::test_model[gpt-80]
- test_cpp.py::test_model[gpt_executor-80]
- test_cpp.py::test_model[gpt_session-80]
- test_cpp.py::test_model[gpt_tests-80]
- test_cpp.py::test_benchmarks[gpt-80]
- condition:
ranges:
Expand Down