2121include_test_map = {
2222 "gpt" : ("Gpt[^j]" , ),
2323 "gpt_executor" : ("GptExecutor" , ),
24+ "gpt_session" : ("GptSession" , ),
25+ "gpt_tests" : ("GptTests" , ),
2426 "gptj" : ("Gptj" , ),
2527 "llama" : ("Llama" , ),
2628 "chatglm" : ("ChatGlm" , ),
@@ -70,6 +72,16 @@ def generate_result_file_name(test_list: List[str],
7072 yield "fp8"
7173
7274
def generate_excluded_test_list(test_list):
    """Yield the GPT sub-suite name patterns that should be excluded.

    Nothing is yielded unless ``"gpt"`` is present in *test_list*. When it
    is, each of the sub-suites ``gpt_session``, ``gpt_executor`` and
    ``gpt_tests`` that was NOT requested contributes its test-name pattern
    (``"GptSession"``, ``"GptExecutor"``, ``"GptTests"``), in that order.
    """
    # Without the umbrella "gpt" entry there is nothing to carve out.
    if "gpt" not in test_list:
        return

    # (requested test key, pattern to exclude when that key is absent)
    sub_suites = (
        ("gpt_session", "GptSession"),
        ("gpt_executor", "GptExecutor"),
        ("gpt_tests", "GptTests"),
    )
    for requested_key, excluded_pattern in sub_suites:
        if requested_key not in test_list:
            yield excluded_pattern
83+
84+
7385def find_dir_containing (files : Sequence [str ],
7486 start_dir : Optional [_pl .Path ] = None ) -> _pl .Path :
7587 if start_dir is None :
@@ -621,8 +633,8 @@ def prepare_model_tests(model_name: str,
621633 beams_arg = ['--beams' , '1,2' ]
622634 model_name = 'enc_dec'
623635
624- # share the same script for gpt and gpt_executor
625- if model_name == 'gpt_executor' :
636+ # share the same script for gpt related tests
637+ if model_name == 'gpt_executor' or model_name == 'gpt_session' or model_name == 'gpt_tests' :
626638 model_name = 'gpt'
627639
628640 build_engines = [
@@ -716,8 +728,7 @@ def run_single_gpu_tests(build_dir: _pl.Path,
716728
717729 excluded_tests = ["FP8" ] if not run_fp8 else []
718730
719- if "gpt" in test_list and "gpt_executor" not in test_list :
720- excluded_tests .append ("GptExecutor" )
731+ excluded_tests .extend (list (generate_excluded_test_list (test_list )))
721732
722733 ctest = ["ctest" , "--output-on-failure" , "--output-junit" , resultFileName ]
723734
@@ -726,7 +737,15 @@ def run_single_gpu_tests(build_dir: _pl.Path,
726737 if excluded_tests :
727738 ctest .extend (["-E" , "|" .join (excluded_tests )])
728739
729- parallel = default_test_parallel
740+ gpt_tests = {"gpt" , "gpt_session" , "gpt_tests" , "gpt_executor" }
741+
742+ # gpt* tests are not parallelized as it would cause OOM because kv cache memory allocations
743+ # exist in multiple running tests
744+ if gpt_tests .intersection (test_list ):
745+ parallel = 1
746+ else :
747+ parallel = default_test_parallel
748+
730749 if parallel_override := _os .environ .get ("LLM_TEST_PARALLEL_OVERRIDE" ,
731750 None ):
732751 parallel = int (parallel_override )
0 commit comments