2121include_test_map  =  {
2222    "gpt" : ("Gpt[^j]" , ),
2323    "gpt_executor" : ("GptExecutor" , ),
24+     "gpt_session" : ("GptSession" , ),
25+     "gpt_tests" : ("GptTests" , ),
2426    "gptj" : ("Gptj" , ),
2527    "llama" : ("Llama" , ),
2628    "chatglm" : ("ChatGlm" , ),
@@ -70,6 +72,16 @@ def generate_result_file_name(test_list: List[str],
7072        yield  "fp8" 
7173
7274
def generate_excluded_test_list(test_list: Sequence[str]):
    """Yield the test-name patterns of gpt sub-suites that were not requested.

    Nothing is yielded unless ``"gpt"`` itself is in ``test_list``. When it
    is, one pattern is yielded for each of ``gpt_session``, ``gpt_executor``
    and ``gpt_tests`` that is absent from ``test_list``, always in that order.

    Args:
        test_list: Names of the requested test suites.

    Yields:
        ``"GptSession"``, ``"GptExecutor"`` and/or ``"GptTests"``.
    """
    if "gpt" not in test_list:
        return

    # NOTE(review): presumably these exclusions exist because the broad "gpt"
    # include pattern ("Gpt[^j]") also matches the sub-suite names -- confirm
    # against how include_test_map is consumed.
    gpt_sub_suites = (
        ("gpt_session", "GptSession"),
        ("gpt_executor", "GptExecutor"),
        ("gpt_tests", "GptTests"),
    )
    for suite_name, excluded_pattern in gpt_sub_suites:
        if suite_name not in test_list:
            yield excluded_pattern
83+ 
84+ 
7385def  find_dir_containing (files : Sequence [str ],
7486                        start_dir : Optional [_pl .Path ] =  None ) ->  _pl .Path :
7587    if  start_dir  is  None :
@@ -621,8 +633,8 @@ def prepare_model_tests(model_name: str,
621633            beams_arg  =  ['--beams' , '1,2' ]
622634        model_name  =  'enc_dec' 
623635
624-     # share the same script for gpt and gpt_executor  
625-     if  model_name  ==  'gpt_executor' :
636+     # share the same script for gpt related tests  
637+     if  model_name  ==  'gpt_executor'   or   model_name   ==   'gpt_session'   or   model_name   ==   'gpt_tests' :
626638        model_name  =  'gpt' 
627639
628640    build_engines  =  [
@@ -716,8 +728,7 @@ def run_single_gpu_tests(build_dir: _pl.Path,
716728
717729    excluded_tests  =  ["FP8" ] if  not  run_fp8  else  []
718730
719-     if  "gpt"  in  test_list  and  "gpt_executor"  not  in test_list :
720-         excluded_tests .append ("GptExecutor" )
731+     excluded_tests .extend (list (generate_excluded_test_list (test_list )))
721732
722733    ctest  =  ["ctest" , "--output-on-failure" , "--output-junit" , resultFileName ]
723734
@@ -726,7 +737,15 @@ def run_single_gpu_tests(build_dir: _pl.Path,
726737        if  excluded_tests :
727738            ctest .extend (["-E" , "|" .join (excluded_tests )])
728739
729-         parallel  =  default_test_parallel 
740+         gpt_tests  =  {"gpt" , "gpt_session" , "gpt_tests" , "gpt_executor" }
741+ 
742+         # gpt* tests are not run in parallel: doing so would cause OOM, because KV cache
743+         # memory would be allocated by several tests running at the same time
744+         if  gpt_tests .intersection (test_list ):
745+             parallel  =  1 
746+         else :
747+             parallel  =  default_test_parallel 
748+ 
730749        if  parallel_override  :=  _os .environ .get ("LLM_TEST_PARALLEL_OVERRIDE" ,
731750                                                None ):
732751            parallel  =  int (parallel_override )
0 commit comments