@@ -180,23 +180,23 @@ def latency_command(
     logger.info("Preparing to run latency benchmark...")
     # Parameters from CLI
     # Model, experiment, and engine params
-    dataset_path: Path = params.pop("dataset")
-    num_requests: int = params.pop("num_requests")
+    dataset_path: Path = params.get("dataset")
+    num_requests: int = params.get("num_requests")
     model: str = bench_env.model
     checkpoint_path: Path = bench_env.checkpoint_path or bench_env.model
-    engine_dir: Path = params.pop("engine_dir")
-    concurrency: int = params.pop("concurrency")
-    beam_width: int = params.pop("beam_width")
+    engine_dir: Path = params.get("engine_dir")
+    concurrency: int = params.get("concurrency")
+    beam_width: int = params.get("beam_width")
     warmup: int = params.get("warmup")
-    modality: str = params.pop("modality")
-    max_input_len: int = params.pop("max_input_len")
-    max_seq_len: int = params.pop("max_seq_len")
+    modality: str = params.get("modality")
+    max_input_len: int = params.get("max_input_len")
+    max_seq_len: int = params.get("max_seq_len")
     backend: str = params.get("backend")
     model_type = get_model_config(model, checkpoint_path).model_type

     # Runtime Options
-    kv_cache_percent = params.pop("kv_cache_free_gpu_mem_fraction")
-    medusa_choices = params.pop("medusa_choices")
+    kv_cache_percent = params.get("kv_cache_free_gpu_mem_fraction")
+    medusa_choices = params.get("medusa_choices")

     # Reporting Options
     report_json: Path = params.pop("report_json")
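For context on the change itself: `dict.pop` returns the value and removes the key (raising `KeyError` if the key is absent and no default is given), while `dict.get` returns the value without mutating the dict and returns `None` for a missing key. So after this change the listed keys remain in `params`, and any later code that consumes the remaining entries will still see them. A minimal sketch of that difference, assuming `params` is a plain dict of parsed CLI options (its actual type is not shown in this hunk):

    # Illustrative only; 'dataset' and 'backend' stand in for the option keys above.
    params = {"dataset": "data.json", "num_requests": 100}

    dataset = params.pop("dataset")        # returns the value and removes the key
    backend = params.get("backend")        # missing key: returns None, dict unchanged
    requests = params.get("num_requests")  # value returned, key stays in the dict

    print(params)  # {'num_requests': 100} -- 'dataset' is gone, 'num_requests' remains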