@@ -81,7 +81,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
     }
     data = {"text": prompts, "sampling_params": sampling_params}
 
-    start_time = time.time()
+    start_time = time.perf_counter()
     try:
         response = requests.post(
             endpoint.base_url + "/generate", json=data, timeout=3600
@@ -90,7 +90,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
             error = response.json()
             raise RuntimeError(f"Request {request_id} failed: {error}")
         result = response.json()
-        elapsed_time = (time.time() - start_time) * 1000  # Convert to ms
+        elapsed_time = (time.perf_counter() - start_time) * 1000  # Convert to ms
         avg_per_prompt = elapsed_time / len(prompts) if prompts else 0
         return request_id, elapsed_time, avg_per_prompt, True, len(prompts)
     except Exception as e:
@@ -104,7 +104,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
     num_requests = len(batched_prompts)
 
     # Record start time for total latency
-    benchmark_start_time = time.time()
+    benchmark_start_time = time.perf_counter()
 
     for i, batch_prompts in enumerate(batched_prompts):
         request_id = i + 1
@@ -119,7 +119,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
         results.append(result)
 
     # Calculate total latency
-    total_latency = (time.time() - benchmark_start_time) * 1000  # Convert to ms
+    total_latency = (time.perf_counter() - benchmark_start_time) * 1000  # Convert to ms
 
     return results, total_latency
 
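For context on the clock swap above: time.time() reads the system wall clock, which can be adjusted (e.g. by NTP) while a request is in flight, whereas time.perf_counter() is a monotonic, high-resolution clock meant for measuring intervals. A minimal standalone sketch of the same timing pattern, separate from the benchmark script (the helper name time_request is hypothetical):

import time

def time_request(fn, *args, **kwargs):
    """Run fn and return (result, elapsed_ms) using a monotonic clock.

    perf_counter() is preferred over time() for durations: it cannot
    jump backwards if the system clock is adjusted mid-measurement.
    """
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    elapsed_ms = (time.perf_counter() - start) * 1000  # seconds -> ms
    return result, elapsed_ms

if __name__ == "__main__":
    _, ms = time_request(time.sleep, 0.05)
    print(f"slept for {ms:.1f} ms")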