@@ -56,8 +56,8 @@ class BenchmarkMetrics:
     total_input: int
     total_output: int
     request_throughput: float
-    input_throughput: float
     output_throughput: float
+    total_token_throughput: float
     mean_ttft_ms: float
     median_ttft_ms: float
     std_ttft_ms: float
@@ -283,8 +283,8 @@ def calculate_metrics(
         total_input=total_input,
         total_output=sum(actual_output_lens),
         request_throughput=completed / dur_s,
-        input_throughput=total_input / dur_s,
         output_throughput=sum(actual_output_lens) / dur_s,
+        total_token_throughput=(total_input + sum(actual_output_lens)) / dur_s,
         mean_ttft_ms=np.mean(ttfts or 0) *
         1000,  # ttfts is empty if streaming is not supported by backend
         std_ttft_ms=np.std(ttfts or 0) * 1000,
@@ -426,19 +426,19 @@ async def benchmark(
                                     metrics.total_output))
     print("{:<40} {:<10.2f}".format("Request throughput (req/s):",
                                     metrics.request_throughput))
-    print("{:<40} {:<10.2f}".format("Input token throughput (tok/s):",
-                                    metrics.input_throughput))
     print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):",
                                     metrics.output_throughput))
+    print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):",
+                                    metrics.total_token_throughput))
 
     result = {
         "duration": benchmark_duration,
         "completed": metrics.completed,
         "total_input_tokens": metrics.total_input,
         "total_output_tokens": metrics.total_output,
         "request_throughput": metrics.request_throughput,
-        "input_throughput": metrics.input_throughput,
         "output_throughput": metrics.output_throughput,
+        "total_token_throughput": metrics.total_token_throughput,
         "input_lens": [output.prompt_len for output in outputs],
         "output_lens": actual_output_lens,
         "ttfts": [output.ttft for output in outputs],