Skip to content

Commit 77d9e51

Browse files
comaniac and mgoin authored
[MISC] Replace input token throughput with total token throughput (vllm-project#8164)
Co-authored-by: Michael Goin <[email protected]>
1 parent e02ce49 commit 77d9e51

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

benchmarks/benchmark_serving.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ class BenchmarkMetrics:
5656
total_input: int
5757
total_output: int
5858
request_throughput: float
59-
input_throughput: float
6059
output_throughput: float
60+
total_token_throughput: float
6161
mean_ttft_ms: float
6262
median_ttft_ms: float
6363
std_ttft_ms: float
@@ -283,8 +283,8 @@ def calculate_metrics(
283283
total_input=total_input,
284284
total_output=sum(actual_output_lens),
285285
request_throughput=completed / dur_s,
286-
input_throughput=total_input / dur_s,
287286
output_throughput=sum(actual_output_lens) / dur_s,
287+
total_token_throughput=(total_input + sum(actual_output_lens)) / dur_s,
288288
mean_ttft_ms=np.mean(ttfts or 0) *
289289
1000, # ttfts is empty if streaming is not supported by backend
290290
std_ttft_ms=np.std(ttfts or 0) * 1000,
@@ -426,19 +426,19 @@ async def benchmark(
426426
metrics.total_output))
427427
print("{:<40} {:<10.2f}".format("Request throughput (req/s):",
428428
metrics.request_throughput))
429-
print("{:<40} {:<10.2f}".format("Input token throughput (tok/s):",
430-
metrics.input_throughput))
431429
print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):",
432430
metrics.output_throughput))
431+
print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):",
432+
metrics.total_token_throughput))
433433

434434
result = {
435435
"duration": benchmark_duration,
436436
"completed": metrics.completed,
437437
"total_input_tokens": metrics.total_input,
438438
"total_output_tokens": metrics.total_output,
439439
"request_throughput": metrics.request_throughput,
440-
"input_throughput": metrics.input_throughput,
441440
"output_throughput": metrics.output_throughput,
441+
"total_token_throughput": metrics.total_token_throughput,
442442
"input_lens": [output.prompt_len for output in outputs],
443443
"output_lens": actual_output_lens,
444444
"ttfts": [output.ttft for output in outputs],

0 commit comments

Comments (0)