File tree Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -126,11 +126,12 @@ run_benchmark() {
126126 # get a basic qps by using request-rate inf
127127 bm_log=" $LOG_FOLDER /bm_log_${max_num_seqs} _${max_num_batched_tokens} _requestrate_inf.txt"
128128 prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 ))
129- python benchmarks/benchmark_serving.py \
129+ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
130+ python3 benchmarks/benchmark_serving.py \
130131 --backend vllm \
131132 --model $MODEL \
132133 --dataset-name random \
133- --random-input-len $INPUT_LEN \
134+ --random-input-len $adjusted_input_len \
134135 --random-output-len $OUTPUT_LEN \
135136 --ignore-eos \
136137 --disable-tqdm \
@@ -159,11 +160,11 @@ run_benchmark() {
159160 curl -X POST http://0.0.0.0:8004/reset_prefix_cache
160161 sleep 5
161162 bm_log=" $LOG_FOLDER /bm_log_${max_num_seqs} _${max_num_batched_tokens} _requestrate_${request_rate} .txt"
162- python benchmarks/benchmark_serving.py \
163+ python3 benchmarks/benchmark_serving.py \
163164 --backend vllm \
164165 --model $MODEL \
165166 --dataset-name random \
166- --random-input-len $INPUT_LEN \
167+ --random-input-len $adjusted_input_len \
167168 --random-output-len $OUTPUT_LEN \
168169 --ignore-eos \
169170 --disable-tqdm \
You can’t perform that action at this time.
0 commit comments