Skip to content

Commit 4f76a05

Browse files
[BugFix] Update python to python3 calls for image; fix prefix & input calculations. (#21391)
Signed-off-by: Eric Hanley <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent f154bb9 commit 4f76a05

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

benchmarks/auto_tune/auto_tune.sh

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,11 +126,12 @@ run_benchmark() {
126126
# get a basic qps by using request-rate inf
127127
bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt"
128128
prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 ))
129-
python benchmarks/benchmark_serving.py \
129+
adjusted_input_len=$(( INPUT_LEN - prefix_len ))
130+
python3 benchmarks/benchmark_serving.py \
130131
--backend vllm \
131132
--model $MODEL \
132133
--dataset-name random \
133-
--random-input-len $INPUT_LEN \
134+
--random-input-len $adjusted_input_len \
134135
--random-output-len $OUTPUT_LEN \
135136
--ignore-eos \
136137
--disable-tqdm \
@@ -159,11 +160,11 @@ run_benchmark() {
159160
curl -X POST http://0.0.0.0:8004/reset_prefix_cache
160161
sleep 5
161162
bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt"
162-
python benchmarks/benchmark_serving.py \
163+
python3 benchmarks/benchmark_serving.py \
163164
--backend vllm \
164165
--model $MODEL \
165166
--dataset-name random \
166-
--random-input-len $INPUT_LEN \
167+
--random-input-len $adjusted_input_len \
167168
--random-output-len $OUTPUT_LEN \
168169
--ignore-eos \
169170
--disable-tqdm \

0 commit comments

Comments
 (0)