Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ upload_to_buildkite() {
$BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
}

#######################################
# Remove vLLM's on-disk torch.compile cache so the next benchmark run
# starts from a cold compile cache.
# See https://github.com/vllm-project/vllm/issues/13392
# Globals:
#   VLLM_CACHE_ROOT (read, optional) - explicit vLLM cache root
#   XDG_CACHE_HOME  (read, optional) - base cache dir used when
#                                      VLLM_CACHE_ROOT is not set
# Arguments: none
# Returns:   exit status of `rm -rf` (0 when the directory is absent)
#######################################
clear_torch_compile_cache() {
  # Unquoted tilde in an assignment expands to the current user's home.
  local home_dir=~
  # Resolve the cache root in the same order vLLM does:
  # $VLLM_CACHE_ROOT, else $XDG_CACHE_HOME/vllm, else ~/.cache/vllm.
  local cache_root="${VLLM_CACHE_ROOT:-${XDG_CACHE_HOME:-${home_dir}/.cache}/vllm}"

  # vLLM expands a leading "~" in VLLM_CACHE_ROOT; a quoted "~" would
  # otherwise be taken literally here.
  case "$cache_root" in
    "~" | "~/"*) cache_root="${home_dir}${cache_root#"~"}" ;;
  esac

  # ":?" refuses to run against an empty root; "--" stops option parsing.
  rm -rf -- "${cache_root:?}/torch_compile_cache"
}

run_latency_tests() {
# run latency tests using `benchmark_latency.py`
# $1: a json file specifying latency test cases
Expand Down Expand Up @@ -151,6 +156,8 @@ run_latency_tests() {
continue
fi

clear_torch_compile_cache

latency_command="python3 benchmark_latency.py \
--output-json $RESULTS_FOLDER/${test_name}.json \
$latency_args"
Expand Down Expand Up @@ -209,6 +216,8 @@ run_throughput_tests() {
continue
fi

clear_torch_compile_cache

throughput_command="python3 benchmark_throughput.py \
--output-json $RESULTS_FOLDER/${test_name}.json \
$throughput_args"
Expand Down Expand Up @@ -279,6 +288,8 @@ run_serving_tests() {
continue
fi

clear_torch_compile_cache

server_command="python3 \
-m vllm.entrypoints.openai.api_server \
$server_args"
Expand Down