diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index 9425cb07ec01..c32def586704 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -118,6 +118,11 @@ upload_to_buildkite() {
   $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
 }
 
+clear_torch_compile_cache() {
+  # Clear stale torch.compile cache between benchmark runs; see https://github.com/vllm-project/vllm/issues/13392
+  rm -rf ~/.cache/vllm/torch_compile_cache
+}
+
 run_latency_tests() {
   # run latency tests using `benchmark_latency.py`
   # $1: a json file specifying latency test cases
@@ -151,6 +156,8 @@ run_latency_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     latency_command="python3 benchmark_latency.py \
       --output-json $RESULTS_FOLDER/${test_name}.json \
       $latency_args"
@@ -209,6 +216,8 @@ run_throughput_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     throughput_command="python3 benchmark_throughput.py \
       --output-json $RESULTS_FOLDER/${test_name}.json \
       $throughput_args"
@@ -279,6 +288,8 @@ run_serving_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     server_command="python3 \
       -m vllm.entrypoints.openai.api_server \
       $server_args"