vllm-project · huydhn · Feb 17, 2025
diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -118,6 +118,11 @@ upload_to_buildkite() {
   $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
 }
 
+clear_torch_compile_cache() {  # delete vLLM's torch.compile cache directory (no args; no-op if absent)
+  # Honor VLLM_CACHE_ROOT / XDG_CACHE_HOME overrides; https://github.com/vllm-project/vllm/issues/13392
+  rm -rf -- "${VLLM_CACHE_ROOT:-${XDG_CACHE_HOME:-${HOME}/.cache}/vllm}/torch_compile_cache"
+}
+
 run_latency_tests() {
   # run latency tests using `benchmark_latency.py`
   # $1: a json file specifying latency test cases
@@ -151,6 +156,8 @@ run_latency_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     latency_command="python3 benchmark_latency.py \
       --output-json $RESULTS_FOLDER/${test_name}.json \
       $latency_args"
@@ -209,6 +216,8 @@ run_throughput_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     throughput_command="python3 benchmark_throughput.py \
       --output-json $RESULTS_FOLDER/${test_name}.json \
       $throughput_args"
@@ -279,6 +288,8 @@ run_serving_tests() {
       continue
     fi
 
+    clear_torch_compile_cache
+
     server_command="python3 \
       -m vllm.entrypoints.openai.api_server \
       $server_args"