diff --git a/vllm/env_override.py b/vllm/env_override.py
index 181d000a68a7..5358568fc180 100644
--- a/vllm/env_override.py
+++ b/vllm/env_override.py
@@ -105,6 +105,14 @@ def _maybe_set_cuda_compatibility_path():
 # see https://github.com/vllm-project/vllm/issues/10619
 torch._inductor.config.compile_threads = 1
 
+# Default TRITON_CACHE_AUTOTUNING to "1" so that Triton persists its
+# autotuning results to disk (under TRITON_CACHE_DIR). Without this,
+# Triton re-runs autotuning on every process restart, adding significant
+# latency to the first inference request. setdefault() only fills in a
+# missing value, so an explicit setting in the environment (e.g.
+# TRITON_CACHE_AUTOTUNING=0 to opt out) is left untouched.
+os.environ.setdefault("TRITON_CACHE_AUTOTUNING", "1")
+
 # ===================================================
 # torch 2.9 Inductor PythonWrapperCodegen monkeypatch
 # ===================================================