Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions vllm/env_override.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,3 +758,51 @@ def _exec_then_patch(module):


_patch_cpp_indirect_assert_if_needed()

# ===================================================
# Triton Autotuner disable
# ===================================================
# Replace Autotuner.run so it always picks configs[0] and skips benchmarking.
# Used to eliminate autotuning variability when measuring kernel perf.
# Gated on VLLM_TRITON_FORCE_FIRST_CONFIG ("1" or "true", case-insensitive) so it is opt-in.
from vllm.triton_utils import HAS_TRITON # noqa: E402


def _disable_triton_autotuner():
if not HAS_TRITON:
return
if os.environ.get("VLLM_TRITON_FORCE_FIRST_CONFIG", "0").strip().lower() not in (
"1",
"true",
):
return
import importlib

Autotuner = importlib.import_module("triton.runtime.autotuner").Autotuner
seen_kernels: set[str] = set()

def _run_first_config(self, *args, **kwargs):
config = self.configs[0]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The code assumes self.configs is non-empty. If a kernel is defined with an empty list of configurations (which is allowed in Triton), this will raise an IndexError. The original Triton Autotuner.run implementation handles this by checking if self.configs is empty and falling back to a direct call.

Suggested change
- config = self.configs[0]
+ if not self.configs:
+     return self.fn(*args, **kwargs)
+ config = self.configs[0]

self.best_config = config
kernel_name = getattr(self.fn, "__name__", repr(self.fn))
if kernel_name not in seen_kernels:
seen_kernels.add(kernel_name)
logger.info(
"[triton-autotune-disabled] kernel=%s configs=%d picked=%s",
kernel_name,
len(self.configs),
config,
)
if config.pre_hook is not None:
full_nargs = {
**dict(zip(self.arg_names, args)),
**kwargs,
**config.all_kwargs(),
}
config.pre_hook(full_nargs)
return self.fn.run(*args, **kwargs, **config.all_kwargs())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using self.fn.run(...) will cause an AttributeError on Triton 3.x when the autotuner wraps a Heuristics object, as Heuristics does not have a run method in newer Triton versions. It is safer and more compatible to call self.fn(...) directly, which is what the upstream Triton Autotuner.run does.

Suggested change
- return self.fn.run(*args, **kwargs, **config.all_kwargs())
+ return self.fn(*args, **kwargs, **config.all_kwargs())


Autotuner.run = _run_first_config


_disable_triton_autotuner()
Loading