-
-
Notifications
You must be signed in to change notification settings - Fork 17.7k
[Perf] Add VLLM_TRITON_FORCE_FIRST_CONFIG to skip Triton autotuning #42425
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -758,3 +758,51 @@ def _exec_then_patch(module): | |||||
|
|
||||||
|
|
||||||
| _patch_cpp_indirect_assert_if_needed() | ||||||
|
|
||||||
| # =================================================== | ||||||
| # Triton Autotuner disable | ||||||
| # =================================================== | ||||||
| # Replace Autotuner.run so it always picks configs[0] and skips benchmarking. | ||||||
| # Used to eliminate autotuning variability when measuring kernel perf. | ||||||
| # Gated on VLLM_TRITON_FORCE_FIRST_CONFIG=1 so it is opt-in. | ||||||
| from vllm.triton_utils import HAS_TRITON # noqa: E402 | ||||||
|
|
||||||
|
|
||||||
def _disable_triton_autotuner():
    """Monkey-patch Triton's ``Autotuner.run`` to always launch ``configs[0]``.

    The patch is opt-in: it is installed only when Triton is available
    (``HAS_TRITON``) and the environment variable
    ``VLLM_TRITON_FORCE_FIRST_CONFIG`` is ``1`` or ``true`` (compared
    case-insensitively, surrounding whitespace ignored). Otherwise this
    function returns without touching Triton.

    Once installed, every autotuned kernel launch skips benchmarking and uses
    the first registered config, logging the choice once per kernel name.
    """
    if not HAS_TRITON:
        return
    flag = os.environ.get("VLLM_TRITON_FORCE_FIRST_CONFIG", "0")
    if flag.strip().lower() not in ("1", "true"):
        return

    # Imported lazily so Triton's autotuner module is only loaded when the
    # override is actually requested.
    import importlib

    Autotuner = importlib.import_module("triton.runtime.autotuner").Autotuner
    # Kernel names already reported, so each kernel is logged only once
    # rather than on every launch.
    seen_kernels: set[str] = set()

    def _run_first_config(self, *args, **kwargs):
        """Replacement for ``Autotuner.run`` that never benchmarks.

        Launches the wrapped kernel with ``self.configs[0]``. If the
        autotuner carries no configs, the kernel is launched with the
        caller's arguments only.
        """
        if not self.configs:
            # Nothing to force: indexing configs[0] would raise IndexError,
            # so fall back to a direct launch of the wrapped kernel.
            return self.fn.run(*args, **kwargs)

        config = self.configs[0]
        self.best_config = config

        kernel_name = getattr(self.fn, "__name__", repr(self.fn))
        if kernel_name not in seen_kernels:
            seen_kernels.add(kernel_name)
            logger.info(
                "[triton-autotune-disabled] kernel=%s configs=%d picked=%s",
                kernel_name,
                len(self.configs),
                config,
            )

        config_kwargs = config.all_kwargs()
        if config.pre_hook is not None:
            # The hook receives positional args keyed by name, then the
            # caller's kwargs, then the config's kwargs (later entries win).
            full_nargs = {
                **dict(zip(self.arg_names, args)),
                **kwargs,
                **config_kwargs,
            }
            config.pre_hook(full_nargs)
        return self.fn.run(*args, **kwargs, **config_kwargs)

    Autotuner.run = _run_first_config
|
|
||||||
|
|
||||||
| _disable_triton_autotuner() | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code assumes `self.configs` is non-empty. If a kernel is defined with an empty list of configurations (which is allowed in Triton), this will raise an `IndexError`. The original Triton `Autotuner.run` implementation handles this by checking if `self.configs` is empty and falling back to a direct call.