-
-
Notifications
You must be signed in to change notification settings - Fork 11.4k
[Kernel] Split moe tuned configs #24113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Signed-off-by: Jee Jee Li <[email protected]>
Signed-off-by: Jee Jee Li <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code Review
This pull request refactors the MoE kernel configuration loading by introducing versioned directories for Triton. This is a good step towards better maintainability. I've identified a potential issue with the strict version matching for Triton configurations, which could lead to silent performance regressions. I've provided a suggestion to make the version matching more robust. The rest of the changes, including moving existing configs to a legacy_configs directory, look good.
| def _check_config_file_path(path: str, | ||
| extra_info: str = "" | ||
| ) -> Optional[dict[int, Any]]: | ||
| if os.path.exists(path): | ||
| with open(path) as f: | ||
| logger.info( | ||
| "Using configuration from %s for MoE layer. %s", | ||
| path, | ||
| extra_info, | ||
| ) | ||
| return {int(key): val for key, val in json.load(f).items()} | ||
| return None | ||
|
|
||
| # note that we prioritize user defined config | ||
| # P1 User-specified configuration (highest priority) | ||
| user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER | ||
| if user_defined_config_folder is not None: | ||
| user_defined_config_file_path = os.path.join( | ||
| user_defined_config_folder, json_file_name) | ||
| config_file_paths.append(user_defined_config_file_path) | ||
|
|
||
| if val := _check_config_file_path(user_defined_config_file_path): | ||
| return val | ||
| # P2 Current Triton version configuration | ||
| triton_version = triton.__version__ | ||
| triton_version_name = f"triton_{triton_version.replace('.', '_')}" | ||
| cur_triton_file_path = os.path.join( | ||
| os.path.dirname(os.path.realpath(__file__)), | ||
| "configs", | ||
| triton_version_name, | ||
| json_file_name, | ||
| ) | ||
| if val := _check_config_file_path(cur_triton_file_path): | ||
| return val | ||
| # P3 Legacy configuration | ||
| default_config_file_path = os.path.join( | ||
| os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name) | ||
| config_file_paths.append(default_config_file_path) | ||
|
|
||
| for config_file_path in config_file_paths: | ||
| if os.path.exists(config_file_path): | ||
| with open(config_file_path) as f: | ||
| logger.info("Using configuration from %s for MoE layer.", | ||
| config_file_path) | ||
| # If a configuration has been found, return it | ||
| return {int(key): val for key, val in json.load(f).items()} | ||
| os.path.dirname(os.path.realpath(__file__)), | ||
| "configs", | ||
| "legacy_configs", | ||
| json_file_name, | ||
| ) | ||
|
|
||
| if val := _check_config_file_path( | ||
| default_config_file_path, | ||
| extra_info= | ||
| "Loading config from the legacy configuration may be suboptimal, please update the corresponding config.", # noqa: E501 | ||
| ): | ||
| return val | ||
|
|
||
| # If no optimized configuration is available, we will use the default | ||
| # configuration | ||
| cur_triton_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), | ||
| "configs", triton_version_name) | ||
| logger.warning( | ||
| ("Using default MoE config. Performance might be sub-optimal! " | ||
| "Config file not found at %s"), config_file_paths) | ||
| "Config file not found at %s"), cur_triton_dir) | ||
| return None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current logic for selecting a Triton-versioned configuration is too strict. It checks for an exact version match (e.g., triton_3_4_0). If a user updates to a new patch release of Triton (e.g., 3.4.1), this check will fail, and vLLM will fall back to legacy or default configurations, which could cause a silent performance regression. This is a high-severity risk for a performance-critical library.
To make this more robust, I suggest implementing a fallback mechanism to use the latest compatible configuration. The suggested code below finds the latest available configuration version that is less than or equal to the current Triton version. It also improves logging by listing all checked paths if no configuration is found.
def _check_config_file_path(path: str,
extra_info: str = ''
) -> Optional[dict[int, Any]]:
if os.path.exists(path):
with open(path) as f:
log_msg = f'Using configuration from {path} for MoE layer.'
if extra_info:
log_msg += f' {extra_info}'
logger.info(log_msg)
return {int(key): val for key, val in json.load(f).items()}
return None
paths_checked = []
# P1 User-specified configuration (highest priority)
user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
if user_defined_config_folder is not None:
user_defined_config_file_path = os.path.join(
user_defined_config_folder, json_file_name)
paths_checked.append(user_defined_config_file_path)
if val := _check_config_file_path(user_defined_config_file_path):
return val
# P2 Current Triton version configuration
from packaging.version import parse as parse_version
current_triton_version = parse_version(triton.__version__)
configs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'configs')
available_versions = []
if os.path.exists(configs_dir):
for dirname in os.listdir(configs_dir):
if dirname.startswith('triton_'):
try:
version_str = dirname.replace('triton_', '').replace(
'_', '.')
available_versions.append(parse_version(version_str))
except Exception:
# Ignore directories that don't match the version format
continue
# Find the latest version that is not newer than the current triton version
compatible_versions = sorted(
[v for v in available_versions if v <= current_triton_version],
reverse=True)
for version in compatible_versions:
version_name = f"triton_{str(version).replace('.', '_')}"
config_path = os.path.join(configs_dir, version_name,
json_file_name)
paths_checked.append(config_path)
if val := _check_config_file_path(config_path):
return val
# P3 Legacy configuration
default_config_file_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'configs',
'legacy_configs',
json_file_name,
)
paths_checked.append(default_config_file_path)
if val := _check_config_file_path(
default_config_file_path,
extra_info=
'Loading config from the legacy configuration may be suboptimal, please update the corresponding config.', # noqa: E501
):
return val
# If no optimized configuration is available, we will use the default
# configuration
logger.warning(
('Using default MoE config. Performance might be sub-optimal! '
'Config file not found. Paths checked: %s'), paths_checked)
return NoneSigned-off-by: Jee Jee Li <[email protected]>
Signed-off-by: Jee Jee Li <[email protected]>
Signed-off-by: Jee Jee Li <[email protected]>
youkaichao
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the idea looks good to me
Signed-off-by: Jee Jee Li <[email protected]>
|
This pull request has merge conflicts that must be resolved before it can be |
Purpose
Part of #24112
Test Plan
Test Result
Essential Elements of an Effective PR Description Checklist
supported_models.mdandexamplesfor a new model.