Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"48": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"64": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"96": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"128": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"256": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4
},
"512": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 32,
"num_warps": 4,
"num_stages": 3
},
"1024": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"1536": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"2048": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4
},
"3072": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
},
"4096": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 3
}
}
57 changes: 42 additions & 15 deletions vllm/model_executor/layers/fused_moe/fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,32 +701,59 @@ def get_moe_configs(
block_shape = [block_n, block_k] if block_n and block_k else None
json_file_name = get_config_file_name(E, N, dtype, block_shape)

config_file_paths = []
def _check_config_file_path(path: str,
extra_info: str = ""
) -> Optional[dict[int, Any]]:
if os.path.exists(path):
with open(path) as f:
logger.info(
"Using configuration from %s for MoE layer. %s",
path,
extra_info,
)
return {int(key): val for key, val in json.load(f).items()}
return None

# note that we prioritize user defined config
# P1 User-specified configuration (highest priority)
user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
if user_defined_config_folder is not None:
user_defined_config_file_path = os.path.join(
user_defined_config_folder, json_file_name)
config_file_paths.append(user_defined_config_file_path)

if val := _check_config_file_path(user_defined_config_file_path):
return val
# P2 Current Triton version configuration
triton_version = triton.__version__
triton_version_name = f"triton_{triton_version.replace('.', '_')}"
cur_triton_file_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"configs",
triton_version_name,
json_file_name,
)
if val := _check_config_file_path(cur_triton_file_path):
return val
# P3 Legacy configuration
default_config_file_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name)
config_file_paths.append(default_config_file_path)

for config_file_path in config_file_paths:
if os.path.exists(config_file_path):
with open(config_file_path) as f:
logger.info("Using configuration from %s for MoE layer.",
config_file_path)
# If a configuration has been found, return it
return {int(key): val for key, val in json.load(f).items()}
os.path.dirname(os.path.realpath(__file__)),
"configs",
"legacy_configs",
json_file_name,
)

if val := _check_config_file_path(
default_config_file_path,
extra_info=
"Loading config from the legacy configuration may be suboptimal, please update the corresponding config.", # noqa: E501
):
return val

# If no optimized configuration is available, we will use the default
# configuration
cur_triton_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
"configs", triton_version_name)
logger.warning(
("Using default MoE config. Performance might be sub-optimal! "
"Config file not found at %s"), config_file_paths)
"Config file not found at %s"), cur_triton_dir)
return None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current logic for selecting a Triton-versioned configuration is too strict. It checks for an exact version match (e.g., triton_3_4_0). If a user updates to a new patch release of Triton (e.g., 3.4.1), this check will fail, and vLLM will fall back to legacy or default configurations, which could cause a silent performance regression. This is a high-severity risk for a performance-critical library.

To make this more robust, I suggest implementing a fallback mechanism to use the latest compatible configuration. The suggested code below finds the latest available configuration version that is less than or equal to the current Triton version. It also improves logging by listing all checked paths if no configuration is found.

    def _check_config_file_path(path: str,
                                extra_info: str = ''
                                ) -> Optional[dict[int, Any]]:
        if os.path.exists(path):
            with open(path) as f:
                log_msg = f'Using configuration from {path} for MoE layer.'
                if extra_info:
                    log_msg += f' {extra_info}'
                logger.info(log_msg)
                return {int(key): val for key, val in json.load(f).items()}
        return None

    paths_checked = []

    # P1 User-specified configuration (highest priority)
    user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
    if user_defined_config_folder is not None:
        user_defined_config_file_path = os.path.join(
            user_defined_config_folder, json_file_name)
        paths_checked.append(user_defined_config_file_path)
        if val := _check_config_file_path(user_defined_config_file_path):
            return val

    # P2 Current Triton version configuration
    from packaging.version import parse as parse_version
    current_triton_version = parse_version(triton.__version__)
    configs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'configs')

    available_versions = []
    if os.path.exists(configs_dir):
        for dirname in os.listdir(configs_dir):
            if dirname.startswith('triton_'):
                try:
                    version_str = dirname.replace('triton_', '').replace(
                        '_', '.')
                    available_versions.append(parse_version(version_str))
                except Exception:
                    # Ignore directories that don't match the version format
                    continue

    # Find the latest version that is not newer than the current triton version
    compatible_versions = sorted(
        [v for v in available_versions if v <= current_triton_version],
        reverse=True)

    for version in compatible_versions:
        version_name = f"triton_{str(version).replace('.', '_')}"
        config_path = os.path.join(configs_dir, version_name,
                                   json_file_name)
        paths_checked.append(config_path)
        if val := _check_config_file_path(config_path):
            return val

    # P3 Legacy configuration
    default_config_file_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'configs',
        'legacy_configs',
        json_file_name,
    )
    paths_checked.append(default_config_file_path)
    if val := _check_config_file_path(
            default_config_file_path,
            extra_info=
            'Loading config from the legacy configuration may be suboptimal, please update the corresponding config.',  # noqa: E501
    ):
        return val

    # If no optimized configuration is available, we will use the default
    # configuration
    logger.warning(
        ('Using default MoE config. Performance might be sub-optimal! '
         'Config file not found. Paths checked: %s'), paths_checked)
    return None



Expand Down
Loading