diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 06edfb0552e8..69f5bdc681fa 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -696,37 +696,64 @@ def get_moe_configs( be picked and the associated configuration chosen to invoke the kernel. """ - # First look up if an optimized configuration is available in the configs - # directory + def _check_config_file_path(path: str, + extra_info: str = "" + ) -> Optional[dict[int, Any]]: + if os.path.exists(path): + with open(path) as f: + logger.info( + "Using configuration from %s for MoE layer. %s", + path, + extra_info, + ) + return {int(key): val for key, val in json.load(f).items()} + return None + block_shape = [block_n, block_k] if block_n and block_k else None json_file_name = get_config_file_name(E, N, dtype, block_shape) - config_file_paths = [] - - # note that we prioritize user defined config + # P1 User-specified configuration (highest priority) user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER if user_defined_config_folder is not None: user_defined_config_file_path = os.path.join( user_defined_config_folder, json_file_name) - config_file_paths.append(user_defined_config_file_path) + if val := _check_config_file_path(user_defined_config_file_path): + return val + + # P2 Current Triton version configuration + triton_version = triton.__version__ + triton_version_name = f"triton_{triton_version.replace('.', '_')}" + cur_triton_file_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "triton_configs", + triton_version_name, + json_file_name, + ) + if val := _check_config_file_path(cur_triton_file_path): + return val + # P3 Legacy configuration default_config_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name) - config_file_paths.append(default_config_file_path) - - for config_file_path in config_file_paths: - if os.path.exists(config_file_path): - with open(config_file_path) as f: - logger.info("Using configuration from %s for MoE layer.", - config_file_path) - # If a configuration has been found, return it - return {int(key): val for key, val in json.load(f).items()} + os.path.dirname(os.path.realpath(__file__)), + "triton_configs", + "legacy_configs", + json_file_name, + ) + + if val := _check_config_file_path( + default_config_file_path, + extra_info= + "Loading config from the legacy configuration may be suboptimal, please update the corresponding config.", # noqa: E501 + ): + return val # If no optimized configuration is available, we will use the default # configuration + cur_triton_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "triton_configs", triton_version_name) logger.warning( ("Using default MoE config. Performance might be sub-optimal! " - "Config file not found at %s"), config_file_paths) + "Config file not found at %s"), cur_triton_dir) return None diff --git a/vllm/model_executor/layers/fused_moe/configs/README b/vllm/model_executor/layers/fused_moe/triton_configs/README.md similarity index 93% rename from vllm/model_executor/layers/fused_moe/configs/README rename to vllm/model_executor/layers/fused_moe/triton_configs/README.md index 85970e2d1cea..6b7729f20003 100644 --- a/vllm/model_executor/layers/fused_moe/configs/README +++ b/vllm/model_executor/layers/fused_moe/triton_configs/README.md @@ -1,8 +1,12 @@ +# Fused MoE Kernel Configurations + This directory contains tuned configurations for different settings of the fused_moe kernel. For different settings of + - E (number of experts) - N (intermediate size) - device_name (torch.cuda.get_device_name()) + the JSON file contains a mapping from M (batch size) to the chosen configuration. The example configurations provided are for the Mixtral model for TP2 on H100 diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20-3e.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H20-3e.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20-3e.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H20-3e.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=192,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=352,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=352,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=352,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=352,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20-3e.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20-3e.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=384,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=704,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=704,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=704,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=704,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=768,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=96,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=128,N=96,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_B200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_B200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_B200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_B200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_B200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_B200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_B200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_B200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_H100.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1024,device_name=NVIDIA_H100.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_H20-3e.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=192,device_name=NVIDIA_H20-3e.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_H20-3e.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=192,device_name=NVIDIA_H20-3e.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=320,device_name=NVIDIA_H20-3e.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=320,device_name=NVIDIA_H20-3e.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=320,device_name=NVIDIA_H20-3e.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=160,N=320,device_name=NVIDIA_H20-3e.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=62,N=256,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=62,N=256,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=62,N=512,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=62,N=512,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1280,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=2560,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=3072,device_name=NVIDIA_H20,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=3072,device_name=NVIDIA_H20,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=3072,device_name=NVIDIA_H20,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=3072,device_name=NVIDIA_H20,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=3072,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=3072,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=3072,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=3072,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=320,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=384,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=384,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=384,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=384,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=640,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=768,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=768,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=896,device_name=NVIDIA_H20.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=64,N=896,device_name=NVIDIA_H20.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=72,N=384,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=72,N=384,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=72,N=768,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=72,N=768,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=14336,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=1792,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=2048,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_L40S.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=3584,device_name=NVIDIA_L40S.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=4096,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H200.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=7168,device_name=NVIDIA_H200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to vllm/model_executor/layers/fused_moe/triton_configs/legacy_configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=384,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=384,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json new file mode 100644 index 000000000000..94a4794ba996 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=160,N=640,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=160,N=640,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_GB200,dtype=fp8_w8a8,block_shape=[128,128].json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json similarity index 100% rename from vllm/model_executor/layers/fused_moe/configs/E=40,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json rename to vllm/model_executor/layers/fused_moe/triton_configs/triton_3_4_0/E=40,N=2560,device_name=NVIDIA_H100,dtype=fp8_w8a8,block_shape=[128,128].json