
Commit 94163e3

[Bugfix] Fix Qwen3-coder moe tuned config (vllm-project#24072)

jeejeelee authored and xuebwang-amd committed

Signed-off-by: Jee Jee Li <[email protected]>
Signed-off-by: xuebwang-amd <[email protected]>

1 parent 7ac6695 commit 94163e3

File tree: 2 files changed (+45, -41 lines)

benchmarks/kernels/benchmark_moe.py (5 additions, 1 deletion)

@@ -678,7 +678,11 @@ def _distribute(method: str, inputs: list[Any]) -> list[Any]:
     is_fp16 = not (use_fp8_w8a8 or use_int8_w8a16)
     search_space = get_configs_compute_bound(is_fp16, block_quant_shape)
     print(f"Start tuning over {len(search_space)} configurations...")
-
+    if use_deep_gemm:
+        raise ValueError(
+            "Tuning with --use-deep-gemm is not supported as it only tunes Triton "
+            "kernels. Please remove the flag."
+        )
     start = time.time()
     configs = _distribute(
         "tune",

vllm/model_executor/layers/fused_moe/configs/E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json (40 additions, 40 deletions)

@@ -18,18 +18,18 @@
     "4": {
         "BLOCK_SIZE_M": 16,
         "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 32,
         "num_warps": 4,
         "num_stages": 3
     },
     "8": {
         "BLOCK_SIZE_M": 16,
         "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_K": 128,
         "GROUP_SIZE_M": 64,
         "num_warps": 4,
-        "num_stages": 4
+        "num_stages": 3
     },
     "16": {
         "BLOCK_SIZE_M": 16,
@@ -58,7 +58,7 @@
     "48": {
         "BLOCK_SIZE_M": 64,
         "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_K": 256,
         "GROUP_SIZE_M": 64,
         "num_warps": 4,
         "num_stages": 4
@@ -74,73 +74,73 @@
     "96": {
         "BLOCK_SIZE_M": 64,
         "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 16,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 32,
         "num_warps": 4,
-        "num_stages": 3
+        "num_stages": 4
     },
     "128": {
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
+        "GROUP_SIZE_M": 64,
         "num_warps": 4,
-        "num_stages": 2
+        "num_stages": 4
     },
     "256": {
-        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_M": 64,
         "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 64,
+        "GROUP_SIZE_M": 1,
         "num_warps": 4,
-        "num_stages": 3
+        "num_stages": 4
     },
     "512": {
-        "BLOCK_SIZE_M": 256,
-        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 256,
         "GROUP_SIZE_M": 64,
-        "num_warps": 8,
+        "num_warps": 4,
         "num_stages": 4
     },
     "1024": {
-        "BLOCK_SIZE_M": 256,
-        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 16,
+        "GROUP_SIZE_M": 64,
         "num_warps": 4,
         "num_stages": 4
     },
     "1536": {
         "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 256,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_M": 32,
         "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 256,
         "GROUP_SIZE_M": 16,
-        "num_warps": 8,
-        "num_stages": 5
+        "num_warps": 4,
+        "num_stages": 3
     },
-    "3072": {
-        "BLOCK_SIZE_M": 128,
+    "2048": {
+        "BLOCK_SIZE_M": 64,
         "BLOCK_SIZE_N": 128,
         "BLOCK_SIZE_K": 128,
         "GROUP_SIZE_M": 64,
         "num_warps": 4,
-        "num_stages": 4
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
     },
     "4096": {
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 256,
-        "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 64,
-        "num_warps": 8,
-        "num_stages": 5
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
     }
 }
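
The JSON above is a per-shape tuned kernel config: the filename encodes the tuned problem (expert count E, intermediate dimension N, device name, quantization dtype, and FP8 block-quant shape), the top-level keys are token batch sizes M, and each entry holds Triton launch parameters tuned for that M. Below is a sketch of how such a file is consumed, assuming the nearest-batch-size selection vLLM applies to these fused-MoE config files; the function and variable names here are illustrative:

```python
# Sketch of consuming a tuned MoE config like the one above, assuming a
# nearest-M lookup. load_tuned_config and its arguments are illustrative.
import json

def load_tuned_config(path: str, num_tokens: int) -> dict:
    with open(path) as f:
        configs = {int(m): cfg for m, cfg in json.load(f).items()}
    # Choose the entry whose batch-size key is closest to the actual
    # number of tokens in this forward pass.
    best_m = min(configs, key=lambda m: abs(m - num_tokens))
    return configs[best_m]

cfg = load_tuned_config(
    "E=20,N=2560,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,"
    "block_shape=[128,128].json",
    num_tokens=600,  # nearest key is "512" among the entries shown above
)
# With the values from this commit, cfg == {"BLOCK_SIZE_M": 64,
# "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 64,
# "num_warps": 4, "num_stages": 4}
print(cfg)
```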
