
Commit 70e2815

mgoin and amd-xiaoyu12 authored and committed

[CI Perf] Prune tests in tests/kernels/moe/ (vllm-project#22939)

Signed-off-by: mgoin <[email protected]>
Signed-off-by: Xiao Yu <[email protected]>

1 parent e6a07f6 commit 70e2815

File tree

6 files changed: +46 -31 lines changed

tests/kernels/moe/test_batched_moe.py

Lines changed: 5 additions & 8 deletions
@@ -89,14 +89,11 @@ def make_tensors(config: BatchedMMConfig):
     return BatchedMMTensors(A, B, C, num_expert_tokens)


-@pytest.mark.parametrize("num_experts", [8, 16, 32])
-@pytest.mark.parametrize("max_tokens_per_expert",
-                         [32, 64, 128, 192, 224, 256, 512])
-@pytest.mark.parametrize("K", [128, 256, 1024])
-@pytest.mark.parametrize("N", [128, 256, 1024])
-@pytest.mark.parametrize(
-    "dtype",
-    [torch.float8_e4m3fn, torch.float32, torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("num_experts", [8, 32])
+@pytest.mark.parametrize("max_tokens_per_expert", [32, 224, 512])
+@pytest.mark.parametrize("K", [128, 1024])
+@pytest.mark.parametrize("N", [128, 1024])
+@pytest.mark.parametrize("dtype", [torch.float8_e4m3fn, torch.bfloat16])
 @pytest.mark.parametrize("block_shape", [None, [128, 128]])
 @pytest.mark.parametrize("per_act_token_quant", [False, True])
 def test_batched_mm(num_experts: int, max_tokens_per_expert: int, K: int,
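Stacked @pytest.mark.parametrize decorators generate the full Cartesian product of their value lists, so trimming each list shrinks the matrix multiplicatively. A minimal, self-contained sketch of the mechanism (the counts in the comments follow from the lists in the diff above; the tiny test itself is illustrative, not part of vLLM):

import pytest

# Each decorator multiplies the number of collected cases. Before the prune,
# test_batched_mm expanded to
#   3 (num_experts) * 7 (max_tokens_per_expert) * 3 (K) * 3 (N)
#   * 4 (dtype) * 2 (block_shape) * 2 (per_act_token_quant) = 3024 cases;
# after the prune it expands to 2 * 3 * 2 * 2 * 2 * 2 * 2 = 192 cases.

@pytest.mark.parametrize("num_experts", [8, 32])
@pytest.mark.parametrize("max_tokens_per_expert", [32, 224, 512])
def test_product_of_params(num_experts: int, max_tokens_per_expert: int):
    # pytest collects 2 * 3 = 6 instances of this test, one per combination.
    assert num_experts in (8, 32)
    assert max_tokens_per_expert in (32, 224, 512)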

tests/kernels/moe/test_count_expert_num_tokens.py

Lines changed: 2 additions & 3 deletions
@@ -113,8 +113,7 @@ def do_test_compute_expert_num_tokens(num_tokens: int, num_topk: int,
         rtol=0)


-@pytest.mark.parametrize(
-    "num_tokens", [1, 4, 8, 11, 19, 128, 127, 405, 1024, 3333, 6666, 7317])
+@pytest.mark.parametrize("num_tokens", [1, 4, 8, 11, 127, 128, 3333, 7317])
 @pytest.mark.parametrize("num_topk", [2, 6, 8])
 @pytest.mark.parametrize("num_experts", [64])
 @pytest.mark.parametrize("ep_size", [1, 2, 4])

@@ -126,7 +125,7 @@ def test_compute_expert_num_tokens(num_tokens: int, num_topk: int,
                                        ep_size, topk_ids_dtype)


-@pytest.mark.parametrize("numel", list(range(1, 8192, 11)))
+@pytest.mark.parametrize("numel", list(range(1, 8192, 111)))
 @pytest.mark.parametrize("num_experts", [32])
 @pytest.mark.parametrize("ep_size", [2])
 @pytest.mark.parametrize("topk_ids_dtype", [torch.int64])

tests/kernels/moe/test_moe.py

Lines changed: 23 additions & 10 deletions
@@ -42,6 +42,24 @@
 EP_SIZE = [1, 4]
 TOP_KS = [2, 6]

+FUSED_MOE_MNK_FACTORS = [
+    (1, 128, 128),
+    (1, 2048, 128),
+    (33, 2048, 128),
+    (222, 1024, 1024),
+    (32768, 128, 128),
+    (32768, 2048, 511),
+    (40000, 1024, 1024),
+]
+
+FUSED_MOE_WN16_MNK_FACTORS = [
+    (1, 128, 128),
+    (1, 1024, 1024),
+    (32, 2048, 128),
+    (32, 1024, 1024),
+    (222, 2048, 1024),
+]
+
 vllm_config = VllmConfig()
 vllm_config.scheduler_config.max_num_seqs = 128
 vllm_config.scheduler_config.max_model_len = 8192

@@ -116,13 +134,11 @@ def run_moe_test(
     return baseline_output


-@pytest.mark.parametrize("m", [1, 33, 64, 222, 32768, 40000])
-@pytest.mark.parametrize("n", [128, 1024, 2048])
-@pytest.mark.parametrize("k", [128, 511, 1024])
+@pytest.mark.parametrize("m,n,k", FUSED_MOE_MNK_FACTORS)
 @pytest.mark.parametrize("e", NUM_EXPERTS)
 @pytest.mark.parametrize("topk", TOP_KS)
 @pytest.mark.parametrize("ep_size", EP_SIZE)
-@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("padding", [True, False])
 @pytest.mark.parametrize("chunk_size", [8192])
 def test_fused_moe(

@@ -235,13 +251,11 @@ def m_fused_moe(
         use_cudagraph=use_cudagraph)


-@pytest.mark.parametrize("m", [1, 32, 222])
-@pytest.mark.parametrize("n", [128, 1024, 2048])
-@pytest.mark.parametrize("k", [128, 1024])
+@pytest.mark.parametrize("m,n,k", FUSED_MOE_WN16_MNK_FACTORS)
 @pytest.mark.parametrize("e", NUM_EXPERTS)
 @pytest.mark.parametrize("topk", TOP_KS)
 @pytest.mark.parametrize("ep_size", EP_SIZE)
-@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("group_size", [64, 128])
 @pytest.mark.parametrize("has_zp", [True, False])
 @pytest.mark.parametrize("weight_bits", [4, 8])

@@ -352,8 +366,7 @@ def test_fused_moe_wn16(m: int, n: int, k: int, e: int, topk: int,
     torch.testing.assert_close(triton_output, torch_output, atol=2e-2, rtol=0)


-@pytest.mark.parametrize("dtype",
-                         [torch.float32, torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("padding", [True, False])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])

tests/kernels/moe/test_moe_align_block_size.py

Lines changed: 3 additions & 3 deletions
@@ -15,10 +15,10 @@
 from vllm.platforms import current_platform
 from vllm.utils import round_up

-NUM_TOKENS = [1, 3, 7, 16, 256, 2256, 4096]
-NUM_EXPERTS = [32, 160, 256, 257, 512]
+NUM_TOKENS = [1, 3, 256, 2256, 4096]
+NUM_EXPERTS = [32, 160, 256, 257]
 TOP_KS = [1, 2, 16, 32]
-BLOCK_SIZES = [32, 64, 128, 256]
+BLOCK_SIZES = [32, 128]
 current_platform.seed_everything(0)

tests/kernels/moe/test_moe_permute_unpermute.py

Lines changed: 4 additions & 4 deletions
@@ -18,7 +18,7 @@
 from vllm.platforms import current_platform

 NUM_EXPERTS = [16, 64, 256]
-TOP_KS = [2, 4, 6, 8]
+TOP_KS = [2, 6, 8]
 EP_SIZE = [1, 4, 16]
 current_platform.seed_everything(0)

@@ -177,11 +177,11 @@ def torch_unpermute(permuted_hidden_states: torch.Tensor,
     return output


-@pytest.mark.parametrize("n_token", [1, 33, 64, 222, 1024, 2048, 3000, 5000])
-@pytest.mark.parametrize("n_hidden", [2048, 4096, 7168])
+@pytest.mark.parametrize("n_token", [1, 33, 1024, 5000])
+@pytest.mark.parametrize("n_hidden", [2048, 7168])
 @pytest.mark.parametrize("n_expert", NUM_EXPERTS)
 @pytest.mark.parametrize("topk", TOP_KS)
-@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("ep_size", EP_SIZE)
 @pytest.mark.parametrize("align_block_size", [None, 128])
 def test_moe_permute_unpermute(n_token: int, n_hidden: int, topk: int,

tests/kernels/moe/test_pplx_moe.py

Lines changed: 9 additions & 3 deletions
@@ -44,6 +44,14 @@
     reason="Requires PPLX kernels",
 )

+BATCHED_MOE_MNK_FACTORS = [
+    (1, 128, 128),
+    (33, 2048, 128),
+    (64, 128, 2048),
+    (222, 128, 128),
+    (222, 2048, 1024),
+]
+
 PPLX_COMBOS = [
     # TODO: figure out why this fails, seems to be test problem
     #(1, 128, 128),

@@ -152,9 +160,7 @@ def torch_batched_moe(
     return torch_finalize(out, topk_weight, topk_ids)


-@pytest.mark.parametrize("m", [1, 33, 64, 222])
-@pytest.mark.parametrize("n", [128, 1024, 2048])
-@pytest.mark.parametrize("k", [128, 512, 1024])
+@pytest.mark.parametrize("m,n,k", BATCHED_MOE_MNK_FACTORS)
 @pytest.mark.parametrize("e", NUM_EXPERTS)
 @pytest.mark.parametrize("topk", TOP_KS)
 @pytest.mark.parametrize("dtype", [torch.bfloat16])
