Skip to content

Commit 8ff19f5

Browse files
committed
Update combine config
1 parent d72817e commit 8ff19f5

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

deep_ep/buffer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def get_combine_config(num_ranks: int) -> Config:
     2: Config(Buffer.num_sms, 10, 256, 6, 128),
     4: Config(Buffer.num_sms, 9, 256, 6, 128),
     8: Config(Buffer.num_sms, 4, 256, 6, 128),
-    16: Config(Buffer.num_sms, 4, 288, 16, 128),
+    16: Config(Buffer.num_sms, 4, 288, 12, 128),
     24: Config(Buffer.num_sms, 1, 288, 8, 128),
     32: Config(Buffer.num_sms, 1, 288, 8, 128),
     64: Config(Buffer.num_sms, 1, 288, 20, 128),

tests/test_internode.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ def check_data(check_x, recv_gbl_rank_prefix_sum):
 
     # Tune combine performance
     best_time, best_results = 1e10, None
-    for nvl_chunk_size in range(1, 13, 1):
-        for rdma_chunk_size in range(8, 33, 4):
+    for nvl_chunk_size in range(1, 8, 1):
+        for rdma_chunk_size in range(12 if num_nodes == 2 else 8, 33, 2):
             config = deep_ep.Config(num_sms, nvl_chunk_size, nvl_buffer_size, rdma_chunk_size, rdma_buffer_size)
             tune_args = {'x': recv_x, 'handle': handle, 'config': config}
             t, notify_t = bench_kineto(lambda: buffer.combine(**tune_args), ('combine', 'notify'))

0 commit comments

Comments
 (0)