Skip to content

Commit d72817e

Browse files
ZhiyiHu1999 and zhiyi Hu authored
fix hang due to small rdma_chunk_size (#317)
Co-authored-by: zhiyi Hu <[email protected]>
1 parent 5b549c8 commit d72817e

File tree

1 file changed

+1
-0
lines changed

1 file changed

+1
-0
lines changed

csrc/kernels/internode.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1851,6 +1851,7 @@ void combine(cudaDataType_t type,
1851 1851
EP_HOST_ASSERT(num_forwarder_warps > NUM_MAX_NVL_PEERS and num_forwarder_warps % num_rdma_ranks == 0);
1852 1852
EP_HOST_ASSERT(num_max_nvl_chunked_recv_tokens % num_rdma_ranks == 0);
1853 1853
EP_HOST_ASSERT(num_max_nvl_chunked_recv_tokens / num_rdma_ranks > std::max(num_max_rdma_chunked_send_tokens, num_max_nvl_chunked_send_tokens));
1854 +
EP_HOST_ASSERT(num_max_rdma_chunked_send_tokens >= num_warps_per_forwarder);
1854 1855
EP_HOST_ASSERT(type == CUDA_R_16BF);
1855 1856

1856 1857
SETUP_LAUNCH_CONFIG(num_channels * 2, (num_forwarder_warps + 1) * 32, stream);

0 commit comments

Comments
 (0)