@@ -39,7 +39,7 @@ int init_hexagon_user_dma() {
3939 return DMA_SUCCESS;
4040}
4141
42- int hexagon_user_dma_1d_sync (void * dst, void * src, uint32_t length) {
42+ int hexagon_user_dma_1d_sync_helper (void * dst, void * src, uint32_t length) {
4343#if defined(__hexagon__) && __HEXAGON_ARCH__ >= 68
4444 static int config_dma = init_hexagon_user_dma ();
4545 if (config_dma != DMA_SUCCESS) {
@@ -114,6 +114,35 @@ int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length) {
114114#endif
115115}
116116
117+ int hexagon_user_dma_1d_sync (void * dst, void * src, uint32_t length) {
118+ // One DMA transfer can copy atmost DESC_LENGTH_MASK bytes.
119+ // Make the common case quick.
120+ if (length <= DESC_LENGTH_MASK) return hexagon_user_dma_1d_sync_helper (dst, src, length);
121+
122+ // Split big transfers into smaller transfers.
123+ char * cast_src = static_cast <char *>(src);
124+ char * cast_dst = static_cast <char *>(dst);
125+ for (uint32_t i = 0 ; i < length;) {
126+ // Ensure there is no overflow while updating i
127+ uint32_t cur_len = std::min<uint32_t >(length - i, DESC_LENGTH_MASK);
128+ int ret_val = hexagon_user_dma_1d_sync_helper (&cast_dst[i], &cast_src[i], cur_len);
129+ if (ret_val != DMA_SUCCESS) return ret_val;
130+ // 2 cases for new val for i:
131+ // 1. length - i <= DESC_LENGTH_MASK (<= MAX_UINT)
132+ // new_i = i + (length - i) = length, no more iter
133+ // and no overflow (since (length - i) <= (MAX_UINT - i))
134+ // 2. length - i > DESC_LENGTH_MASK
135+ // length > (i + DESC_LENGTH_MASK)
136+ // new_i = (i + DESC_LENGTH_MASK)
137+ // length > new_i for next iter, we're done
138+ // length - i > DESC_LENGTH_MASK
139+ // and length <= MAX_UINT,
140+ // so MAX_UINT >= length > DESC_LEN_MASK + i
141+ // MAX_UINT > (DESC_LEN_MASK + i), so no overflow
142+ i += cur_len;
143+ }
144+ return DMA_SUCCESS;
145+ }
117146} // namespace hexagon
118147} // namespace runtime
119148} // namespace tvm
0 commit comments