Skip to content

Commit 6dece18

Browse files
authored
Adding support for Hexagon User DMA Engine (#10217)
* initial hexagon user dma impl * Hexagon User DMA descriptor, instruction and register headers * Synchronous 1D DMA working * HexagonBuffer unit tests passing with memcpy * cleanup * comments and organize code * format and lint * init function + other code review feedback * add ifdef hexagon around inline asm
1 parent c20cbc5 commit 6dece18

File tree

6 files changed

+799
-6
lines changed

6 files changed

+799
-6
lines changed

cmake/modules/Hexagon.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ if (NOT BUILD_FOR_HEXAGON AND NOT BUILD_FOR_ANDROID)
7676
# append select runtime sources for unit testing
7777
list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_buffer.cc)
7878
list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_common.cc)
79+
list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_user_dma.cc)
7980
return()
8081
elseif(NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_SIM}" AND
8182
NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_HW}")

src/runtime/hexagon/hexagon/hexagon_buffer.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ namespace tvm {
3737
namespace runtime {
3838
namespace hexagon {
3939

40+
// Forward declaration of the synchronous 1D copy routine defined in hexagon_user_dma.cc.
// The destination comes first, matching memcpy and the definition; every call in this
// file passes the destination as the first argument and checks the result against 0.
int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length);
41+
4042
struct Allocation {
4143
Allocation(size_t allocation_nbytes, size_t alignment)
4244
: allocation_nbytes_(allocation_nbytes), alignment_(alignment) {}
@@ -198,8 +200,10 @@ void HexagonBuffer::CopyTo(void* data, size_t nbytes) const {
198200
size_t bytes_to_copy = std::min(nbytes - copied, managed_allocations_[i]->allocation_nbytes_);
199201
if (bytes_to_copy == 0) break;
200202

201-
memcpy(static_cast<char*>(data) + copied,
202-
static_cast<const char*>(managed_allocations_[i]->data_), bytes_to_copy);
203+
void* data_plus_copied = static_cast<void*>((static_cast<char*>(data) + copied));
204+
int status =
205+
hexagon_user_dma_1d_sync(data_plus_copied, managed_allocations_[i]->data_, bytes_to_copy);
206+
CHECK_EQ(status, 0);
203207

204208
copied += bytes_to_copy;
205209
}
@@ -215,8 +219,10 @@ void HexagonBuffer::CopyFrom(void* data, size_t nbytes) {
215219
size_t bytes_to_copy = std::min(nbytes - copied, managed_allocations_[i]->allocation_nbytes_);
216220
if (bytes_to_copy == 0) break;
217221

218-
memcpy(static_cast<char*>(managed_allocations_[i]->data_),
219-
static_cast<const char*>(data) + copied, bytes_to_copy);
222+
void* data_plus_copied = static_cast<void*>((static_cast<char*>(data) + copied));
223+
int status =
224+
hexagon_user_dma_1d_sync(managed_allocations_[i]->data_, data_plus_copied, bytes_to_copy);
225+
CHECK_EQ(status, 0);
220226

221227
copied += bytes_to_copy;
222228
}
@@ -239,8 +245,9 @@ void HexagonBuffer::CopyFrom(const HexagonBuffer& other, size_t nbytes) {
239245
CHECK_LE(other.managed_allocations_[i]->allocation_nbytes_,
240246
managed_allocations_[i]->allocation_nbytes_);
241247

242-
memcpy(static_cast<char*>(managed_allocations_[i]->data_),
243-
static_cast<const char*>(other.managed_allocations_[i]->data_), bytes_to_copy);
248+
int status = hexagon_user_dma_1d_sync(managed_allocations_[i]->data_,
249+
other.managed_allocations_[i]->data_, bytes_to_copy);
250+
CHECK_EQ(status, 0);
244251

245252
copied += bytes_to_copy;
246253
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <algorithm>
21+
22+
#include "hexagon_common.h"
23+
#include "hexagon_user_dma_descriptors.h"
24+
#include "hexagon_user_dma_instructions.h"
25+
#include "hexagon_user_dma_registers.h"
26+
27+
namespace tvm {
28+
namespace runtime {
29+
namespace hexagon {
30+
31+
int init_hexagon_user_dma() {
32+
#if defined(__hexagon__)
33+
// reset DMA engine
34+
unsigned int status = dmpause() & DM0_STATUS_MASK;
35+
if (status != DM0_STATUS_IDLE) {
36+
return DMA_FAILURE;
37+
}
38+
#endif
39+
return DMA_SUCCESS;
40+
}
41+
42+
int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length) {
43+
#if defined(__hexagon__)
44+
static int config_dma = init_hexagon_user_dma();
45+
if (config_dma != DMA_SUCCESS) {
46+
return DMA_FAILURE;
47+
}
48+
49+
uint64_t src64 = reinterpret_cast<uint64_t>(src);
50+
// source address limited to 32 bits
51+
if (src64 > DESC_SRC_MASK) {
52+
return DMA_FAILURE;
53+
}
54+
55+
uint64_t dst64 = reinterpret_cast<uint64_t>(dst);
56+
// destination address limited to 32 bits
57+
if (dst64 > DESC_DST_MASK) {
58+
return DMA_FAILURE;
59+
}
60+
61+
// length limited to 24 bits
62+
if (length > DESC_LENGTH_MASK) {
63+
return DMA_FAILURE;
64+
}
65+
66+
uint32_t src32 = src64 & DESC_SRC_MASK;
67+
uint32_t dst32 = dst64 & DESC_DST_MASK;
68+
69+
void* dma_desc = nullptr;
70+
71+
#ifdef _WIN32
72+
dma_desc = _aligned_malloc(DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE);
73+
#else
74+
int ret = posix_memalign(&dma_desc, DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE);
75+
if (ret) {
76+
return DMA_FAILURE;
77+
}
78+
#endif
79+
80+
if (!dma_desc) {
81+
return DMA_FAILURE;
82+
}
83+
84+
dma_desc_set_next(dma_desc, DMA_NULL_PTR);
85+
dma_desc_set_length(dma_desc, length);
86+
dma_desc_set_desctype(dma_desc, DESC_DESCTYPE_1D);
87+
dma_desc_set_dstcomp(dma_desc, DESC_COMP_NONE);
88+
dma_desc_set_srccomp(dma_desc, DESC_COMP_NONE);
89+
dma_desc_set_bypassdst(dma_desc, DESC_BYPASS_OFF);
90+
dma_desc_set_bypasssrc(dma_desc, DESC_BYPASS_OFF);
91+
dma_desc_set_order(dma_desc, DESC_ORDER_ORDER);
92+
dma_desc_set_dstate(dma_desc, DESC_DSTATE_INCOMPLETE);
93+
dma_desc_set_src(dma_desc, src32);
94+
dma_desc_set_dst(dma_desc, dst32);
95+
96+
dmstart(dma_desc);
97+
unsigned int status = dmwait() & DM0_STATUS_MASK;
98+
unsigned int done = dma_desc_get_dstate(dma_desc);
99+
100+
#ifdef _WIN32
101+
_aligned_free(dma_desc);
102+
#else
103+
free(dma_desc);
104+
#endif
105+
106+
if (status == DM0_STATUS_IDLE && done == DESC_DSTATE_COMPLETE) {
107+
return DMA_SUCCESS;
108+
}
109+
return DMA_FAILURE;
110+
#else
111+
memcpy(dst, src, length);
112+
return DMA_SUCCESS;
113+
#endif
114+
}
115+
116+
} // namespace hexagon
117+
} // namespace runtime
118+
} // namespace tvm

0 commit comments

Comments
 (0)