Skip to content

Commit 7fb50eb

Browse files
committed
[RUNTIME][OPENCL] OpenCL host pointer support to achieve zero copy
OpenCL allows the host to access device memory through memory mapping. The OpenCL flag "CL_MEM_ALLOC_HOST_PTR" enables this when a memory object is created. We enable this feature via the compilation setting "USE_OPENCL_ENABLE_HOST_PTR", together with a new API "GetNativePtr" on DeviceAPI and on the NDArray class. This allows an application to directly use hardware-allocated memory while preparing the input. On the user side, we allocate an NDArray of the same size as the graph input, access its native memory, and finally call set_input_zero_copy to set the input. Pseudo code looks like: auto narr = tvm::runtime::NDArray::Empty(shape, {kDLFloat, 32, 1}, {kDLOpenCL, 0}); void* nptr = narr.GetNativePtr(); ... access memory pointed to by nptr up to the tensor size ... tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input_zero_copy"); set_input(i, narr);
1 parent ad5c811 commit 7fb50eb

File tree

12 files changed

+95
-4
lines changed

12 files changed

+95
-4
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ endif()
2626
# Alternatively, use cmake -DOPTION=VALUE through command-line.
2727
tvm_option(USE_CUDA "Build with CUDA" OFF)
2828
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
29+
tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF)
2930
tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest)
3031
tvm_option(USE_VULKAN "Build with Vulkan" OFF)
3132

cmake/config.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ set(USE_AOCL OFF)
7171
# - /path/to/opencl-sdk: use specific path to opencl-sdk
7272
set(USE_OPENCL OFF)
7373

74+
# Whether to allow OpenCL cl_mem access from the host
75+
# cl_mem will be allocated with CL_MEM_ALLOC_HOST_PTR
76+
# NDArray::GetNativePtr API returns the host accessible pointer
77+
set(USE_OPENCL_ENABLE_HOST_PTR OFF)
78+
7479
# Whether enable Metal runtime
7580
set(USE_METAL OFF)
7681

cmake/modules/OpenCL.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ if(USE_OPENCL)
6464
)
6565
endif()
6666
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
67+
if(USE_OPENCL_ENABLE_HOST_PTR)
68+
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
69+
endif(USE_OPENCL_ENABLE_HOST_PTR)
6770
else()
6871
list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc)
6972
endif(USE_OPENCL)

cmake/modules/contrib/CLML.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,7 @@ if(USE_CLML_GRAPH_EXECUTOR)
5454
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
5555
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
5656
set(USE_OPENCL ON)
57-
57+
if(USE_OPENCL_ENABLE_HOST_PTR)
58+
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
59+
endif(USE_OPENCL_ENABLE_HOST_PTR)
5860
endif()

include/tvm/runtime/device_api.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ class TVM_DLL DeviceAPI {
124124
*/
125125
virtual void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
126126
Optional<String> mem_scope = NullOpt);
127+
/*!
128+
* \brief Return host accessible native pointer if the underlying device supports it.
129+
* \return pointer to memory or nullptr if not supported
130+
*/
131+
virtual void* GetNativePtr(Device dev, void* ptr);
127132
/*!
128133
* \brief Free a data space on device.
129134
* \param dev The device device to perform operation.

include/tvm/runtime/ndarray.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ class NDArray : public ObjectRef {
155155
*/
156156
TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
157157
Optional<String> mem_scope = NullOpt);
158+
/*!
159+
* \brief Return host accessible native pointer if the underlying device supports it.
160+
* \return pointer to memory or nullptr if not supported
161+
*/
162+
TVM_DLL void* GetNativePtr();
158163
/*!
159164
* \brief Create a NDArray backed by an external DLTensor without memory copying.
160165
*

src/runtime/c_runtime_api.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ void* DeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDa
171171
return nullptr;
172172
}
173173

174+
/*! \brief Base-class fallback for GetNativePtr: reports that no native pointer is available. */
void* DeviceAPI::GetNativePtr(Device dev, void* ptr) {
  // Neither argument is consulted here; the answer is always nullptr.
  return nullptr;
}
175+
174176
void DeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
175177
// by default, we can always redirect to the flat memory copy operation.
176178
size_t nbytes = GetDataSize(*from);

src/runtime/ndarray.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,11 @@ NDArray NDArray::Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional<
205205
return ret;
206206
}
207207

208+
void* NDArray::GetNativePtr() {
209+
DLTensor dl_tensor = get_mutable()->dl_tensor;
210+
return DeviceAPI::Get(dl_tensor.device)->GetNativePtr(dl_tensor.device, dl_tensor.data);
211+
}
212+
208213
NDArray NDArray::FromExternalDLTensor(const DLTensor& dl_tensor) {
209214
ICHECK(::tvm::runtime::IsContiguous(dl_tensor)) << "External DLTensor must be contiguous.";
210215
ICHECK(IsAligned(dl_tensor)) << "Data in DLTensor is not aligned as required by NDArray";

src/runtime/opencl/opencl_common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ inline cl_channel_type DTypeToOpenCLChannelType(DLDataType data_type) {
213213
}
214214

215215
class OpenCLThreadEntry;
216+
struct BufferDescriptor;
216217

217218
/*!
218219
* \brief Process global OpenCL workspace.
@@ -291,6 +292,7 @@ class OpenCLWorkspace : public DeviceAPI {
291292
void* AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) final;
292293
void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
293294
Optional<String> mem_scope = NullOpt) final;
295+
void* GetNativePtr(Device dev, void* ptr) final;
294296
void FreeDataSpace(Device dev, void* ptr) final;
295297
void StreamSync(Device dev, TVMStreamHandle stream) final;
296298
void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
@@ -310,6 +312,8 @@ class OpenCLWorkspace : public DeviceAPI {
310312
static OpenCLWorkspace* Global();
311313

312314
void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final;
315+
316+
void* CreateHostPtrIfEnabled(BufferDescriptor *desc, Device dev, size_t size);
313317
};
314318

315319
/*! \brief Thread local workspace */
@@ -371,6 +375,7 @@ struct BufferDescriptor {
371375
static String ScopeFromMemoryLayout(MemoryLayout mem_scope);
372376

373377
cl_mem buffer{nullptr};
378+
cl_uchar *host_ptr{nullptr};
374379
MemoryLayout layout{MemoryLayout::kBuffer1D};
375380
};
376381
} // namespace cl

src/runtime/opencl/opencl_device_api.cc

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727

2828
#include "opencl_common.h"
2929

30+
#ifdef OPENCL_ENABLE_HOST_PTR
31+
// Parenthesized so the combined flag set stays intact when the macro is used
// inside a larger expression (e.g. `CL_MEM_CREATE_FLAGS & mask`).
#define CL_MEM_CREATE_FLAGS (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR)
32+
#else
33+
#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE
34+
#endif
35+
3036
namespace tvm {
3137
namespace runtime {
3238
namespace cl {
@@ -197,6 +203,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
197203
}
198204
}
199205

206+
/*!
 * \brief Map desc->buffer for host access when built with OPENCL_ENABLE_HOST_PTR.
 * \param desc Descriptor whose buffer is mapped; its host_ptr field receives the mapping.
 * \param dev Device whose command queue is used to issue the map.
 * \param size Number of bytes to map, starting at offset 0.
 * \return The descriptor that was passed in.
 */
void* OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device dev, size_t size) {
#if defined(OPENCL_ENABLE_HOST_PTR)
  cl_int map_status;
  // Blocking (CL_TRUE) map with write access over the first `size` bytes.
  void* mapped = clEnqueueMapBuffer(this->GetQueue(dev), desc->buffer, CL_TRUE, CL_MAP_WRITE, 0,
                                    sizeof(cl_uchar) * size, 0, nullptr, nullptr, &map_status);
  desc->host_ptr = static_cast<cl_uchar*>(mapped);
  OPENCL_CHECK_ERROR(map_status);
#endif  // OPENCL_ENABLE_HOST_PTR
  return desc;
}
216+
200217
void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
201218
DLDataType type_hint) {
202219
this->Init();
@@ -207,10 +224,10 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
207224
if (size == 0) {
208225
size = 1;
209226
}
210-
desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
227+
desc->buffer = clCreateBuffer(this->context, CL_MEM_CREATE_FLAGS, size, nullptr, &err_code);
211228
desc->layout = cl::BufferDescriptor::MemoryLayout::kBuffer1D;
212229
OPENCL_CHECK_ERROR(err_code);
213-
return desc;
230+
return CreateHostPtrIfEnabled(desc, dev, size);
214231
}
215232

216233
void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
@@ -232,12 +249,21 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape
232249
return desc;
233250
}
234251

252+
/*!
 * \brief Return the host_ptr recorded in the buffer descriptor behind \p ptr.
 * \param dev Unused here.
 * \param ptr Handle that is reinterpreted as a cl::BufferDescriptor.
 * \return The descriptor's host_ptr field (nullptr when nothing was stored in it).
 */
void* OpenCLWorkspace::GetNativePtr(Device dev, void* ptr) {
  const auto* descriptor = static_cast<const cl::BufferDescriptor*>(ptr);
  return descriptor->host_ptr;
}
256+
235257
/*!
 * \brief Release the OpenCL memory object described by \p ptr and delete its descriptor.
 * \param dev Device whose command queue is used for the unmap and the finish.
 * \param ptr Handle that is reinterpreted as a cl::BufferDescriptor.
 */
void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
  cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
  if (desc->host_ptr != nullptr) {
    // Enqueue the unmap *before* draining the queue: issuing it after clFinish
    // would put the memory object back into the command queue right before it
    // is released. The status is checked like the other OpenCL calls here.
    OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, desc->host_ptr, 0,
                                        nullptr, nullptr));
  }
  // We have to make sure that the memory object is not in the command queue
  // for some OpenCL platforms.
  OPENCL_CALL(clFinish(this->GetQueue(dev)));

  OPENCL_CALL(clReleaseMemObject(desc->buffer));
  delete desc;
}
@@ -251,7 +277,7 @@ cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
251277
cl_image_format format = {CL_RGBA, cl_type};
252278
cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 0, 0, 0};
253279
cl_mem mptr =
254-
clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor, nullptr, &err_code);
280+
clCreateImage(this->context, CL_MEM_CREATE_FLAGS, &format, &descriptor, nullptr, &err_code);
255281
OPENCL_CHECK_ERROR(err_code);
256282
return mptr;
257283
}

0 commit comments

Comments
 (0)