Skip to content

Commit ba80b5e

Browse files
committed
[RUNTIME][OPENCL] OpenCL host pointer support to acheive zero copy
OpenCL supports device memory access to host by memory mapping. OpenCL flag "CL_MEM_ALLOC_HOST_PTR" enable this while creating a memory object. We enable this feature via compilation setting "USE_OPENCL_ENABLE_HOST_PTR" followed by a new API "GetNativePtr" on OpenCLWorkSpace. This allows application directly use hardware allocated memory while preparing the input. From user side we allocate NDArray which same size as graph input, access native memory and finally call set_input_zero_copy to set the input. Psudo code looks like auto narr = tvm::runtime::NDArray::Empty(shape, {kDLFloat, 32, 1}, {kDLOpenCL, 0}); OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); void *nptr = workspace->GetNativePtr(narr); ... access memory pointed by nptr up to the tensor size ... tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input_zero_copy"); set_input(i, narr);
1 parent 06be0b3 commit ba80b5e

File tree

10 files changed

+119
-4
lines changed

10 files changed

+119
-4
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ endif()
2626
# Alernatively, use cmake -DOPTION=VALUE through command-line.
2727
tvm_option(USE_CUDA "Build with CUDA" OFF)
2828
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
29+
tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF)
2930
tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest)
3031
tvm_option(USE_VULKAN "Build with Vulkan" OFF)
3132

cmake/config.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ set(USE_AOCL OFF)
7171
# - /path/to/opencl-sdk: use specific path to opencl-sdk
7272
set(USE_OPENCL OFF)
7373

74+
# Wheather to allow OPENCL cl_mem access to host
75+
# cl_mem will be allocated with CL_MEM_ALLOC_HOST_PTR
76+
# OpenCLWorkspace->GetHostPtr API returns the host accessible pointer
77+
set(USE_OPENCL_ENABLE_HOST_PTR OFF)
78+
7479
# Whether enable Metal runtime
7580
set(USE_METAL OFF)
7681

cmake/modules/LibInfo.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ function(add_lib_info src_file)
8989
TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
9090
TVM_INFO_USE_NNPACK="${USE_NNPACK}"
9191
TVM_INFO_USE_OPENCL="${USE_OPENCL}"
92+
TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR="${USE_OPENCL_ENABLE_HOST_PTR}"
9293
TVM_INFO_USE_OPENCL_GTEST="${USE_OPENCL_GTEST}"
9394
TVM_INFO_USE_OPENMP="${USE_OPENMP}"
9495
TVM_INFO_USE_PAPI="${USE_PAPI}"

cmake/modules/OpenCL.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ if(USE_OPENCL)
7474
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
7575
endif()
7676
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
77+
if(USE_OPENCL_ENABLE_HOST_PTR)
78+
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
79+
endif(USE_OPENCL_ENABLE_HOST_PTR)
7780
else()
7881
list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc)
7982
endif(USE_OPENCL)

cmake/modules/contrib/CLML.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,7 @@ if(USE_CLML_GRAPH_EXECUTOR)
5454
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
5555
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
5656
set(USE_OPENCL ON)
57-
57+
if(USE_OPENCL_ENABLE_HOST_PTR)
58+
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
59+
endif(USE_OPENCL_ENABLE_HOST_PTR)
5860
endif()

src/runtime/opencl/opencl_common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ inline cl_channel_type DTypeToOpenCLChannelType(DLDataType data_type) {
212212
}
213213

214214
class OpenCLThreadEntry;
215+
struct BufferDescriptor;
215216

216217
/*!
217218
* \brief Process global OpenCL workspace.
@@ -290,6 +291,7 @@ class OpenCLWorkspace : public DeviceAPI {
290291
void* AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) final;
291292
void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
292293
Optional<String> mem_scope = NullOpt) final;
294+
void* GetNativePtr(tvm::runtime::NDArray &narr);
293295
void FreeDataSpace(Device dev, void* ptr) final;
294296
void StreamSync(Device dev, TVMStreamHandle stream) final;
295297
void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
@@ -310,6 +312,8 @@ class OpenCLWorkspace : public DeviceAPI {
310312

311313
void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final;
312314

315+
void* CreateHostPtrIfEnabled(BufferDescriptor *desc, Device dev, size_t size);
316+
313317
private:
314318
std::string GetError() {
315319
if (this->devices.size() == 0) return noDevicesErrorMsg;
@@ -377,6 +381,7 @@ struct BufferDescriptor {
377381
static String ScopeFromMemoryLayout(MemoryLayout mem_scope);
378382

379383
cl_mem buffer{nullptr};
384+
cl_uchar* host_ptr{nullptr};
380385
MemoryLayout layout{MemoryLayout::kBuffer1D};
381386
};
382387
} // namespace cl

src/runtime/opencl/opencl_device_api.cc

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929

3030
#include "opencl_common.h"
3131

32+
#ifdef OPENCL_ENABLE_HOST_PTR
33+
#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR
34+
#else
35+
#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE
36+
#endif
37+
3238
namespace tvm {
3339
namespace runtime {
3440
namespace cl {
@@ -191,6 +197,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
191197
}
192198
}
193199

200+
void* OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device dev, size_t size) {
201+
#if defined(OPENCL_ENABLE_HOST_PTR)
202+
cl_int err_code;
203+
desc->host_ptr = reinterpret_cast<cl_uchar*>(
204+
clEnqueueMapBuffer(this->GetQueue(dev), desc->buffer, CL_TRUE, CL_MAP_WRITE, 0,
205+
sizeof(cl_uchar) * size, 0, NULL, NULL, &err_code));
206+
OPENCL_CHECK_ERROR(err_code);
207+
#endif // OPENCL_ENABLE_HOST_PTR
208+
return desc;
209+
}
210+
194211
void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
195212
DLDataType type_hint) {
196213
this->Init();
@@ -201,10 +218,10 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
201218
if (size == 0) {
202219
size = 1;
203220
}
204-
desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
221+
desc->buffer = clCreateBuffer(this->context, CL_MEM_CREATE_FLAGS, size, nullptr, &err_code);
205222
desc->layout = cl::BufferDescriptor::MemoryLayout::kBuffer1D;
206223
OPENCL_CHECK_ERROR(err_code);
207-
return desc;
224+
return CreateHostPtrIfEnabled(desc, dev, size);
208225
}
209226

210227
void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
@@ -226,12 +243,21 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape
226243
return desc;
227244
}
228245

246+
void* OpenCLWorkspace::GetNativePtr(tvm::runtime::NDArray &narr) {
247+
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(narr.operator->()->data);
248+
return desc->host_ptr;
249+
}
250+
229251
void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
230252
// We have to make sure that the memory object is not in the command queue
231253
// for some OpenCL platforms.
232254
OPENCL_CALL(clFinish(this->GetQueue(dev)));
233255

234256
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
257+
if (desc->host_ptr) {
258+
clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
259+
reinterpret_cast<void*>(desc->host_ptr), 0, NULL, NULL);
260+
}
235261
OPENCL_CALL(clReleaseMemObject(desc->buffer));
236262
delete desc;
237263
}
@@ -245,7 +271,7 @@ cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
245271
cl_image_format format = {CL_RGBA, cl_type};
246272
cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 0, 0, 0};
247273
cl_mem mptr =
248-
clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor, nullptr, &err_code);
274+
clCreateImage(this->context, CL_MEM_CREATE_FLAGS, &format, &descriptor, nullptr, &err_code);
249275
OPENCL_CHECK_ERROR(err_code);
250276
return mptr;
251277
}

src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ using f_clEnqueueNDRangeKernel = cl_int (*)(cl_command_queue, cl_kernel, cl_uint
173173
cl_event*);
174174
using f_clCreateCommandQueue = cl_command_queue (*)(cl_context, cl_device_id,
175175
cl_command_queue_properties, cl_int*);
176+
using f_clEnqueueUnmapMemObject = cl_int (*)(cl_command_queue, cl_mem, void*, cl_uint,
177+
const cl_event*, cl_event*);
178+
using f_clEnqueueMapBuffer = void* (*)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t,
179+
size_t, cl_uint, const cl_event*, cl_event*, cl_int*);
180+
176181
} // namespace
177182

178183
cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms) {
@@ -572,3 +577,29 @@ cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device,
572577
return nullptr;
573578
}
574579
}
580+
581+
cl_int clEnqueueUnmapMemObject(cl_command_queue queue, cl_mem memobj, void* mapped_ptr,
582+
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
583+
cl_event* event) {
584+
auto& lib = LibOpenCLWrapper::getInstance();
585+
auto func = (f_clEnqueueUnmapMemObject)lib.getOpenCLFunction("clEnqueueUnmapMemObject");
586+
if (func) {
587+
return func(queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event);
588+
} else {
589+
return CL_INVALID_PLATFORM;
590+
}
591+
}
592+
593+
void* clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
594+
cl_map_flags map_flags, size_t offset, size_t cb,
595+
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
596+
cl_event* event, cl_int* errcode_ret) {
597+
auto& lib = LibOpenCLWrapper::getInstance();
598+
auto func = (f_clEnqueueMapBuffer)lib.getOpenCLFunction("clEnqueueMapBuffer");
599+
if (func) {
600+
return func(command_queue, buffer, blocking_map, map_flags, offset, cb, num_events_in_wait_list,
601+
event_wait_list, event, errcode_ret);
602+
} else {
603+
return nullptr;
604+
}
605+
}

src/support/libinfo.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@
4343
#define TVM_INFO_USE_OPENCL "NOT-FOUND"
4444
#endif
4545

46+
#ifndef TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR
47+
#define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND"
48+
#endif
49+
4650
#ifndef TVM_INFO_USE_OPENCL_GTEST
4751
#define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND"
4852
#endif
@@ -294,6 +298,7 @@ TVM_DLL Map<String, String> GetLibInfo() {
294298
{"USE_MSVC_MT", TVM_INFO_USE_MSVC_MT},
295299
{"USE_NNPACK", TVM_INFO_USE_NNPACK},
296300
{"USE_OPENCL", TVM_INFO_USE_OPENCL},
301+
{"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR},
297302
{"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST},
298303
{"USE_OPENMP", TVM_INFO_USE_OPENMP},
299304
{"USE_PAPI", TVM_INFO_USE_PAPI},
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <gtest/gtest.h>
21+
#include <tvm/runtime/container/optional.h>
22+
23+
#include "../src/runtime/opencl/opencl_common.h"
24+
25+
using namespace tvm::runtime;
26+
using namespace tvm::runtime::cl;
27+
28+
#if defined(OPENCL_ENABLE_HOST_PTR)
29+
TEST(OpenCLNDArray, native_ptr) {
30+
OpenCLWorkspace* workspace = OpenCLWorkspace::Global();
31+
32+
auto A = tvm::runtime::NDArray::Empty({128, 128}, {kDLFloat, 32, 1}, {kDLOpenCL, 0});
33+
void *nptr = workspace->GetNativePtr(A);
34+
memset(nptr, 0x0, 128*128*4);
35+
}
36+
#endif

0 commit comments

Comments
 (0)