diff --git a/sycl/plugins/opencl/ext_functions.inc b/sycl/plugins/opencl/ext_functions.inc new file mode 100644 index 0000000000000..8a28048dc4424 --- /dev/null +++ b/sycl/plugins/opencl/ext_functions.inc @@ -0,0 +1,20 @@ +#ifndef _EXT_FUNCTION_INTEL +#error Undefined _EXT_FUNCTION_INTEL macro expansion +#endif + +#ifndef _EXT_FUNCTION +#error Undefined _EXT_FUNCTION macro expansion +#endif +/* Single list of the extension entry points. The including site must first define _EXT_FUNCTION_INTEL and _EXT_FUNCTION (enforced by the #error checks above) to say what each entry expands to. */ +_EXT_FUNCTION_INTEL(clHostMemAlloc) +_EXT_FUNCTION_INTEL(clDeviceMemAlloc) +_EXT_FUNCTION_INTEL(clSharedMemAlloc) +_EXT_FUNCTION_INTEL(clCreateBufferWithProperties) +_EXT_FUNCTION_INTEL(clMemBlockingFree) +_EXT_FUNCTION_INTEL(clMemFree) +_EXT_FUNCTION_INTEL(clSetKernelArgMemPointer) +_EXT_FUNCTION_INTEL(clEnqueueMemset) +_EXT_FUNCTION_INTEL(clEnqueueMemcpy) +_EXT_FUNCTION_INTEL(clGetMemAllocInfo) +_EXT_FUNCTION(clGetDeviceFunctionPointer) +_EXT_FUNCTION(clSetProgramSpecializationConstant) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index cb8f9502c2f20..12764543cdf0c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -71,19 +73,93 @@ CONSTFIX char clGetDeviceFunctionPointerName[] = #undef CONSTFIX +typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( + cl_device_id device, cl_program program, const char *FuncName, + cl_ulong *ret_ptr); +/* This typedef and the one above now come before ExtFuncsPerContextT, whose _EXT_FUNCTION members are declared with these _fn types. */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +struct ExtFuncsPerContextT; +/* Primary template, declared only; the explicit specializations are generated from ext_functions.inc further down. */ +namespace detail { +template +std::pair get(ExtFuncsPerContextT &); +} // namespace detail +/* Lookup results for one context: for each ext_functions.inc entry a function pointer member (initially nullptr) and an Initialized flag (initially false), plus the mutex Mtx. */ +struct ExtFuncsPerContextT { +#define _EXT_FUNCTION_INTEL(t_pfx) \ + t_pfx##INTEL_fn t_pfx##Func = nullptr; \ + bool t_pfx##Initialized = false; + +#define _EXT_FUNCTION(t_pfx) \ + t_pfx##_fn t_pfx##Func = nullptr; \ + bool t_pfx##Initialized = false; + +#include 
"ext_functions.inc" + +#undef _EXT_FUNCTION +#undef _EXT_FUNCTION_INTEL +/* Locked by getExtFuncFromContext() before it reads or updates the members above. */ + std::mutex Mtx; +/* Member shorthand that forwards to detail::get with this object. */ + template + std::pair get() { + return detail::get(*this); + } +}; + +namespace detail { +/* Emits one get() specialization per entry; it returns a pair built from the Func member and the Initialized flag of that entry. */ +#define _EXT_FUNCTION_COMMON(t_pfx, t_pfx_suff) \ + template <> \ + std::pair get( \ + ExtFuncsPerContextT & Funcs) { \ + using FPtrT = t_pfx_suff##_fn; \ + std::pair Ret{Funcs.t_pfx##Func, \ + Funcs.t_pfx##Initialized}; \ + return Ret; \ + } +#define _EXT_FUNCTION_INTEL(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx##INTEL) +#define _EXT_FUNCTION(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx) + +#include "ext_functions.inc" + +#undef _EXT_FUNCTION +#undef _EXT_FUNCTION_INTEL +#undef _EXT_FUNCTION_COMMON +} // namespace detail +/* Table of ExtFuncsPerContextT objects indexed by context (Caches), with the mutex that getExtFuncFromContext() and piContextRelease() lock while indexing or erasing. */ +struct ExtFuncsCachesT { + std::map Caches; + std::mutex Mtx; +}; +/* Set to a new table in piPluginInit() and deleted and reset to nullptr in piTearDown(). */ +ExtFuncsCachesT *ExtFuncsCaches = nullptr; + // USM helper function to get an extension function pointer template static pi_result getExtFuncFromContext(pi_context context, T *fptr) { // TODO // Potentially redo caching as PI interface changes. - thread_local static std::map FuncPtrs; + ExtFuncsPerContextT *PerContext = nullptr; + { + assert(ExtFuncsCaches); + std::lock_guard Lock{ExtFuncsCaches->Mtx}; +/* operator[] default-constructs the entry the first time this context is looked up. */ + PerContext = &ExtFuncsCaches->Caches[context]; + } +/* The table lock is released at the closing brace above; the per-context lock taken next is held until the function returns. NOTE(review): piContextRelease() erases the map entry under the table lock only, so PerContext could dangle if a release races with this lookup - confirm callers rule that out. */ + std::lock_guard Lock{PerContext->Mtx}; + std::pair FuncInitialized = PerContext->get(); // if cached, return cached FuncPtr - if (auto F = FuncPtrs[context]) { + if (FuncInitialized.second) { // if cached that extension is not available return nullptr and // PI_INVALID_VALUE - *fptr = F; - return F ? PI_SUCCESS : PI_INVALID_VALUE; + *fptr = FuncInitialized.first; + return *fptr ? PI_SUCCESS : PI_INVALID_VALUE; } cl_uint deviceCount; @@ -115,14 +191,17 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { T FuncPtr = (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); + // We're about to store the cached value. Mark this cache entry initialized. 
+ FuncInitialized.second = true; +/* NOTE(review): these writes only update the cache if the pair elements are references; the element types are not visible in this text - confirm. */ if (!FuncPtr) { // Cache that the extension is not available - FuncPtrs[context] = nullptr; + FuncInitialized.first = nullptr; return PI_INVALID_VALUE; } + FuncInitialized.first = FuncPtr; *fptr = FuncPtr; - FuncPtrs[context] = FuncPtr; return cast(ret_err); } @@ -561,9 +640,6 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( - cl_device_id device, cl_program program, const char *FuncName, - cl_ulong *ret_ptr); pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret) { @@ -1304,10 +1380,6 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, } } -typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( - cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value); - pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, @@ -1383,9 +1455,21 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { (void)PluginParameter; +/* Destroys the table and every per-context entry still in it; the pointer is reset on the next line. */ delete ExtFuncsCaches; + ExtFuncsCaches = nullptr; return PI_SUCCESS; } +pi_result piContextRelease(pi_context Context) { +/* Drop the cached entry for Context while holding the table lock, then release the OpenCL context. */ { + std::lock_guard Lock{ExtFuncsCaches->Mtx}; +/* NOTE(review): the erase is unconditional on every release call, and ExtFuncsCaches is dereferenced without the assert used in getExtFuncFromContext() - confirm both are intended. */ + ExtFuncsCaches->Caches.erase(Context); + } + + return cast(clReleaseContext(cast(Context))); +} + pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { @@ -1397,6 +1481,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { // PI interface supports higher version or the same version. 
strncpy(PluginInit->PluginVersion, SupportedVersion, 4); +/* Allocates the table that piTearDown() deletes. NOTE(review): the pointer is overwritten unconditionally, so a repeated piPluginInit() call would leak the earlier table - confirm init runs once. */ ExtFuncsCaches = new ExtFuncsCachesT; + #define _PI_CL(pi_api, ocl_api) \ (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); @@ -1420,7 +1506,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piContextCreate, piContextCreate) _PI_CL(piContextGetInfo, clGetContextInfo) _PI_CL(piContextRetain, clRetainContext) - _PI_CL(piContextRelease, clReleaseContext) + _PI_CL(piContextRelease, piContextRelease) _PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle) _PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) // Queue diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index caf4f72b48adb..2e3b45e571f50 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -8,6 +8,7 @@ # UNSUPPORTED: libcxx piContextCreate +piContextRelease piDeviceGetInfo piDevicesGet piEnqueueMemBufferMap