From eeff988b888862add133fe1d75f00825e3d707e4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 11 Jan 2022 16:46:56 +0300 Subject: [PATCH 1/8] [SYCL] Clear extensions functions cache upon context release This is to eliminate reuse of invalid cached values after context being released. Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/pi_opencl.cpp | 150 +++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 14 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index cb8f9502c2f20..28a74f8294e8c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -71,19 +73,105 @@ CONSTFIX char clGetDeviceFunctionPointerName[] = #undef CONSTFIX +typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( + cl_device_id device, cl_program program, const char *FuncName, + cl_ulong *ret_ptr); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +// For the time being, cache is split into multiple maps of type +// `context -> function_type'. +// There's another way. A mapping of context to collection of function pointers. +// Though, the former design allows for simultaneous access for different +// function pointer for different contexts. +template +struct ExtFuncCache { + std::map Cache; + // FIXME Use spin-lock to make lock/unlock faster and w/o context switching + std::mutex Mtx; +}; + +struct ExtFuncCacheCollection; + +namespace detail { + template + ExtFuncCache &get(::ExtFuncCacheCollection &); +} // namespace detail + +struct ExtFuncCacheCollection { + template + ExtFuncCache &get() { + return detail::get(*this); + } + +#define DEFINE_INTEL(t_pfx) \ + ExtFuncCache t_pfx ## _Cache +#define DEFINE(t_pfx) \ + ExtFuncCache t_pfx ## _Cache + + DEFINE_INTEL(clHostMemAlloc); + DEFINE_INTEL(clDeviceMemAlloc); + DEFINE_INTEL(clSharedMemAlloc); + DEFINE_INTEL(clCreateBufferWithProperties); + DEFINE_INTEL(clMemBlockingFree); + DEFINE_INTEL(clMemFree); + DEFINE_INTEL(clSetKernelArgMemPointer); + DEFINE_INTEL(clEnqueueMemset); + DEFINE_INTEL(clEnqueueMemcpy); + DEFINE_INTEL(clGetMemAllocInfo); + DEFINE(clGetDeviceFunctionPointer); + DEFINE(clSetProgramSpecializationConstant); +#undef DEFINE +#undef DEFINE_INTEL +}; + +namespace detail { +#define DEFINE_GETTER_INTEL(t_pfx) \ + template<> ExtFuncCache &get(::ExtFuncCacheCollection &C) { \ + return C.t_pfx ## _Cache; \ + } +#define DEFINE_GETTER(t_pfx) \ + template<> ExtFuncCache &get(::ExtFuncCacheCollection &C) { \ + return C.t_pfx ## _Cache; \ + } + + DEFINE_GETTER_INTEL(clHostMemAlloc) + DEFINE_GETTER_INTEL(clDeviceMemAlloc) + DEFINE_GETTER_INTEL(clSharedMemAlloc) + DEFINE_GETTER_INTEL(clCreateBufferWithProperties) + DEFINE_GETTER_INTEL(clMemBlockingFree) + DEFINE_GETTER_INTEL(clMemFree) + DEFINE_GETTER_INTEL(clSetKernelArgMemPointer) + DEFINE_GETTER_INTEL(clEnqueueMemset) + DEFINE_GETTER_INTEL(clEnqueueMemcpy) + DEFINE_GETTER_INTEL(clGetMemAllocInfo) + DEFINE_GETTER(clGetDeviceFunctionPointer) + DEFINE_GETTER(clSetProgramSpecializationConstant) +#undef DEFINE_GETTER +#undef DEFINE_GETTER_INTEL +} // namespace detail + +ExtFuncCacheCollection *ExtFuncCaches = nullptr; + // USM helper function to get an extension function pointer template static pi_result getExtFuncFromContext(pi_context context, T *fptr) { // TODO // Potentially redo caching as PI interface changes. - thread_local static std::map FuncPtrs; + ExtFuncCache &Cache = ExtFuncCaches->get(); + + std::lock_guard CacheLock{Cache.Mtx}; + + auto It = Cache.Cache.find(context); // if cached, return cached FuncPtr - if (auto F = FuncPtrs[context]) { + if (It != Cache.Cache.end()) { // if cached that extension is not available return nullptr and // PI_INVALID_VALUE - *fptr = F; - return F ? PI_SUCCESS : PI_INVALID_VALUE; + *fptr = It->second; + return It->second ? PI_SUCCESS : PI_INVALID_VALUE; } cl_uint deviceCount; @@ -117,12 +205,12 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { if (!FuncPtr) { // Cache that the extension is not available - FuncPtrs[context] = nullptr; + Cache.Cache[context] = nullptr; return PI_INVALID_VALUE; } *fptr = FuncPtr; - FuncPtrs[context] = FuncPtr; + Cache.Cache[context] = FuncPtr; return cast(ret_err); } @@ -561,9 +649,6 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( - cl_device_id device, cl_program program, const char *FuncName, - cl_ulong *ret_ptr); pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret) { @@ -1304,10 +1389,6 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, } } -typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( - cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value); - pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, @@ -1383,9 +1464,48 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { (void)PluginParameter; + delete ExtFuncCaches; + ExtFuncCaches = nullptr; return PI_SUCCESS; } +pi_result piContextRelease(pi_context Context) { +#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx) \ + { \ + ExtFuncCache &Cache = ExtFuncCaches->get(); \ + std::lock_guard CacheLock{Cache.Mtx}; \ + auto It = Cache.Cache.find(Context); \ + if (It != Cache.Cache.end()) \ + Cache.Cache.erase(It); \ + } +#define RELEASE_EXT_FUNCS_CACHE(t_pfx) \ + { \ + ExtFuncCache &Cache = ExtFuncCaches->get(); \ + std::lock_guard CacheLock{Cache.Mtx}; \ + auto It = Cache.Cache.find(Context); \ + if (It != Cache.Cache.end()) \ + Cache.Cache.erase(It); \ + } + + + RELEASE_EXT_FUNCS_CACHE_INTEL(clHostMemAlloc); + RELEASE_EXT_FUNCS_CACHE_INTEL(clDeviceMemAlloc); + RELEASE_EXT_FUNCS_CACHE_INTEL(clSharedMemAlloc); + RELEASE_EXT_FUNCS_CACHE_INTEL(clCreateBufferWithProperties); + RELEASE_EXT_FUNCS_CACHE_INTEL(clMemBlockingFree); + RELEASE_EXT_FUNCS_CACHE_INTEL(clMemFree); + RELEASE_EXT_FUNCS_CACHE_INTEL(clSetKernelArgMemPointer); + RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemset); + RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemcpy); + RELEASE_EXT_FUNCS_CACHE_INTEL(clGetMemAllocInfo); + RELEASE_EXT_FUNCS_CACHE(clGetDeviceFunctionPointer); + RELEASE_EXT_FUNCS_CACHE(clSetProgramSpecializationConstant); +#undef RELEASE_EXT_FUNCS_CACHE +#undef RELEASE_EXT_FUNCS_CACHE_INTEL + + return cast(clReleaseContext(cast(Context))); +} + pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { @@ -1397,6 +1517,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { // PI interface supports higher version or the same version. strncpy(PluginInit->PluginVersion, SupportedVersion, 4); + ExtFuncCaches = new ExtFuncCacheCollection; + #define _PI_CL(pi_api, ocl_api) \ (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); @@ -1420,7 +1542,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piContextCreate, piContextCreate) _PI_CL(piContextGetInfo, clGetContextInfo) _PI_CL(piContextRetain, clRetainContext) - _PI_CL(piContextRelease, clReleaseContext) + _PI_CL(piContextRelease, piContextRelease) _PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle) _PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) // Queue From 6878a50851c10bed8dbe868bd6701d776965a643 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 11 Jan 2022 17:00:21 +0300 Subject: [PATCH 2/8] Stylistic change Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/pi_opencl.cpp | 85 ++++++++++++++++--------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 28a74f8294e8c..a80a80ccf0e12 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -86,8 +86,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( // There's another way. A mapping of context to collection of function pointers. // Though, the former design allows for simultaneous access for different // function pointer for different contexts. -template -struct ExtFuncCache { +template struct ExtFuncCache { std::map Cache; // FIXME Use spin-lock to make lock/unlock faster and w/o context switching std::mutex Mtx; @@ -96,8 +95,8 @@ struct ExtFuncCache { struct ExtFuncCacheCollection; namespace detail { - template - ExtFuncCache &get(::ExtFuncCacheCollection &); +template +ExtFuncCache &get(::ExtFuncCacheCollection &); } // namespace detail struct ExtFuncCacheCollection { @@ -106,10 +105,9 @@ struct ExtFuncCacheCollection { return detail::get(*this); } -#define DEFINE_INTEL(t_pfx) \ - ExtFuncCache t_pfx ## _Cache -#define DEFINE(t_pfx) \ - ExtFuncCache t_pfx ## _Cache +#define DEFINE_INTEL(t_pfx) \ + ExtFuncCache t_pfx##_Cache +#define DEFINE(t_pfx) ExtFuncCache t_pfx##_Cache DEFINE_INTEL(clHostMemAlloc); DEFINE_INTEL(clDeviceMemAlloc); @@ -128,27 +126,31 @@ struct ExtFuncCacheCollection { }; namespace detail { -#define DEFINE_GETTER_INTEL(t_pfx) \ - template<> ExtFuncCache &get(::ExtFuncCacheCollection &C) { \ - return C.t_pfx ## _Cache; \ +#define DEFINE_GETTER_INTEL(t_pfx) \ + template <> \ + ExtFuncCache \ + &get(::ExtFuncCacheCollection & C) { \ + return C.t_pfx##_Cache; \ } -#define DEFINE_GETTER(t_pfx) \ - template<> ExtFuncCache &get(::ExtFuncCacheCollection &C) { \ - return C.t_pfx ## _Cache; \ +#define DEFINE_GETTER(t_pfx) \ + template <> \ + ExtFuncCache &get( \ + ::ExtFuncCacheCollection & C) { \ + return C.t_pfx##_Cache; \ } - DEFINE_GETTER_INTEL(clHostMemAlloc) - DEFINE_GETTER_INTEL(clDeviceMemAlloc) - DEFINE_GETTER_INTEL(clSharedMemAlloc) - DEFINE_GETTER_INTEL(clCreateBufferWithProperties) - DEFINE_GETTER_INTEL(clMemBlockingFree) - DEFINE_GETTER_INTEL(clMemFree) - DEFINE_GETTER_INTEL(clSetKernelArgMemPointer) - DEFINE_GETTER_INTEL(clEnqueueMemset) - DEFINE_GETTER_INTEL(clEnqueueMemcpy) - DEFINE_GETTER_INTEL(clGetMemAllocInfo) - DEFINE_GETTER(clGetDeviceFunctionPointer) - DEFINE_GETTER(clSetProgramSpecializationConstant) +DEFINE_GETTER_INTEL(clHostMemAlloc) +DEFINE_GETTER_INTEL(clDeviceMemAlloc) +DEFINE_GETTER_INTEL(clSharedMemAlloc) +DEFINE_GETTER_INTEL(clCreateBufferWithProperties) +DEFINE_GETTER_INTEL(clMemBlockingFree) +DEFINE_GETTER_INTEL(clMemFree) +DEFINE_GETTER_INTEL(clSetKernelArgMemPointer) +DEFINE_GETTER_INTEL(clEnqueueMemset) +DEFINE_GETTER_INTEL(clEnqueueMemcpy) +DEFINE_GETTER_INTEL(clGetMemAllocInfo) +DEFINE_GETTER(clGetDeviceFunctionPointer) +DEFINE_GETTER(clSetProgramSpecializationConstant) #undef DEFINE_GETTER #undef DEFINE_GETTER_INTEL } // namespace detail @@ -1470,24 +1472,25 @@ pi_result piTearDown(void *PluginParameter) { } pi_result piContextRelease(pi_context Context) { -#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx) \ - { \ - ExtFuncCache &Cache = ExtFuncCaches->get(); \ - std::lock_guard CacheLock{Cache.Mtx}; \ - auto It = Cache.Cache.find(Context); \ - if (It != Cache.Cache.end()) \ - Cache.Cache.erase(It); \ +#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx) \ + { \ + ExtFuncCache &Cache = \ + ExtFuncCaches->get(); \ + std::lock_guard CacheLock{Cache.Mtx}; \ + auto It = Cache.Cache.find(Context); \ + if (It != Cache.Cache.end()) \ + Cache.Cache.erase(It); \ } -#define RELEASE_EXT_FUNCS_CACHE(t_pfx) \ - { \ - ExtFuncCache &Cache = ExtFuncCaches->get(); \ - std::lock_guard CacheLock{Cache.Mtx}; \ - auto It = Cache.Cache.find(Context); \ - if (It != Cache.Cache.end()) \ - Cache.Cache.erase(It); \ +#define RELEASE_EXT_FUNCS_CACHE(t_pfx) \ + { \ + ExtFuncCache &Cache = \ + ExtFuncCaches->get(); \ + std::lock_guard CacheLock{Cache.Mtx}; \ + auto It = Cache.Cache.find(Context); \ + if (It != Cache.Cache.end()) \ + Cache.Cache.erase(It); \ } - RELEASE_EXT_FUNCS_CACHE_INTEL(clHostMemAlloc); RELEASE_EXT_FUNCS_CACHE_INTEL(clDeviceMemAlloc); RELEASE_EXT_FUNCS_CACHE_INTEL(clSharedMemAlloc); From db047b88cd3ad81a3a3deb6b54bea9466eb49a2a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 12 Jan 2022 10:24:51 +0300 Subject: [PATCH 3/8] Add new symbol to ABI test Signed-off-by: Sergey Kanaev --- sycl/test/abi/pi_opencl_symbol_check.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index caf4f72b48adb..2e3b45e571f50 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -8,6 +8,7 @@ # UNSUPPORTED: libcxx piContextCreate +piContextRelease piDeviceGetInfo piDevicesGet piEnqueueMemBufferMap From 65afd32fa6577b4c77773536a2e513a867c8d84e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 13 Jan 2022 17:27:45 +0300 Subject: [PATCH 4/8] Change structure of cache to reduce number of locks Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/ext_functions.inc | 20 +++ sycl/plugins/opencl/pi_opencl.cpp | 167 ++++++++++---------------- 2 files changed, 86 insertions(+), 101 deletions(-) create mode 100644 sycl/plugins/opencl/ext_functions.inc diff --git a/sycl/plugins/opencl/ext_functions.inc b/sycl/plugins/opencl/ext_functions.inc new file mode 100644 index 0000000000000..b305ff13d1f5d --- /dev/null +++ b/sycl/plugins/opencl/ext_functions.inc @@ -0,0 +1,20 @@ +#ifndef _EXT_FUNCTION_INTEL +#error Undefined _EXT_FUNCTION_INTEL macro expanstion +#endif + +#ifndef _EXT_FUNCTION +#error Undefined _EXT_FUNCTION macro expanstion +#endif + +_EXT_FUNCTION_INTEL(clHostMemAlloc) +_EXT_FUNCTION_INTEL(clDeviceMemAlloc) +_EXT_FUNCTION_INTEL(clSharedMemAlloc) +_EXT_FUNCTION_INTEL(clCreateBufferWithProperties) +_EXT_FUNCTION_INTEL(clMemBlockingFree) +_EXT_FUNCTION_INTEL(clMemFree) +_EXT_FUNCTION_INTEL(clSetKernelArgMemPointer) +_EXT_FUNCTION_INTEL(clEnqueueMemset) +_EXT_FUNCTION_INTEL(clEnqueueMemcpy) +_EXT_FUNCTION_INTEL(clGetMemAllocInfo) +_EXT_FUNCTION(clGetDeviceFunctionPointer) +_EXT_FUNCTION(clSetProgramSpecializationConstant) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index a80a80ccf0e12..9234b8154867c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -81,99 +81,90 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value); -// For the time being, cache is split into multiple maps of type -// `context -> function_type'. -// There's another way. A mapping of context to collection of function pointers. -// Though, the former design allows for simultaneous access for different -// function pointer for different contexts. -template struct ExtFuncCache { - std::map Cache; - // FIXME Use spin-lock to make lock/unlock faster and w/o context switching - std::mutex Mtx; -}; - -struct ExtFuncCacheCollection; +struct ExtFuncsPerContextT; namespace detail { -template -ExtFuncCache &get(::ExtFuncCacheCollection &); + template + std::pair get(ExtFuncsPerContextT &); } // namespace detail -struct ExtFuncCacheCollection { +struct ExtFuncsPerContextT { +#define _EXT_FUNCTION_INTEL(t_pfx) \ + t_pfx##INTEL_fn t_pfx##Func = nullptr; \ + bool t_pfx##Initialized = false; + +#define _EXT_FUNCTION(t_pfx) \ + t_pfx##_fn t_pfx##Func = nullptr; \ + bool t_pfx##Initialized = false; + +#include "ext_functions.inc" + +#undef _EXT_FUNCTION +#undef _EXT_FUNCTION_INTEL + + std::mutex Mtx; + template - ExtFuncCache &get() { + std::pair get() { return detail::get(*this); } - -#define DEFINE_INTEL(t_pfx) \ - ExtFuncCache t_pfx##_Cache -#define DEFINE(t_pfx) ExtFuncCache t_pfx##_Cache - - DEFINE_INTEL(clHostMemAlloc); - DEFINE_INTEL(clDeviceMemAlloc); - DEFINE_INTEL(clSharedMemAlloc); - DEFINE_INTEL(clCreateBufferWithProperties); - DEFINE_INTEL(clMemBlockingFree); - DEFINE_INTEL(clMemFree); - DEFINE_INTEL(clSetKernelArgMemPointer); - DEFINE_INTEL(clEnqueueMemset); - DEFINE_INTEL(clEnqueueMemcpy); - DEFINE_INTEL(clGetMemAllocInfo); - DEFINE(clGetDeviceFunctionPointer); - DEFINE(clSetProgramSpecializationConstant); -#undef DEFINE -#undef DEFINE_INTEL }; namespace detail { -#define DEFINE_GETTER_INTEL(t_pfx) \ +#define _EXT_FUNCTION_INTEL(t_pfx) \ template <> \ - ExtFuncCache \ - &get(::ExtFuncCacheCollection & C) { \ - return C.t_pfx##_Cache; \ + std::pair get( \ + ExtFuncsPerContextT &Funcs) { \ + using FPtrT = t_pfx##INTEL_fn; \ + std::pair Ret{ \ + Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \ + return Ret; \ } -#define DEFINE_GETTER(t_pfx) \ + +#define _EXT_FUNCTION(t_pfx) \ template <> \ - ExtFuncCache &get( \ - ::ExtFuncCacheCollection & C) { \ - return C.t_pfx##_Cache; \ + std::pair get( \ + ExtFuncsPerContextT &Funcs) { \ + using FPtrT = t_pfx##_fn; \ + std::pair Ret{ \ + Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \ + return Ret; \ } -DEFINE_GETTER_INTEL(clHostMemAlloc) -DEFINE_GETTER_INTEL(clDeviceMemAlloc) -DEFINE_GETTER_INTEL(clSharedMemAlloc) -DEFINE_GETTER_INTEL(clCreateBufferWithProperties) -DEFINE_GETTER_INTEL(clMemBlockingFree) -DEFINE_GETTER_INTEL(clMemFree) -DEFINE_GETTER_INTEL(clSetKernelArgMemPointer) -DEFINE_GETTER_INTEL(clEnqueueMemset) -DEFINE_GETTER_INTEL(clEnqueueMemcpy) -DEFINE_GETTER_INTEL(clGetMemAllocInfo) -DEFINE_GETTER(clGetDeviceFunctionPointer) -DEFINE_GETTER(clSetProgramSpecializationConstant) -#undef DEFINE_GETTER -#undef DEFINE_GETTER_INTEL +#include "ext_functions.inc" + +#undef _EXT_FUNCTION +#undef _EXT_FUNCTION_INTEL } // namespace detail -ExtFuncCacheCollection *ExtFuncCaches = nullptr; +struct ExtFuncsCachesT { + std::map Caches; + std::mutex Mtx; +}; + +ExtFuncsCachesT *ExtFuncsCaches = nullptr; // USM helper function to get an extension function pointer template static pi_result getExtFuncFromContext(pi_context context, T *fptr) { // TODO // Potentially redo caching as PI interface changes. - ExtFuncCache &Cache = ExtFuncCaches->get(); + ExtFuncsPerContextT *PerContext = nullptr; + { + std::lock_guard Lock{ExtFuncsCaches->Mtx}; - std::lock_guard CacheLock{Cache.Mtx}; + PerContext = &ExtFuncsCaches->Caches[context]; + } - auto It = Cache.Cache.find(context); + std::lock_guard Lock{PerContext->Mtx}; + std::pair FuncInitialized = PerContext->get(); // if cached, return cached FuncPtr - if (It != Cache.Cache.end()) { + if (FuncInitialized.second) { // if cached that extension is not available return nullptr and // PI_INVALID_VALUE - *fptr = It->second; - return It->second ? PI_SUCCESS : PI_INVALID_VALUE; + *fptr = FuncInitialized.first; + return *fptr ? PI_SUCCESS : PI_INVALID_VALUE; } cl_uint deviceCount; @@ -207,12 +198,14 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { if (!FuncPtr) { // Cache that the extension is not available - Cache.Cache[context] = nullptr; + FuncInitialized.first = nullptr; + FuncInitialized.second = true; return PI_INVALID_VALUE; } + FuncInitialized.first = FuncPtr; + FuncInitialized.second = true; *fptr = FuncPtr; - Cache.Cache[context] = FuncPtr; return cast(ret_err); } @@ -1466,45 +1459,17 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { (void)PluginParameter; - delete ExtFuncCaches; - ExtFuncCaches = nullptr; + delete ExtFuncsCaches; + ExtFuncsCaches = nullptr; return PI_SUCCESS; } pi_result piContextRelease(pi_context Context) { -#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx) \ - { \ - ExtFuncCache &Cache = \ - ExtFuncCaches->get(); \ - std::lock_guard CacheLock{Cache.Mtx}; \ - auto It = Cache.Cache.find(Context); \ - if (It != Cache.Cache.end()) \ - Cache.Cache.erase(It); \ - } -#define RELEASE_EXT_FUNCS_CACHE(t_pfx) \ - { \ - ExtFuncCache &Cache = \ - ExtFuncCaches->get(); \ - std::lock_guard CacheLock{Cache.Mtx}; \ - auto It = Cache.Cache.find(Context); \ - if (It != Cache.Cache.end()) \ - Cache.Cache.erase(It); \ - } + { + std::lock_guard Lock{ExtFuncsCaches->Mtx}; - RELEASE_EXT_FUNCS_CACHE_INTEL(clHostMemAlloc); - RELEASE_EXT_FUNCS_CACHE_INTEL(clDeviceMemAlloc); - RELEASE_EXT_FUNCS_CACHE_INTEL(clSharedMemAlloc); - RELEASE_EXT_FUNCS_CACHE_INTEL(clCreateBufferWithProperties); - RELEASE_EXT_FUNCS_CACHE_INTEL(clMemBlockingFree); - RELEASE_EXT_FUNCS_CACHE_INTEL(clMemFree); - RELEASE_EXT_FUNCS_CACHE_INTEL(clSetKernelArgMemPointer); - RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemset); - RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemcpy); - RELEASE_EXT_FUNCS_CACHE_INTEL(clGetMemAllocInfo); - RELEASE_EXT_FUNCS_CACHE(clGetDeviceFunctionPointer); - RELEASE_EXT_FUNCS_CACHE(clSetProgramSpecializationConstant); -#undef RELEASE_EXT_FUNCS_CACHE -#undef RELEASE_EXT_FUNCS_CACHE_INTEL + ExtFuncsCaches->Caches.erase(Context); + } return cast(clReleaseContext(cast(Context))); } @@ -1520,7 +1485,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { // PI interface supports higher version or the same version. strncpy(PluginInit->PluginVersion, SupportedVersion, 4); - ExtFuncCaches = new ExtFuncCacheCollection; + ExtFuncsCaches = new ExtFuncsCachesT; #define _PI_CL(pi_api, ocl_api) \ (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); From d212fe7383116ce5b5f0c009ad409fbf0be32c5e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 13 Jan 2022 17:44:54 +0300 Subject: [PATCH 5/8] Stylistic change Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/pi_opencl.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9234b8154867c..0d219d25ad302 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -84,8 +84,8 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( struct ExtFuncsPerContextT; namespace detail { - template - std::pair get(ExtFuncsPerContextT &); +template +std::pair get(ExtFuncsPerContextT &); } // namespace detail struct ExtFuncsPerContextT { @@ -114,20 +114,20 @@ namespace detail { #define _EXT_FUNCTION_INTEL(t_pfx) \ template <> \ std::pair get( \ - ExtFuncsPerContextT &Funcs) { \ + ExtFuncsPerContextT & Funcs) { \ using FPtrT = t_pfx##INTEL_fn; \ - std::pair Ret{ \ - Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \ + std::pair Ret{Funcs.t_pfx##Func, \ + Funcs.t_pfx##Initialized}; \ return Ret; \ } #define _EXT_FUNCTION(t_pfx) \ template <> \ std::pair get( \ - ExtFuncsPerContextT &Funcs) { \ + ExtFuncsPerContextT & Funcs) { \ using FPtrT = t_pfx##_fn; \ - std::pair Ret{ \ - Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \ + std::pair Ret{Funcs.t_pfx##Func, \ + Funcs.t_pfx##Initialized}; \ return Ret; \ } From a020c334a99f6c7629ed63bdef698c791f9c41bb Mon Sep 17 00:00:00 2001 From: sergei Date: Thu, 13 Jan 2022 22:55:32 +0300 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Steffen Larsen --- sycl/plugins/opencl/ext_functions.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/opencl/ext_functions.inc b/sycl/plugins/opencl/ext_functions.inc index b305ff13d1f5d..8a28048dc4424 100644 --- a/sycl/plugins/opencl/ext_functions.inc +++ b/sycl/plugins/opencl/ext_functions.inc @@ -1,9 +1,9 @@ #ifndef _EXT_FUNCTION_INTEL -#error Undefined _EXT_FUNCTION_INTEL macro expanstion +#error Undefined _EXT_FUNCTION_INTEL macro expansion #endif #ifndef _EXT_FUNCTION -#error Undefined _EXT_FUNCTION macro expanstion +#error Undefined _EXT_FUNCTION macro expansion #endif _EXT_FUNCTION_INTEL(clHostMemAlloc) From 52a6ad5efefbbd1eb81e0daa6872a804b5eaba69 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 14 Jan 2022 11:46:56 +0300 Subject: [PATCH 7/8] Address review comments Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/pi_opencl.cpp | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0d219d25ad302..abb2c8e48393d 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -111,30 +111,24 @@ struct ExtFuncsPerContextT { }; namespace detail { -#define _EXT_FUNCTION_INTEL(t_pfx) \ - template <> \ - std::pair get( \ - ExtFuncsPerContextT & Funcs) { \ - using FPtrT = t_pfx##INTEL_fn; \ - std::pair Ret{Funcs.t_pfx##Func, \ - Funcs.t_pfx##Initialized}; \ - return Ret; \ - } -#define _EXT_FUNCTION(t_pfx) \ +#define _EXT_FUNCTION_COMMON(t_pfx, t_pfx_suff) \ template <> \ - std::pair get( \ + std::pair get( \ ExtFuncsPerContextT & Funcs) { \ - using FPtrT = t_pfx##_fn; \ + using FPtrT = t_pfx_suff##_fn; \ std::pair Ret{Funcs.t_pfx##Func, \ Funcs.t_pfx##Initialized}; \ return Ret; \ } +#define _EXT_FUNCTION_INTEL(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx##INTEL) +#define _EXT_FUNCTION(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx) #include "ext_functions.inc" #undef _EXT_FUNCTION #undef _EXT_FUNCTION_INTEL +#undef _EXT_FUNCTION_COMMON } // namespace detail struct ExtFuncsCachesT { @@ -196,15 +190,16 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { T FuncPtr = (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); + // We're about to store the cached value. Mark this cache entry initialized. + FuncInitialized.second = true; + if (!FuncPtr) { // Cache that the extension is not available FuncInitialized.first = nullptr; - FuncInitialized.second = true; return PI_INVALID_VALUE; } FuncInitialized.first = FuncPtr; - FuncInitialized.second = true; *fptr = FuncPtr; return cast(ret_err); From 2a1b3acae73c9ffa3f6d56226ff3bf8e9aa4a596 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 17 Jan 2022 17:25:24 +0300 Subject: [PATCH 8/8] Add assertion Signed-off-by: Sergey Kanaev --- sycl/plugins/opencl/pi_opencl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index abb2c8e48393d..12764543cdf0c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -145,6 +145,7 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { // Potentially redo caching as PI interface changes. ExtFuncsPerContextT *PerContext = nullptr; { + assert(ExtFuncsCaches); std::lock_guard Lock{ExtFuncsCaches->Mtx}; PerContext = &ExtFuncsCaches->Caches[context];