diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 389342f8f71fb..826948818719a 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -309,6 +309,7 @@ add_custom_target( sycl-toolchain DEPENDS sycl-runtime-libraries sycl-compiler sycl-ls + win_proxy_loader ${XPTIFW_LIBS} COMMENT "Building SYCL compiler toolchain..." ) @@ -341,6 +342,8 @@ add_subdirectory( plugins ) add_subdirectory(tools) +add_subdirectory(win_proxy_loader) + if(SYCL_INCLUDE_TESTS) if(NOT LLVM_INCLUDE_TESTS) message(FATAL_ERROR @@ -383,6 +386,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS sycl libsycldevice level-zero-sycl-dev + win_proxy_loader ${XPTIFW_LIBS} ${SYCL_TOOLCHAIN_DEPS} ) diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 5d1272a2724d9..3866ff954396b 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -62,6 +62,8 @@ enum TraceLevel { bool trace(TraceLevel level); #ifdef __SYCL_RT_OS_WINDOWS +// these same constants are used by win_proxy_loader.dll +// if a plugin is added here, add it there as well. #ifdef _MSC_VER #define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" @@ -150,11 +152,11 @@ __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, // Function to load the shared library // Implementation is OS dependent. -void *loadOsLibrary(const std::string &Library); +void *loadOsPluginLibrary(const std::string &Library); // Function to unload the shared library // Implementation is OS dependent (see posix-pi.cpp and windows-pi.cpp) -int unloadOsLibrary(void *Library); +int unloadOsPluginLibrary(void *Library); // OS agnostic function to unload the shared library int unloadPlugin(void *Library); diff --git a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp new file mode 100644 index 0000000000000..c473da9cd5cbb --- /dev/null +++ b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp @@ -0,0 +1,42 @@ +//==------------ common_win_pi_trace.hpp - SYCL standard header file -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// this .hpp is injected. Be sure to define __SYCL_PLUGIN_DLL_NAME before +// including +#ifdef _WIN32 +#include +BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module + DWORD fdwReason, // reason for calling function + LPVOID lpReserved) { // reserved + + bool PrintPiTrace = false; + static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); + static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; + if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces + PrintPiTrace = true; + } + + // Perform actions based on the reason for calling. + switch (fdwReason) { + case DLL_PROCESS_DETACH: + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_DETACH " << __SYCL_PLUGIN_DLL_NAME << "\n" + << std::endl; + + break; + case DLL_PROCESS_ATTACH: + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_ATTACH " << __SYCL_PLUGIN_DLL_NAME << "\n" + << std::endl; + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + } + return TRUE; +} +#endif // WIN32 diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b02462259ea9d..46b7e1b33b6d3 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5666,6 +5666,10 @@ pi_result cuda_piextEnqueueDeviceGlobalVariableRead( } // This API is called by Sycl RT to notify the end of the plugin lifetime. +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result cuda_piTearDown(void *) { @@ -5862,6 +5866,12 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } +#ifdef _WIN32 +#define __SYCL_PLUGIN_DLL_NAME "pi_cuda.dll" +#include "../common_win_pi_trace/common_win_pi_trace.hpp" +#undef __SYCL_PLUGIN_DLL_NAME +#endif + } // extern "C" CUevent _pi_platform::evBase_{nullptr}; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index dfe2630e72708..c59c571d5b909 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2048,6 +2048,10 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) { return PI_SUCCESS; } +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. pi_result piTearDown(void *) { delete reinterpret_cast( PiESimdDeviceAccess->data); @@ -2102,4 +2106,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } +#ifdef _WIN32 +#define __SYCL_PLUGIN_DLL_NAME "pi_esimd_emulator.dll" +#include "../common_win_pi_trace/common_win_pi_trace.hpp" +#undef __SYCL_PLUGIN_DLL_NAME +#endif + } // extern C diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 37284eadbb81a..d7b98cf42f797 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5321,6 +5321,10 @@ pi_result hip_piextEnqueueDeviceGlobalVariableRead( } // This API is called by Sycl RT to notify the end of the plugin lifetime. +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result hip_piTearDown(void *PluginParameter) { @@ -5510,6 +5514,12 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } +#ifdef _WIN32 +#define __SYCL_PLUGIN_DLL_NAME "pi_hip.dll" +#include "../common_win_pi_trace/common_win_pi_trace.hpp" +#undef __SYCL_PLUGIN_DLL_NAME +#endif + } // extern "C" hipEvent_t _pi_platform::evBase_{nullptr}; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index b8af995d3b56f..37fd0cc9a2045 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9055,6 +9055,10 @@ pi_result piextPluginGetOpaqueData(void *opaque_data_param, } // SYCL RT calls this api to notify the end of plugin lifetime. +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. // It can include all the jobs to tear down resources before // the plugin is unloaded from memory. pi_result piTearDown(void *PluginParameter) { @@ -9438,4 +9442,10 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, } return PI_SUCCESS; } + +#ifdef _WIN32 +#define __SYCL_PLUGIN_DLL_NAME "pi_level_zero.dll" +#include "../common_win_pi_trace/common_win_pi_trace.hpp" +#undef __SYCL_PLUGIN_DLL_NAME +#endif } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8c30389285c83..0b1b05e4b3cb0 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1745,6 +1745,10 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, } // This API is called by Sycl RT to notify the end of the plugin lifetime. +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { @@ -1941,4 +1945,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } +#ifdef _WIN32 +#define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll" +#include "../common_win_pi_trace/common_win_pi_trace.hpp" +#undef __SYCL_PLUGIN_DLL_NAME +#endif + } // end extern 'C' diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index c9a1ba4101296..b7e02880d2684 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -53,6 +53,14 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() + # win_proxy_loader + include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/win_proxy_loader) + if(WIN_DUPE) + target_link_libraries(${LIB_NAME} PUBLIC win_proxy_loaderd) + else() + target_link_libraries(${LIB_NAME} PUBLIC win_proxy_loader) + endif() + target_compile_definitions(${LIB_OBJ_NAME} PRIVATE __SYCL_INTERNAL_API ) if (WIN32) @@ -215,11 +223,13 @@ if (MSVC) string(REGEX REPLACE "/MT" "" ${flag_var} "${${flag_var}}") endforeach() + set(WIN_DUPE "1") if (SYCL_ENABLE_XPTI_TRACING) add_sycl_rt_library(sycl${SYCL_MAJOR_VERSION}d sycld_object XPTI_LIB xptid COMPILE_OPTIONS "/MDd" SOURCES ${SYCL_SOURCES}) else() add_sycl_rt_library(sycl${SYCL_MAJOR_VERSION}d sycld_object COMPILE_OPTIONS "/MDd" SOURCES ${SYCL_SOURCES}) endif() + unset(WIN_DUPE) add_library(sycld ALIAS sycl${SYCL_MAJOR_VERSION}d) set(SYCL_EXTRA_OPTS "/MD") diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 55b9ae796a45d..071bb155bf642 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -136,7 +136,7 @@ context_impl::~context_impl() { } if (!MHostContext) { // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin().call(MContext); + getPlugin().call_nocheck(MContext); } } diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 91aac69cd30aa..6b6a241cd2a3a 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -165,6 +165,10 @@ void GlobalHandler::releaseDefaultContexts() { // finished. To avoid calls to nowhere, intentionally leak platform to device // cache. This will prevent destructors from being called, thus no PI cleanup // routines will be called in the end. + // Update: the win_proxy_loader addresses this for SYCL's own dependencies, + // but the GPU device dlls seem to manually load yet another DLL which may + // have been released when this function is called. So we still release() and + // leak until that is addressed. context destructs fine on CPU device. MPlatformToDefaultContextCache.Inst.release(); #endif } @@ -212,6 +216,18 @@ void GlobalHandler::drainThreadPool() { MHostTaskThreadPool.Inst->drain(); } +#ifdef _WIN32 + // because of something not-yet-understood on Windows + // threads may be shutdown once the end of main() is reached + // making an orderly shutdown difficult. Fortunately, Windows + // itself is very aggressive about reclaiming memory. Thus, + // we focus solely on unloading the plugins, so as to not + // accidentally retain device handles. etc +void shutdown(){ + GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); + Handler->unloadPlugins(); +} +#else void shutdown() { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -246,18 +262,36 @@ void shutdown() { delete Handler; Handler = nullptr; } +#endif #ifdef _WIN32 extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { + bool PrintPiTrace = false; + static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); + static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; + if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces + PrintPiTrace = true; + } + // Perform actions based on the reason for calling. switch (fdwReason) { case DLL_PROCESS_DETACH: - if (!lpReserved) - shutdown(); + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; + +#ifdef XPTI_ENABLE_INSTRUMENTATION + if (xptiTraceEnabled()) + return TRUE; // When doing xpti tracing, we can't safely call shutdown. + // TODO: figure out what XPTI is doing that prevents release. +#endif + + shutdown(); break; case DLL_PROCESS_ATTACH: + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: break; diff --git a/sycl/source/detail/online_compiler/online_compiler.cpp b/sycl/source/detail/online_compiler/online_compiler.cpp index 8980c30e531e8..d6e4a80791a21 100644 --- a/sycl/source/detail/online_compiler/online_compiler.cpp +++ b/sycl/source/detail/online_compiler/online_compiler.cpp @@ -94,7 +94,7 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType, #else static const std::string OclocLibraryName = "libocloc.so"; #endif - void *OclocLibrary = sycl::detail::pi::loadOsLibrary(OclocLibraryName); + void *OclocLibrary = sycl::detail::pi::loadOsPluginLibrary(OclocLibraryName); if (!OclocLibrary) throw online_compile_error("Cannot load ocloc library: " + OclocLibraryName); diff --git a/sycl/source/detail/os_util.cpp b/sycl/source/detail/os_util.cpp index 4d710d8bde8be..0185a990444a1 100644 --- a/sycl/source/detail/os_util.cpp +++ b/sycl/source/detail/os_util.cpp @@ -194,6 +194,8 @@ OSModuleHandle OSUtil::getOSModuleHandle(const void *VirtAddr) { } /// Returns an absolute path where the object was found. +// win_proxy_loader.dll uses this same logic. If it is changed +// significantly, it might be wise to change it there too. std::string OSUtil::getCurrentDSODir() { char Path[MAX_PATH]; Path[0] = '\0'; diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 0a35919224f1c..7bb0a49887b6c 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -364,12 +364,12 @@ std::vector> findPlugins() { // Load the Plugin by calling the OS dependent library loading call. // Return the handle to the Library. void *loadPlugin(const std::string &PluginPath) { - return loadOsLibrary(PluginPath); + return loadOsPluginLibrary(PluginPath); } // Unload the given plugin by calling teh OS-specific library unloading call. // \param Library OS-specific library handle created when loading. -int unloadPlugin(void *Library) { return unloadOsLibrary(Library); } +int unloadPlugin(void *Library) { return unloadOsPluginLibrary(Library); } // Binds all the PI Interface APIs to Plugin Library Function Addresses. // TODO: Remove the 'OclPtr' extension to PI_API. diff --git a/sycl/source/detail/posix_pi.cpp b/sycl/source/detail/posix_pi.cpp index 92c13edc9791d..c5e7a30f25c1c 100644 --- a/sycl/source/detail/posix_pi.cpp +++ b/sycl/source/detail/posix_pi.cpp @@ -16,7 +16,7 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail::pi { -void *loadOsLibrary(const std::string &PluginPath) { +void *loadOsPluginLibrary(const std::string &PluginPath) { // TODO: Check if the option RTLD_NOW is correct. Explore using // RTLD_DEEPBIND option when there are multiple plugins. void *so = dlopen(PluginPath.c_str(), RTLD_NOW); @@ -28,7 +28,7 @@ void *loadOsLibrary(const std::string &PluginPath) { return so; } -int unloadOsLibrary(void *Library) { +int unloadOsPluginLibrary(void *Library) { // The mock plugin does not have an associated library, so we allow nullptr // here to avoid it trying to free a non-existent library. if (!Library) diff --git a/sycl/source/detail/windows_pi.cpp b/sycl/source/detail/windows_pi.cpp index 034a5d49033de..a83d0006ab6dd 100644 --- a/sycl/source/detail/windows_pi.cpp +++ b/sycl/source/detail/windows_pi.cpp @@ -13,34 +13,22 @@ #include #include +#include "win_proxy_loader.hpp" + namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { namespace pi { -void *loadOsLibrary(const std::string &PluginPath) { - // Tells the system to not display the critical-error-handler message box. - // Instead, the system sends the error to the calling process. - // This is crucial for graceful handling of plugins that couldn't be - // loaded, e.g. due to missing native run-times. - // TODO: add reporting in case of an error. - // NOTE: we restore the old mode to not affect user app behavior. - // - UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); - // Exclude current directory from DLL search path - if (!SetDllDirectoryA("")) { - assert(false && "Failed to update DLL search path"); - } - auto Result = (void *)LoadLibraryA(PluginPath.c_str()); - (void)SetErrorMode(SavedMode); - if (!SetDllDirectoryA(nullptr)) { - assert(false && "Failed to restore DLL search path"); - } +void *loadOsPluginLibrary(const std::string &PluginPath) { + // We fetch the preloaded plugin from the win_proxy_loader. + // The proxy_loader handles any required error suppression. + auto Result = getPreloadedPlugin(PluginPath); return Result; } -int unloadOsLibrary(void *Library) { +int unloadOsPluginLibrary(void *Library) { // The mock plugin does not have an associated library, so we allow nullptr // here to avoid it trying to free a non-existent library. if (!Library) diff --git a/sycl/win_proxy_loader/CMakeLists.txt b/sycl/win_proxy_loader/CMakeLists.txt new file mode 100644 index 0000000000000..477ee40b1b733 --- /dev/null +++ b/sycl/win_proxy_loader/CMakeLists.txt @@ -0,0 +1,57 @@ +project(win_proxy_loader) +add_library(win_proxy_loader SHARED win_proxy_loader.cpp) +if (WIN32) + install(TARGETS win_proxy_loader + RUNTIME DESTINATION "bin" COMPONENT win_proxy_loader + ) +endif() + +if (MSVC) + # MSVC provides two incompatible build variants for its CRT: release and debug + # To avoid potential issues in user code we also need to provide two kinds + # of SYCL Runtime Library for release and debug configurations. + set(WINUNLOAD_CXX_FLAGS "") + if (CMAKE_BUILD_TYPE MATCHES "Debug") + set(WINUNLOAD_CXX_FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") + string(REPLACE "/MDd" "" WINUNLOAD_CXX_FLAGS "${WINUNLOAD_CXX_FLAGS}") + string(REPLACE "/MTd" "" WINUNLOAD_CXX_FLAGS "${WINUNLOAD_CXX_FLAGS}") + else() + if (CMAKE_BUILD_TYPE MATCHES "Release") + set(WINUNLOAD_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELEASE}") + elseif (CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo") + set(WINUNLOAD_CXX_FLAGS "${CMAKE_CXX_FLAGS_MINSIZEREL}") + elseif (CMAKE_BUILD_TYPE MATCHES "MinSizeRel") + set(WINUNLOAD_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") + endif() + string(REPLACE "/MD" "" WINUNLOAD_CXX_FLAGS "${WINUNLOAD_CXX_FLAGS}") + string(REPLACE "/MT" "" WINUNLOAD_CXX_FLAGS "${WINUNLOAD_CXX_FLAGS}") + endif() + + # target_compile_options requires list of options, not a string + string(REPLACE " " ";" WINUNLOAD_CXX_FLAGS "${WINUNLOAD_CXX_FLAGS}") + + set(WINUNLOAD_CXX_FLAGS_RELEASE "${WINUNLOAD_CXX_FLAGS};/MD") + set(WINUNLOAD_CXX_FLAGS_DEBUG "${WINUNLOAD_CXX_FLAGS};/MDd") + + # CMake automatically applies these flags to all targets. To override this + # behavior, options lists are reset. + set(CMAKE_CXX_FLAGS_RELEASE "") + set(CMAKE_CXX_FLAGS_MINSIZEREL "") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "") + set(CMAKE_CXX_FLAGS_DEBUG "") + +# Handle the debug version for the Microsoft compiler as a special case by +# creating a debug version of the static library that uses the flags used by +# the SYCL runtime + add_library(win_proxy_loaderd SHARED win_proxy_loader.cpp) + target_compile_options(win_proxy_loaderd PRIVATE ${WINUNLOAD_CXX_FLAGS_DEBUG}) + target_compile_options(win_proxy_loader PRIVATE ${WINUNLOAD_CXX_FLAGS_RELEASE}) + target_link_libraries(win_proxy_loaderd PRIVATE shlwapi) + target_link_libraries(win_proxy_loader PRIVATE shlwapi) + if (WIN32) + install(TARGETS win_proxy_loaderd + RUNTIME DESTINATION "bin" COMPONENT win_proxy_loader) + endif() +endif() + + diff --git a/sycl/win_proxy_loader/win_proxy_loader.cpp b/sycl/win_proxy_loader/win_proxy_loader.cpp new file mode 100644 index 0000000000000..5264a05e67209 --- /dev/null +++ b/sycl/win_proxy_loader/win_proxy_loader.cpp @@ -0,0 +1,229 @@ +//==------------ win_proxy_loader.cpp - SYCL standard source file ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// On Windows, DLLs loaded dynamically (via LoadLibrary) are not tracked as +// dependencies of the caller in the same way they would be if linked +// statically. +// This can lead to unloading problems, where after main() finishes the OS will +// unload those DLLs from memory, possibly before the caller is done. +// (static var destruction or DllMain() can both occur after) +// The workaround is this proxy_loader. It is statically linked by the SYCL +// library and thus is a real dependency and is not unloaded from memory until +// after SYCL itself is unloaded. It calls LoadLibrary on all the PI Plugins +// that SYCL will use during its initialization, which ensures that those plugin +// DLLs are not unloaded until after. +// Note that this property is not transitive. If any of the PI DLLs in turn +// dynamically load some other DLL during their lifecycle there is no guarantee +// that the "grandchild" won't be unloaded early. They would need to employ a +// similar approach. + + +#include + +#ifdef _WIN32 + +#include +#include +#include +#include + +#endif + +#include +#include +#include + +#include "win_proxy_loader.hpp" + +#ifdef _WIN32 + +// ------------------------------------ + +static constexpr const char *DirSep = "\\"; +using OSModuleHandle = intptr_t; +/// Module handle for the executable module - it is assumed there is always +/// single one at most. +static constexpr OSModuleHandle ExeModuleHandle = -1; + +// cribbed from sycl/source/detail/os_util.cpp +std::string getDirName(const char *Path) { + std::string Tmp(Path); + // Remove trailing directory separators + Tmp.erase(Tmp.find_last_not_of("/\\") + 1, std::string::npos); + + size_t pos = Tmp.find_last_of("/\\"); + if (pos != std::string::npos) + return Tmp.substr(0, pos); + + // If no directory separator is present return initial path like dirname does + return Tmp; +} + +// cribbed from sycl/source/detail/os_util.cpp +OSModuleHandle getOSModuleHandle(const void *VirtAddr) { + HMODULE PhModule; + DWORD Flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT; + auto LpModuleAddr = reinterpret_cast(VirtAddr); + if (!GetModuleHandleExA(Flag, LpModuleAddr, &PhModule)) { + // Expect the caller to check for zero and take + // necessary action + return 0; + } + if (PhModule == GetModuleHandleA(nullptr)) + return ExeModuleHandle; + return reinterpret_cast(PhModule); +} + +// cribbed from sycl/source/detail/os_util.cpp +/// Returns an absolute path where the object was found. +std::string getCurrentDSODir() { + char Path[MAX_PATH]; + Path[0] = '\0'; + Path[sizeof(Path) - 1] = '\0'; + auto Handle = getOSModuleHandle(reinterpret_cast(&getCurrentDSODir)); + DWORD Ret = GetModuleFileNameA( + reinterpret_cast(ExeModuleHandle == Handle ? 0 : Handle), + reinterpret_cast(&Path), sizeof(Path)); + assert(Ret < sizeof(Path) && "Path is longer than PATH_MAX?"); + assert(Ret > 0 && "GetModuleFileNameA failed"); + (void)Ret; + + BOOL RetCode = PathRemoveFileSpecA(reinterpret_cast(&Path)); + assert(RetCode && "PathRemoveFileSpecA failed"); + (void)RetCode; + + return Path; +} + +// these are cribbed from include/sycl/detail/pi.hpp +// a new plugin must be added to both places. +#ifdef _MSC_VER +#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" +#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" +#define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" +#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" +#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" +#define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "pi_unified_runtime.dll" +#else // llvm-mingw +#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" +#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" +#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" +#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dll" +#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" +#define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "libpi_unified_runtime.dll" +#endif + +// ------------------------------------ + +using MapT = std::map; + +MapT& getDllMap() { + static MapT dllMap; + return dllMap; +} + +/// Load the plugin libraries and store them in a map. +void preloadLibraries() { + // Suppress system errors. + // Tells the system to not display the critical-error-handler message box. + // Instead, the system sends the error to the calling process. + // This is crucial for graceful handling of plugins that couldn't be + // loaded, e.g. due to missing native run-times. + // Sometimes affects L0 or the unified runtime. + // TODO: add reporting in case of an error. + // NOTE: we restore the old mode to not affect user app behavior. + // + UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); + // Exclude current directory from DLL search path + if (!SetDllDirectoryA("")) { + assert(false && "Failed to update DLL search path"); + } + + // this path duplicates sycl/detail/pi.cpp:initializePlugins + const std::string LibSYCLDir = getCurrentDSODir() + DirSep; + + MapT& dllMap = getDllMap(); + + std::string ocl_path = LibSYCLDir + __SYCL_OPENCL_PLUGIN_NAME; + dllMap.emplace(ocl_path, LoadLibraryA(ocl_path.c_str())); + + std::string l0_path = LibSYCLDir + __SYCL_LEVEL_ZERO_PLUGIN_NAME; + dllMap.emplace(l0_path, LoadLibraryA(l0_path.c_str())); + + std::string cuda_path = LibSYCLDir + __SYCL_CUDA_PLUGIN_NAME; + dllMap.emplace(cuda_path, LoadLibraryA(cuda_path.c_str())); + + std::string esimd_path = LibSYCLDir + __SYCL_ESIMD_EMULATOR_PLUGIN_NAME; + dllMap.emplace(esimd_path, LoadLibraryA(esimd_path.c_str())); + + std::string hip_path = LibSYCLDir + __SYCL_HIP_PLUGIN_NAME; + dllMap.emplace(hip_path, LoadLibraryA(hip_path.c_str())); + + std::string ur_path = LibSYCLDir + __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME; + dllMap.emplace(ur_path, LoadLibraryA(ur_path.c_str())); + + // Restore system error handling. + (void)SetErrorMode(SavedMode); + if (!SetDllDirectoryA(nullptr)) { + assert(false && "Failed to restore DLL search path"); + } +} + +/// windows_pi.cpp:loadOsPluginLibrary() calls this to get the DLL loaded earlier. +__declspec(dllexport) void *getPreloadedPlugin(const std::string &PluginPath) { + + MapT& dllMap = getDllMap(); + + auto match = dllMap.find( + PluginPath); // result might be nullptr (not found), which is perfectly valid. + if (match == dllMap.end()) { + // unit testing? return nullptr (not found) rather than risk asserting below + if (PluginPath.find("unittests") != std::string::npos) + return nullptr; + + // Otherwise, asking for something we don't know about at all, is an issue. + std::cout << "unknown plugin: " << PluginPath << std::endl; + assert(false && "getPreloadedPlugin was given an unknown plugin path."); + return nullptr; + } + return match->second; +} + +BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module + DWORD fdwReason, // reason for calling function + LPVOID lpReserved) // reserved +{ + bool PrintPiTrace = false; + static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); + static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; + if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces + PrintPiTrace = true; + } + + switch (fdwReason) { + case DLL_PROCESS_ATTACH: + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_ATTACH win_proxy_loader.dll\n" + << std::endl; + + preloadLibraries(); + break; + case DLL_PROCESS_DETACH: + if (PrintPiTrace) + std::cout << "---> DLL_PROCESS_DETACH win_proxy_loader.dll\n" + << std::endl; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + } + return TRUE; +} + +#endif // WIN32 diff --git a/sycl/win_proxy_loader/win_proxy_loader.hpp b/sycl/win_proxy_loader/win_proxy_loader.hpp new file mode 100644 index 0000000000000..5f40753b495b7 --- /dev/null +++ b/sycl/win_proxy_loader/win_proxy_loader.hpp @@ -0,0 +1,15 @@ +//==------------ win_proxy_loader.hpp - SYCL standard header file ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#ifdef _WIN32 +#include + +__declspec(dllexport) void *getPreloadedPlugin(const std::string &PluginPath); +#endif