Respond to comments

oneapi-src · Jan 10, 2024 · 0c643e9 · 0c643e9
1 parent ceee123
commit 0c643e9
Show file tree

Hide file tree

Showing 9 changed files with 54 additions and 23 deletions.
diff --git a/src/gpu/amd/sycl_hip_compat.cpp b/src/gpu/amd/sycl_hip_compat.cpp
@@ -1,3 +1,19 @@
+/*******************************************************************************
+* Copyright 2019-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
 #include "sycl_hip_compat.hpp"
 
 namespace dnnl {

diff --git a/src/gpu/amd/sycl_hip_compat.hpp b/src/gpu/amd/sycl_hip_compat.hpp
@@ -54,6 +54,9 @@ native_object_t get_native(const sycl_object_t &sycl_object) {
     return reinterpret_cast<native_object_t>(handle);
 }
 
+template <>
+HIPcontext get_native(const ::sycl::device &device);
+
 } // namespace compat
 } // namespace amd
 } // namespace gpu

diff --git a/src/gpu/amd/sycl_hip_scoped_context.cpp b/src/gpu/amd/sycl_hip_scoped_context.cpp
@@ -32,13 +32,12 @@ hip_sycl_scoped_context_handler_t::hip_sycl_scoped_context_handler_t(
         const sycl_hip_engine_t &engine)
     : need_to_recover_(false) {
     try {
-        auto desired = engine.get_underlying_context();
         HIP_EXECUTE_FUNC(hipCtxGetCurrent, &original_);
+        auto desired
+                = engine.get_underlying_context(); // Getting the context also makes it active
+        currentDevice = engine.get_underlying_device();
 
         if (original_ != desired) {
-
-            HIP_EXECUTE_FUNC(hipCtxSetCurrent, desired);
-
             need_to_recover_
                     = !(original_ == nullptr && engine.has_primary_context());
         }
@@ -51,7 +50,10 @@ hip_sycl_scoped_context_handler_t::
         ~hip_sycl_scoped_context_handler_t() noexcept(false) {
 
     try {
-        if (need_to_recover_) { HIP_EXECUTE_FUNC(hipCtxSetCurrent, original_); }
+        if (need_to_recover_) {
+            HIP_EXECUTE_FUNC(hipDevicePrimaryCtxRelease, currentDevice);
+            HIP_EXECUTE_FUNC(hipCtxSetCurrent, original_);
+        }
     } catch (const std::runtime_error &e) {
         error::wrap_c_api(status::runtime_error, e.what());
     }

diff --git a/src/gpu/amd/sycl_hip_scoped_context.hpp b/src/gpu/amd/sycl_hip_scoped_context.hpp
@@ -31,6 +31,7 @@ namespace amd {
 
 class hip_sycl_scoped_context_handler_t {
     hipCtx_t original_;
+    hipDevice_t currentDevice_;
     bool need_to_recover_;
 
 public:

diff --git a/src/gpu/nvidia/sycl_cuda_compat.cpp b/src/gpu/nvidia/sycl_cuda_compat.cpp
@@ -1,3 +1,19 @@
+/*******************************************************************************
+* Copyright 2019-2024 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
 #include "sycl_cuda_compat.hpp"
 
 namespace dnnl {

diff --git a/src/gpu/nvidia/sycl_cuda_compat.hpp b/src/gpu/nvidia/sycl_cuda_compat.hpp
@@ -49,6 +49,9 @@ native_object_t get_native(const sycl_object_t &sycl_object) {
     return reinterpret_cast<native_object_t>(handle);
 }
 
+template <>
+CUcontext get_native(const ::sycl::device &device);
+
 } // namespace compat
 } // namespace nvidia
 } // namespace gpu

diff --git a/src/gpu/nvidia/sycl_cuda_scoped_context.cpp b/src/gpu/nvidia/sycl_cuda_scoped_context.cpp
@@ -26,18 +26,12 @@ cuda_sycl_scoped_context_handler_t::cuda_sycl_scoped_context_handler_t(
         const sycl_cuda_engine_t &engine)
     : need_to_recover_(false) {
     try {
-        auto desired = engine.get_underlying_context();
         CUDA_EXECUTE_FUNC(cuCtxGetCurrent, &original_);
+        auto desired
+                = engine.get_underlying_context(); // Getting the context also makes it active
+        currentDevice = engine.get_underlying_device();
 
         if (original_ != desired) {
-            // Sets the desired context as the active one for the thread
-            CUDA_EXECUTE_FUNC(cuCtxSetCurrent, desired);
-            // No context is installed and the suggested context is primary
-            // This is the most common case. We can activate the context in the
-            // thread and leave it there until all the PI context referring to
-            // the same underlying CUDA primary context are destroyed. This
-            // emulates the behaviour of the CUDA runtime api, and avoids costly
-            // context switches. No action is required on this side of the if.
             need_to_recover_
                     = !(original_ == nullptr && engine.has_primary_context());
         }
@@ -51,7 +45,10 @@ cuda_sycl_scoped_context_handler_t::
     // we need to release the placed_context_ since we set it from
     // ctx.get() retains the underlying context so we need to remove it
     try {
-        if (need_to_recover_) { CUDA_EXECUTE_FUNC(cuCtxSetCurrent, original_); }
+        if (need_to_recover_) {
+            CUDA_EXECUTE_FUNC(cuDevicePrimaryCtxRelease, currentDevice);
+            CUDA_EXECUTE_FUNC(cuCtxSetCurrent, original_);
+        }
     } catch (const std::runtime_error &e) {
         error::wrap_c_api(status::runtime_error, e.what());
     }

diff --git a/src/gpu/nvidia/sycl_cuda_scoped_context.hpp b/src/gpu/nvidia/sycl_cuda_scoped_context.hpp
@@ -34,6 +34,7 @@ namespace nvidia {
 // required to put the stream context on top of the cuda stack
 class cuda_sycl_scoped_context_handler_t {
     CUcontext original_;
+    CUdevice currentDevice_;
     bool need_to_recover_;
 
 public:

diff --git a/src/sycl/sycl_compat.hpp b/src/sycl/sycl_compat.hpp
@@ -40,14 +40,6 @@ template <typename native_object_t, typename sycl_object_t>
 native_object_t get_native(const sycl_object_t &sycl_object) {
     return reinterpret_cast<native_object_t>(get_native(sycl_object));
 }
-#ifdef DNNL_SYCL_CUDA
-template <>
-CUcontext get_native(const ::sycl::device &device);
-#endif
-#ifdef DNNL_SYCL_HIP
-template <>
-HIPcontext get_native(const ::sycl::device &device);
-#endif
 
 // Automatically use host_task if it is supported by compiler,
 // otherwise fall back to codeplay_host_task.