Merge pull request #32 from LLNL/events-hack
FastEvent uses CUDA events
ndryden authored Jan 22, 2019
2 parents 073b717 + ee5d926 commit 288a3d0
Showing 2 changed files with 20 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/cuda.cpp
@@ -109,44 +109,56 @@ bool stream_memory_operations_supported() {
 }
 
 FastEvent::FastEvent() {
+#if 0
   if (stream_memory_operations_supported()) {
     sync_event = get_pinned_memory<int32_t>(1);
     // Initialize to completed to match CUDA event semantics.
     __atomic_store_n(sync_event, 1, __ATOMIC_SEQ_CST);
     AL_CHECK_CUDA_DRV(cuMemHostGetDevicePointer(
       &sync_event_dev_ptr, sync_event, 0));
   }
-  else {
+  else
+#endif
+  {
     plain_event = get_cuda_event();
   }
 }
 
 FastEvent::~FastEvent() {
+#if 0
   if (stream_memory_operations_supported()) {
     release_pinned_memory(sync_event);
   }
-  else {
+  else
+#endif
+  {
     release_cuda_event(plain_event);
   }
 }
 
 void FastEvent::record(cudaStream_t stream) {
+#if 0
   if (stream_memory_operations_supported()) {
     // We cannot use std::atomic because we need the actual address of the memory.
     __atomic_store_n(sync_event, 0, __ATOMIC_SEQ_CST);
     AL_CHECK_CUDA_DRV(cuStreamWriteValue32(
       stream, sync_event_dev_ptr, 1,
       CU_STREAM_WRITE_VALUE_DEFAULT));
   }
-  else {
+  else
+#endif
+  {
     AL_CHECK_CUDA(cudaEventRecord(plain_event, stream));
   }
 }
 
 bool FastEvent::query() {
+#if 0
   if (stream_memory_operations_supported())
     return __atomic_load_n(sync_event, __ATOMIC_SEQ_CST);
-  else {
+  else
+#endif
+  {
     cudaError_t r = cudaEventQuery(plain_event);
     if (r == cudaSuccess)
       return true;
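
For context, the code these #if 0 blocks disable implements a host-pollable event with CUDA stream memory operations: record() clears a flag in pinned host memory and enqueues a cuStreamWriteValue32 that sets it once prior work on the stream completes, and query() is just an atomic load. Below is a minimal standalone sketch of that pattern, not the Aluminum implementation: error checking is omitted, StreamWriteEvent is an illustrative name, and it only works where the driver reports support for stream memory operations, which is what stream_memory_operations_supported() checks.

#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>

// A host-pollable event: record() enqueues a 32-bit write that the GPU
// performs once all prior work on the stream has finished, and query() is a
// plain atomic load on pinned host memory, which is cheaper than
// cudaEventQuery(). Error checking omitted for brevity.
struct StreamWriteEvent {
  int32_t* host_flag = nullptr;  // pinned, host-visible flag
  CUdeviceptr dev_flag = 0;      // device-side alias of the same memory

  StreamWriteEvent() {
    cudaHostAlloc(reinterpret_cast<void**>(&host_flag), sizeof(int32_t),
                  cudaHostAllocMapped);
    __atomic_store_n(host_flag, 1, __ATOMIC_SEQ_CST);  // start "completed"
    cuMemHostGetDevicePointer(&dev_flag, host_flag, 0);
  }
  ~StreamWriteEvent() { cudaFreeHost(host_flag); }

  void record(cudaStream_t stream) {
    __atomic_store_n(host_flag, 0, __ATOMIC_SEQ_CST);
    cuStreamWriteValue32(stream, dev_flag, 1, CU_STREAM_WRITE_VALUE_DEFAULT);
  }

  bool query() const {
    return __atomic_load_n(host_flag, __ATOMIC_SEQ_CST) != 0;
  }
};
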
4 changes: 4 additions & 0 deletions src/cuda.hpp
@@ -140,6 +140,8 @@ bool stream_memory_operations_supported();
  * using the stream memory write operation.
  * This falls back to the usual CUDA events when stream memory operations are
  * not available.
+ * @note This is currently always falling back on CUDA events to work around a
+ * hang, the underlying cause of which has not been diagnosed.
  */
 class FastEvent {
  public:
@@ -153,8 +155,10 @@ class FastEvent {
   /** Return true if the event has completed. */
   bool query();
  private:
+#if 0
   int32_t* sync_event __attribute__((aligned(64)));
   CUdeviceptr sync_event_dev_ptr;
+#endif
   cudaEvent_t plain_event;
 };
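
With those members compiled out, FastEvent always takes the cudaEvent_t path: record() reduces to cudaEventRecord() and query() to cudaEventQuery(). A small usage sketch of that fallback pattern (hypothetical; poll_until_done is not an Aluminum function):

#include <cuda_runtime.h>

// What the fallback amounts to: record an ordinary CUDA event on the stream
// and poll it without blocking the host thread.
void poll_until_done(cudaStream_t stream) {
  cudaEvent_t ev;
  cudaEventCreateWithFlags(&ev, cudaEventDisableTiming);  // timing not needed
  cudaEventRecord(ev, stream);
  while (cudaEventQuery(ev) == cudaErrorNotReady) {
    // Make progress on other work here; the loop exits once the stream has
    // passed the recorded point (or an error other than not-ready occurs).
  }
  cudaEventDestroy(ev);
}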
