From ed0c2d8fed39b2184d0471eb6b4a2f7349ba3693 Mon Sep 17 00:00:00 2001
From: Yuxi Hu <darrenyxhu@gmail.com>
Date: Wed, 27 Mar 2019 19:40:30 -0700
Subject: [PATCH] Tidy up storage allocation and deallocation (#14480)

* free memory when dptr is not nullptr

* skip memory allocation when handle size is 0

* update comments

* update Alloc in naive storage manager

* address comments

* add unit test for size 0 allocation
---
 include/mxnet/ndarray.h                  |  8 +++----
 src/ndarray/ndarray.cc                   |  8 +++----
 src/resource.cc                          | 27 ++++++++++++------------
 src/storage/cpu_device_storage.h         | 25 +++++++++++-----------
 src/storage/cpu_shared_storage_manager.h |  7 +++++-
 src/storage/gpu_device_storage.h         | 19 ++++++++---------
 src/storage/naive_storage_manager.h      |  2 +-
 src/storage/pinned_memory_storage.h      | 21 +++++++++---------
 src/storage/pooled_storage_manager.h     | 20 ++++++++++++++++++
 src/storage/storage.cc                   |  8 +++++++
 src/storage/storage_manager.h            | 11 ++++------
 tests/cpp/include/test_util.h            |  8 +++----
 tests/cpp/storage/storage_test.cc        | 15 +++++++++++++
 13 files changed, 109 insertions(+), 70 deletions(-)

diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index c55cb01b4688..d00cb479b92e 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -986,8 +986,8 @@ class NDArray {
 #endif
         delay_alloc = false;
       } else if (shandle.size < dbytes) {
-        // free storage if necessary and alloc again
-        if (shandle.size > 0) Storage::Get()->Free(shandle);
+        // free storage
+        Storage::Get()->Free(shandle);
         // init storage
         shandle = Storage::Get()->Alloc(dbytes, shandle.ctx);
 #if MXNET_USE_MKLDNN == 1
@@ -1055,8 +1055,8 @@ class NDArray {
       }
       size_t aux_bytes = shape.Size() * mshadow::mshadow_sizeof(aux_types[i]);
       if (aux_handles[i].size < aux_bytes) {
-        // free storage if necessary and alloc again
-        if (aux_handles[i].size > 0) Storage::Get()->Free(aux_handles[i]);
+        // free storage
+        Storage::Get()->Free(aux_handles[i]);
         // init aux storage
         aux_handles[i] = Storage::Get()->Alloc(aux_bytes, ctx);
       }
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 367712755483..377bef072b03 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -121,9 +121,9 @@ NDArray::Chunk::~Chunk() {
         CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
       }
 #endif
-      if (mem.h.size > 0) Storage::Get()->Free(mem.h);
+      Storage::Get()->Free(mem.h);
       for (const auto& aux : mem.aux_h) {
-        if (aux.size > 0) Storage::Get()->Free(aux);
+        Storage::Get()->Free(aux);
       }
     }
   }, shandle.ctx, var);
@@ -134,8 +134,8 @@ void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) {
       << "data is expected to be allocated after aux_data";
   auto dbytes = shape.Size() * mshadow::mshadow_sizeof(dtype);
   if (shandle.size < dbytes) {
-    // free storage if necessary and alloc again
-    if (shandle.size > 0) Storage::Get()->Free(shandle);
+    // free storage
+    Storage::Get()->Free(shandle);
     // init storage
     shandle = Storage::Get()->Alloc(dbytes, ctx);
 #if MXNET_USE_MKLDNN == 1
diff --git a/src/resource.cc b/src/resource.cc
index 0317ff32bbf3..de24286ba535 100644
--- a/src/resource.cc
+++ b/src/resource.cc
@@ -54,30 +54,29 @@ struct SpaceAllocator {
     host_handle.dptr = nullptr;
     host_handle.size = 0;
   }
+
   inline void ReleaseAll() {
-    if (handle.size != 0) {
-      Storage::Get()->DirectFree(handle);
-      handle.size = 0;
-    }
-    if (host_handle.size != 0) {
-      Storage::Get()->DirectFree(host_handle);
-      host_handle.size = 0;
-    }
+    Storage::Get()->DirectFree(handle);
+    handle.dptr = nullptr;
+    handle.size = 0;
+
+    Storage::Get()->DirectFree(host_handle);
+    host_handle.dptr = nullptr;
+    host_handle.size = 0;
   }
+
   inline void* GetSpace(size_t size) {
     if (handle.size >= size) return handle.dptr;
-    if (handle.size != 0) {
-      Storage::Get()->DirectFree(handle);
-    }
+
+    Storage::Get()->DirectFree(handle);
     handle = Storage::Get()->Alloc(size, ctx);
     return handle.dptr;
   }
 
   inline void* GetHostSpace(size_t size) {
     if (host_handle.size >= size) return host_handle.dptr;
-    if (host_handle.size != 0) {
-      Storage::Get()->DirectFree(host_handle);
-    }
+
+    Storage::Get()->DirectFree(host_handle);
     host_handle = Storage::Get()->Alloc(size, Context());
     return host_handle.dptr;
   }
diff --git a/src/storage/cpu_device_storage.h b/src/storage/cpu_device_storage.h
index 25ad61efb232..f6b296a9643f 100644
--- a/src/storage/cpu_device_storage.h
+++ b/src/storage/cpu_device_storage.h
@@ -40,13 +40,12 @@ class CPUDeviceStorage {
  public:
   /*!
    * \brief Aligned allocation on CPU.
-   * \param size Size to allocate.
-   * \return Pointer to the storage.
+   * \param handle Handle struct.
    */
-  inline static void* Alloc(Storage::Handle* handle);
+  inline static void Alloc(Storage::Handle* handle);
   /*!
    * \brief Deallocation.
-   * \param ptr Pointer to deallocate.
+   * \param handle Handle struct.
    */
   inline static void Free(Storage::Handle handle);
 
@@ -63,25 +62,25 @@ class CPUDeviceStorage {
 #endif
 };  // class CPUDeviceStorage
 
-inline void* CPUDeviceStorage::Alloc(Storage::Handle* handle) {
+inline void CPUDeviceStorage::Alloc(Storage::Handle* handle) {
+  handle->dptr = nullptr;
   const size_t size = handle->size;
-  void* ptr;
+  if (size == 0) return;
+
 #if _MSC_VER
-  ptr = _aligned_malloc(size, alignment_);
-  if (ptr == NULL) LOG(FATAL) << "Failed to allocate CPU Memory";
+  handle->dptr = _aligned_malloc(size, alignment_);
+  if (handle->dptr == nullptr) LOG(FATAL) << "Failed to allocate CPU Memory";
 #else
-  int ret = posix_memalign(&ptr, alignment_, size);
+  int ret = posix_memalign(&handle->dptr, alignment_, size);
   if (ret != 0) LOG(FATAL) << "Failed to allocate CPU Memory";
 #endif
-  return ptr;
 }
 
 inline void CPUDeviceStorage::Free(Storage::Handle handle) {
-  void * ptr = handle.dptr;
 #if _MSC_VER
-  _aligned_free(ptr);
+  _aligned_free(handle.dptr);
 #else
-  free(ptr);
+  free(handle.dptr);
 #endif
 }
 
diff --git a/src/storage/cpu_shared_storage_manager.h b/src/storage/cpu_shared_storage_manager.h
index a52d779d2318..9c57a4b61eed 100644
--- a/src/storage/cpu_shared_storage_manager.h
+++ b/src/storage/cpu_shared_storage_manager.h
@@ -115,13 +115,18 @@ class CPUSharedStorageManager final : public StorageManager {
 };  // class CPUSharedStorageManager
 
 void CPUSharedStorageManager::Alloc(Storage::Handle* handle) {
+  if (handle->size == 0) {
+    handle->dptr = nullptr;
+    return;
+  }
+
   std::lock_guard<std::recursive_mutex> lock(mutex_);
   std::uniform_int_distribution<> dis(0, std::numeric_limits<int>::max());
   int fid = -1;
   std::string filename;
   bool is_new = false;
   size_t size = handle->size + alignment_;
-  void *ptr = nullptr;
+  void* ptr = nullptr;
 #ifdef _WIN32
   CheckAndRealFree();
   HANDLE map_handle = nullptr;
diff --git a/src/storage/gpu_device_storage.h b/src/storage/gpu_device_storage.h
index 562badb8752e..5e09561c0b54 100644
--- a/src/storage/gpu_device_storage.h
+++ b/src/storage/gpu_device_storage.h
@@ -43,43 +43,42 @@ class GPUDeviceStorage {
  public:
   /*!
    * \brief Allocation.
-   * \param size Size to allocate.
-   * \return Pointer to the storage.
+   * \param handle Handle struct.
    */
-  inline static void* Alloc(Storage::Handle* handle);
+  inline static void Alloc(Storage::Handle* handle);
   /*!
    * \brief Deallocation.
-   * \param ptr Pointer to deallocate.
+   * \param handle Handle struct.
    */
   inline static void Free(Storage::Handle handle);
 };  // class GPUDeviceStorage
 
-inline void* GPUDeviceStorage::Alloc(Storage::Handle* handle) {
+inline void GPUDeviceStorage::Alloc(Storage::Handle* handle) {
+  handle->dptr = nullptr;
   const size_t size = handle->size;
-  void* ret = nullptr;
+  if (size == 0) return;
+
 #if MXNET_USE_CUDA
   mxnet::common::cuda::DeviceStore device_store(handle->ctx.real_dev_id(), true);
 #if MXNET_USE_NCCL
   std::lock_guard<std::mutex> l(Storage::Get()->GetMutex(Context::kGPU));
 #endif  // MXNET_USE_NCCL
-  cudaError_t e = cudaMalloc(&ret, size);
+  cudaError_t e = cudaMalloc(&handle->dptr, size);
   if (e != cudaSuccess && e != cudaErrorCudartUnloading)
     LOG(FATAL) << "CUDA: " << cudaGetErrorString(e);
 #else   // MXNET_USE_CUDA
   LOG(FATAL) << "Please compile with CUDA enabled";
 #endif  // MXNET_USE_CUDA
-  return ret;
 }
 
 inline void GPUDeviceStorage::Free(Storage::Handle handle) {
 #if MXNET_USE_CUDA
-  void * ptr = handle.dptr;
   mxnet::common::cuda::DeviceStore device_store(handle.ctx.real_dev_id(), true);
 #if MXNET_USE_NCCL
   std::lock_guard<std::mutex> l(Storage::Get()->GetMutex(Context::kGPU));
 #endif  // MXNET_USE_NCCL
   // throw special exception for caller to catch.
-  cudaError_t err = cudaFree(ptr);
+  cudaError_t err = cudaFree(handle.dptr);
   // ignore unloading error, as memory has already been recycled
   if (err != cudaSuccess && err != cudaErrorCudartUnloading) {
     LOG(FATAL) << "CUDA: " << cudaGetErrorString(err);
diff --git a/src/storage/naive_storage_manager.h b/src/storage/naive_storage_manager.h
index 55112b5a82e9..471b015eb32c 100644
--- a/src/storage/naive_storage_manager.h
+++ b/src/storage/naive_storage_manager.h
@@ -58,7 +58,7 @@ class NaiveStorageManager final : public StorageManager {
 
 template <class DeviceStorage>
 void NaiveStorageManager<DeviceStorage>::Alloc(Storage::Handle* handle) {
-  handle->dptr = DeviceStorage::Alloc(handle);
+  DeviceStorage::Alloc(handle);
 }
 
 template <class DeviceStorage>
diff --git a/src/storage/pinned_memory_storage.h b/src/storage/pinned_memory_storage.h
index c4ababbdc03a..13573d99360c 100644
--- a/src/storage/pinned_memory_storage.h
+++ b/src/storage/pinned_memory_storage.h
@@ -19,7 +19,7 @@
 
 /*!
  * Copyright (c) 2015 by Contributors
- * \file cpu_device_storage.h
+ * \file pinned_memory_storage.h
  * \brief CPU storage with pinned memory
  */
 #ifndef MXNET_STORAGE_PINNED_MEMORY_STORAGE_H_
@@ -38,37 +38,36 @@ class PinnedMemoryStorage {
  public:
   /*!
    * \brief Allocation.
-   * \param size Size to allocate.
-   * \return Pointer to the storage.
+   * \param handle Handle struct.
    */
-  inline static void* Alloc(Storage::Handle* handle);
+  inline static void Alloc(Storage::Handle* handle);
 
   /*!
    * \brief Deallocation.
-   * \param ptr Pointer to deallocate.
+   * \param handle Handle struct.
    */
   inline static void Free(Storage::Handle handle);
 };
 
-inline void* PinnedMemoryStorage::Alloc(Storage::Handle* handle) {
-  void* ret = nullptr;
+inline void PinnedMemoryStorage::Alloc(Storage::Handle* handle) {
+  handle->dptr = nullptr;
   const size_t size = handle->size;
+  if (size == 0) return;
+
 #if MXNET_USE_NCCL
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
 #endif
   mxnet::common::cuda::DeviceStore device_store(handle->ctx.real_dev_id(), true);
   // make the memory available across all devices
-  CUDA_CALL(cudaHostAlloc(&ret, size, cudaHostAllocPortable));
-  return ret;
+  CUDA_CALL(cudaHostAlloc(&handle->dptr, size, cudaHostAllocPortable));
 }
 
 inline void PinnedMemoryStorage::Free(Storage::Handle handle) {
-  void * ptr = handle.dptr;
 #if MXNET_USE_NCCL
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
 #endif
   mxnet::common::cuda::DeviceStore device_store(handle.ctx.real_dev_id(), true);
-  cudaError_t err = cudaFreeHost(ptr);
+  cudaError_t err = cudaFreeHost(handle.dptr);
   // ignore unloading error, as memory has already been recycled
   if (err != cudaSuccess && err != cudaErrorCudartUnloading) {
     LOG(FATAL) << "CUDA: " << cudaGetErrorString(err);
diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h
index c407a9f00cb6..4c8ae4eb12dd 100644
--- a/src/storage/pooled_storage_manager.h
+++ b/src/storage/pooled_storage_manager.h
@@ -129,6 +129,12 @@ class GPUPooledStorageManager final : public StorageManager {
 };  // class GPUPooledStorageManager
 
 void GPUPooledStorageManager::Alloc(Storage::Handle* handle) {
+  // Set dptr to nullptr when handle size is 0.
+  if (handle->size == 0) {
+    handle->dptr = nullptr;
+    return;
+  }
+
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
   size_t size = RoundAllocSize(handle->size);
   auto&& reuse_it = memory_pool_.find(size);
@@ -155,6 +161,10 @@ void GPUPooledStorageManager::Alloc(Storage::Handle* handle) {
 }
 
 void GPUPooledStorageManager::Free(Storage::Handle handle) {
+  // Do nothing if dptr is nullptr. Otherwise, nullptr may be reused
+  // which can cause illegal memory access error.
+  if (handle.dptr == nullptr) return;
+
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
   size_t size = RoundAllocSize(handle.size);
   auto&& reuse_pool = memory_pool_[size];
@@ -286,6 +296,12 @@ class GPUPooledRoundedStorageManager final : public StorageManager {
 };  // class GPUPooledRoundedStorageManager
 
 void GPUPooledRoundedStorageManager::Alloc(Storage::Handle* handle) {
+  // Set dptr to nullptr when handle size is 0.
+  if (handle->size == 0) {
+    handle->dptr = nullptr;
+    return;
+  }
+
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
   int bucket = get_bucket(handle->size);
   size_t size = get_size(bucket);
@@ -312,6 +328,10 @@ void GPUPooledRoundedStorageManager::Alloc(Storage::Handle* handle) {
 }
 
 void GPUPooledRoundedStorageManager::Free(Storage::Handle handle) {
+  // Do nothing if dptr is nullptr. Otherwise, nullptr may be reused
+  // which can cause illegal memory access error.
+  if (handle.dptr == nullptr) return;
+
   std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
   int bucket = get_bucket(handle.size);
   auto&& reuse_pool = memory_pool_[bucket];
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index 911d30cc3f05..7484e699d388 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -127,6 +127,10 @@ void StorageImpl::Alloc(Storage::Handle* handle) {
 }
 
 void StorageImpl::Free(Storage::Handle handle) {
+  // Do nothing if dtpr is nullptr because the handle may have already
+  // been freed or have not been allocated memory yet.
+  if (handle.dptr == nullptr) return;
+
   const Context &ctx = handle.ctx;
   auto&& device = storage_managers_.at(ctx.dev_type);
   std::shared_ptr<storage::StorageManager> manager = device.Get(
@@ -140,6 +144,10 @@ void StorageImpl::Free(Storage::Handle handle) {
 }
 
 void StorageImpl::DirectFree(Storage::Handle handle) {
+  // Do nothing if dtpr is nullptr because the handle may have already
+  // been freed or have not been allocated memory yet.
+  if (handle.dptr == nullptr) return;
+
   const Context &ctx = handle.ctx;
   auto&& device = storage_managers_.at(ctx.dev_type);
   std::shared_ptr<storage::StorageManager> manager = device.Get(
diff --git a/src/storage/storage_manager.h b/src/storage/storage_manager.h
index 15a2c7ecffcb..d17dc91dc2fc 100644
--- a/src/storage/storage_manager.h
+++ b/src/storage/storage_manager.h
@@ -39,20 +39,17 @@ class StorageManager {
  public:
   /*!
    * \brief Allocation.
-   * \param size Size to allocate.
-   * \return Pointer to the storage.
+   * \param handle Handle struct.
    */
   virtual void Alloc(Storage::Handle* handle) = 0;
   /*!
    * \brief Deallocation.
-   * \param ptr Pointer to deallocate.
-   * \param size Size of the storage.
+   * \param handle Handle struct.
    */
   virtual void Free(Storage::Handle handle) = 0;
   /*!
-   * \brief Direct de-allocation.
-   * \param ptr Pointer to deallocate.
-   * \param size Size of the storage.
+   * \brief Direct deallocation.
+   * \param handle Handle struct.
    */
   virtual void DirectFree(Storage::Handle handle) = 0;
   /*!
diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h
index aec3ddc5a59b..e0caddbcd027 100644
--- a/tests/cpp/include/test_util.h
+++ b/tests/cpp/include/test_util.h
@@ -70,11 +70,9 @@ class BlobMemory {
     return handle_.dptr;
   }
   void Free() {
-    if (handle_.dptr) {
-      Storage *storage = mxnet::Storage::Get();
-      storage->DirectFree(handle_);
-      handle_.dptr = nullptr;
-    }
+    mxnet::Storage::Get()->DirectFree(handle_);
+    handle_.dptr = nullptr;
+    handle_.size = 0;
   }
   size_t Size() const {
     return handle_.size;
diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc
index 026c3660f326..ce8d4ebd7a71 100644
--- a/tests/cpp/storage/storage_test.cc
+++ b/tests/cpp/storage/storage_test.cc
@@ -36,10 +36,15 @@ TEST(Storage, Basic_CPU) {
   EXPECT_EQ(handle.ctx, context_cpu);
   EXPECT_EQ(handle.size, kSize);
   storage->Free(handle);
+
   handle = storage->Alloc(kSize, context_cpu);
   EXPECT_EQ(handle.ctx, context_cpu);
   EXPECT_EQ(handle.size, kSize);
   storage->Free(handle);
+
+  handle = storage->Alloc(0, context_cpu);
+  EXPECT_EQ(handle.dptr, nullptr);
+  storage->Free(handle);
 }
 
 #if MXNET_USE_CUDA
@@ -47,6 +52,7 @@ TEST(Storage_GPU, Basic_GPU) {
   if (mxnet::test::unitTestsWithCuda) {
     putenv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF=20");
     putenv("MXNET_GPU_MEM_POOL_TYPE=Round");
+
     auto &&storage = mxnet::Storage::Get();
     mxnet::Context context_gpu = mxnet::Context::GPU(0);
     auto &&handle = storage->Alloc(32, context_gpu);
@@ -71,6 +77,11 @@ TEST(Storage_GPU, Basic_GPU) {
     EXPECT_EQ(handle2.size, 3145728);
     EXPECT_EQ(handle2.dptr, ptr2);
     storage->Free(handle2);
+
+    handle = storage->Alloc(0, context_gpu);
+    EXPECT_EQ(handle.dptr, nullptr);
+    storage->Free(handle);
+
     unsetenv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF");
     unsetenv("MXNET_GPU_MEM_POOL_TYPE");
   }
@@ -88,6 +99,10 @@ TEST(Storage_GPU, Basic_GPU) {
     EXPECT_EQ(handle.size, kSize);
     EXPECT_EQ(handle.dptr, ptr);
     storage->Free(handle);
+
+    handle = storage->Alloc(0, context_gpu);
+    EXPECT_EQ(handle.dptr, nullptr);
+    storage->Free(handle);
   }
 }
 #endif  // MXNET_USE_CUDA