From ed0c2d8fed39b2184d0471eb6b4a2f7349ba3693 Mon Sep 17 00:00:00 2001 From: Yuxi Hu Date: Wed, 27 Mar 2019 19:40:30 -0700 Subject: [PATCH] Tidy up storage allocation and deallocation (#14480) * free memory when dptr is not nullptr * skip memory allocation when handle size is 0 * update comments * update Alloc in naive storage manager * address comments * add unit test for size 0 allocation --- include/mxnet/ndarray.h | 8 +++---- src/ndarray/ndarray.cc | 8 +++---- src/resource.cc | 27 ++++++++++++------------ src/storage/cpu_device_storage.h | 25 +++++++++++----------- src/storage/cpu_shared_storage_manager.h | 7 +++++- src/storage/gpu_device_storage.h | 19 ++++++++--------- src/storage/naive_storage_manager.h | 2 +- src/storage/pinned_memory_storage.h | 21 +++++++++--------- src/storage/pooled_storage_manager.h | 20 ++++++++++++++++++ src/storage/storage.cc | 8 +++++++ src/storage/storage_manager.h | 11 ++++------ tests/cpp/include/test_util.h | 8 +++---- tests/cpp/storage/storage_test.cc | 15 +++++++++++++ 13 files changed, 109 insertions(+), 70 deletions(-) diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index c55cb01b4688..d00cb479b92e 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -986,8 +986,8 @@ class NDArray { #endif delay_alloc = false; } else if (shandle.size < dbytes) { - // free storage if necessary and alloc again - if (shandle.size > 0) Storage::Get()->Free(shandle); + // free storage + Storage::Get()->Free(shandle); // init storage shandle = Storage::Get()->Alloc(dbytes, shandle.ctx); #if MXNET_USE_MKLDNN == 1 @@ -1055,8 +1055,8 @@ class NDArray { } size_t aux_bytes = shape.Size() * mshadow::mshadow_sizeof(aux_types[i]); if (aux_handles[i].size < aux_bytes) { - // free storage if necessary and alloc again - if (aux_handles[i].size > 0) Storage::Get()->Free(aux_handles[i]); + // free storage + Storage::Get()->Free(aux_handles[i]); // init aux storage aux_handles[i] = Storage::Get()->Alloc(aux_bytes, ctx); } diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 367712755483..377bef072b03 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -121,9 +121,9 @@ NDArray::Chunk::~Chunk() { CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr); } #endif - if (mem.h.size > 0) Storage::Get()->Free(mem.h); + Storage::Get()->Free(mem.h); for (const auto& aux : mem.aux_h) { - if (aux.size > 0) Storage::Get()->Free(aux); + Storage::Get()->Free(aux); } } }, shandle.ctx, var); @@ -134,8 +134,8 @@ void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) { << "data is expected to be allocated after aux_data"; auto dbytes = shape.Size() * mshadow::mshadow_sizeof(dtype); if (shandle.size < dbytes) { - // free storage if necessary and alloc again - if (shandle.size > 0) Storage::Get()->Free(shandle); + // free storage + Storage::Get()->Free(shandle); // init storage shandle = Storage::Get()->Alloc(dbytes, ctx); #if MXNET_USE_MKLDNN == 1 diff --git a/src/resource.cc b/src/resource.cc index 0317ff32bbf3..de24286ba535 100644 --- a/src/resource.cc +++ b/src/resource.cc @@ -54,30 +54,29 @@ struct SpaceAllocator { host_handle.dptr = nullptr; host_handle.size = 0; } + inline void ReleaseAll() { - if (handle.size != 0) { - Storage::Get()->DirectFree(handle); - handle.size = 0; - } - if (host_handle.size != 0) { - Storage::Get()->DirectFree(host_handle); - host_handle.size = 0; - } + Storage::Get()->DirectFree(handle); + handle.dptr = nullptr; + handle.size = 0; + + Storage::Get()->DirectFree(host_handle); + host_handle.dptr = nullptr; + host_handle.size = 0; } + inline void* GetSpace(size_t size) { if (handle.size >= size) return handle.dptr; - if (handle.size != 0) { - Storage::Get()->DirectFree(handle); - } + + Storage::Get()->DirectFree(handle); handle = Storage::Get()->Alloc(size, ctx); return handle.dptr; } inline void* GetHostSpace(size_t size) { if (host_handle.size >= size) return host_handle.dptr; - if (host_handle.size != 0) { - Storage::Get()->DirectFree(host_handle); - } + + Storage::Get()->DirectFree(host_handle); host_handle = Storage::Get()->Alloc(size, Context()); return host_handle.dptr; } diff --git a/src/storage/cpu_device_storage.h b/src/storage/cpu_device_storage.h index 25ad61efb232..f6b296a9643f 100644 --- a/src/storage/cpu_device_storage.h +++ b/src/storage/cpu_device_storage.h @@ -40,13 +40,12 @@ class CPUDeviceStorage { public: /*! * \brief Aligned allocation on CPU. - * \param size Size to allocate. - * \return Pointer to the storage. + * \param handle Handle struct. */ - inline static void* Alloc(Storage::Handle* handle); + inline static void Alloc(Storage::Handle* handle); /*! * \brief Deallocation. - * \param ptr Pointer to deallocate. + * \param handle Handle struct. */ inline static void Free(Storage::Handle handle); @@ -63,25 +62,25 @@ class CPUDeviceStorage { #endif }; // class CPUDeviceStorage -inline void* CPUDeviceStorage::Alloc(Storage::Handle* handle) { +inline void CPUDeviceStorage::Alloc(Storage::Handle* handle) { + handle->dptr = nullptr; const size_t size = handle->size; - void* ptr; + if (size == 0) return; + #if _MSC_VER - ptr = _aligned_malloc(size, alignment_); - if (ptr == NULL) LOG(FATAL) << "Failed to allocate CPU Memory"; + handle->dptr = _aligned_malloc(size, alignment_); + if (handle->dptr == nullptr) LOG(FATAL) << "Failed to allocate CPU Memory"; #else - int ret = posix_memalign(&ptr, alignment_, size); + int ret = posix_memalign(&handle->dptr, alignment_, size); if (ret != 0) LOG(FATAL) << "Failed to allocate CPU Memory"; #endif - return ptr; } inline void CPUDeviceStorage::Free(Storage::Handle handle) { - void * ptr = handle.dptr; #if _MSC_VER - _aligned_free(ptr); + _aligned_free(handle.dptr); #else - free(ptr); + free(handle.dptr); #endif } diff --git a/src/storage/cpu_shared_storage_manager.h b/src/storage/cpu_shared_storage_manager.h index a52d779d2318..9c57a4b61eed 100644 --- a/src/storage/cpu_shared_storage_manager.h +++ b/src/storage/cpu_shared_storage_manager.h @@ -115,13 +115,18 @@ class CPUSharedStorageManager final : public StorageManager { }; // class CPUSharedStorageManager void CPUSharedStorageManager::Alloc(Storage::Handle* handle) { + if (handle->size == 0) { + handle->dptr = nullptr; + return; + } + std::lock_guard lock(mutex_); std::uniform_int_distribution<> dis(0, std::numeric_limits::max()); int fid = -1; std::string filename; bool is_new = false; size_t size = handle->size + alignment_; - void *ptr = nullptr; + void* ptr = nullptr; #ifdef _WIN32 CheckAndRealFree(); HANDLE map_handle = nullptr; diff --git a/src/storage/gpu_device_storage.h b/src/storage/gpu_device_storage.h index 562badb8752e..5e09561c0b54 100644 --- a/src/storage/gpu_device_storage.h +++ b/src/storage/gpu_device_storage.h @@ -43,43 +43,42 @@ class GPUDeviceStorage { public: /*! * \brief Allocation. - * \param size Size to allocate. - * \return Pointer to the storage. + * \param handle Handle struct. */ - inline static void* Alloc(Storage::Handle* handle); + inline static void Alloc(Storage::Handle* handle); /*! * \brief Deallocation. - * \param ptr Pointer to deallocate. + * \param handle Handle struct. */ inline static void Free(Storage::Handle handle); }; // class GPUDeviceStorage -inline void* GPUDeviceStorage::Alloc(Storage::Handle* handle) { +inline void GPUDeviceStorage::Alloc(Storage::Handle* handle) { + handle->dptr = nullptr; const size_t size = handle->size; - void* ret = nullptr; + if (size == 0) return; + #if MXNET_USE_CUDA mxnet::common::cuda::DeviceStore device_store(handle->ctx.real_dev_id(), true); #if MXNET_USE_NCCL std::lock_guard l(Storage::Get()->GetMutex(Context::kGPU)); #endif // MXNET_USE_NCCL - cudaError_t e = cudaMalloc(&ret, size); + cudaError_t e = cudaMalloc(&handle->dptr, size); if (e != cudaSuccess && e != cudaErrorCudartUnloading) LOG(FATAL) << "CUDA: " << cudaGetErrorString(e); #else // MXNET_USE_CUDA LOG(FATAL) << "Please compile with CUDA enabled"; #endif // MXNET_USE_CUDA - return ret; } inline void GPUDeviceStorage::Free(Storage::Handle handle) { #if MXNET_USE_CUDA - void * ptr = handle.dptr; mxnet::common::cuda::DeviceStore device_store(handle.ctx.real_dev_id(), true); #if MXNET_USE_NCCL std::lock_guard l(Storage::Get()->GetMutex(Context::kGPU)); #endif // MXNET_USE_NCCL // throw special exception for caller to catch. - cudaError_t err = cudaFree(ptr); + cudaError_t err = cudaFree(handle.dptr); // ignore unloading error, as memory has already been recycled if (err != cudaSuccess && err != cudaErrorCudartUnloading) { LOG(FATAL) << "CUDA: " << cudaGetErrorString(err); diff --git a/src/storage/naive_storage_manager.h b/src/storage/naive_storage_manager.h index 55112b5a82e9..471b015eb32c 100644 --- a/src/storage/naive_storage_manager.h +++ b/src/storage/naive_storage_manager.h @@ -58,7 +58,7 @@ class NaiveStorageManager final : public StorageManager { template void NaiveStorageManager::Alloc(Storage::Handle* handle) { - handle->dptr = DeviceStorage::Alloc(handle); + DeviceStorage::Alloc(handle); } template diff --git a/src/storage/pinned_memory_storage.h b/src/storage/pinned_memory_storage.h index c4ababbdc03a..13573d99360c 100644 --- a/src/storage/pinned_memory_storage.h +++ b/src/storage/pinned_memory_storage.h @@ -19,7 +19,7 @@ /*! * Copyright (c) 2015 by Contributors - * \file cpu_device_storage.h + * \file pinned_memory_storage.h * \brief CPU storage with pinned memory */ #ifndef MXNET_STORAGE_PINNED_MEMORY_STORAGE_H_ @@ -38,37 +38,36 @@ class PinnedMemoryStorage { public: /*! * \brief Allocation. - * \param size Size to allocate. - * \return Pointer to the storage. + * \param handle Handle struct. */ - inline static void* Alloc(Storage::Handle* handle); + inline static void Alloc(Storage::Handle* handle); /*! * \brief Deallocation. - * \param ptr Pointer to deallocate. + * \param handle Handle struct. */ inline static void Free(Storage::Handle handle); }; -inline void* PinnedMemoryStorage::Alloc(Storage::Handle* handle) { - void* ret = nullptr; +inline void PinnedMemoryStorage::Alloc(Storage::Handle* handle) { + handle->dptr = nullptr; const size_t size = handle->size; + if (size == 0) return; + #if MXNET_USE_NCCL std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); #endif mxnet::common::cuda::DeviceStore device_store(handle->ctx.real_dev_id(), true); // make the memory available across all devices - CUDA_CALL(cudaHostAlloc(&ret, size, cudaHostAllocPortable)); - return ret; + CUDA_CALL(cudaHostAlloc(&handle->dptr, size, cudaHostAllocPortable)); } inline void PinnedMemoryStorage::Free(Storage::Handle handle) { - void * ptr = handle.dptr; #if MXNET_USE_NCCL std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); #endif mxnet::common::cuda::DeviceStore device_store(handle.ctx.real_dev_id(), true); - cudaError_t err = cudaFreeHost(ptr); + cudaError_t err = cudaFreeHost(handle.dptr); // ignore unloading error, as memory has already been recycled if (err != cudaSuccess && err != cudaErrorCudartUnloading) { LOG(FATAL) << "CUDA: " << cudaGetErrorString(err); diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h index c407a9f00cb6..4c8ae4eb12dd 100644 --- a/src/storage/pooled_storage_manager.h +++ b/src/storage/pooled_storage_manager.h @@ -129,6 +129,12 @@ class GPUPooledStorageManager final : public StorageManager { }; // class GPUPooledStorageManager void GPUPooledStorageManager::Alloc(Storage::Handle* handle) { + // Set dptr to nullptr when handle size is 0. + if (handle->size == 0) { + handle->dptr = nullptr; + return; + } + std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); size_t size = RoundAllocSize(handle->size); auto&& reuse_it = memory_pool_.find(size); @@ -155,6 +161,10 @@ void GPUPooledStorageManager::Alloc(Storage::Handle* handle) { } void GPUPooledStorageManager::Free(Storage::Handle handle) { + // Do nothing if dptr is nullptr. Otherwise, nullptr may be reused + // which can cause illegal memory access error. + if (handle.dptr == nullptr) return; + std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); size_t size = RoundAllocSize(handle.size); auto&& reuse_pool = memory_pool_[size]; @@ -286,6 +296,12 @@ class GPUPooledRoundedStorageManager final : public StorageManager { }; // class GPUPooledRoundedStorageManager void GPUPooledRoundedStorageManager::Alloc(Storage::Handle* handle) { + // Set dptr to nullptr when handle size is 0. + if (handle->size == 0) { + handle->dptr = nullptr; + return; + } + std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); int bucket = get_bucket(handle->size); size_t size = get_size(bucket); @@ -312,6 +328,10 @@ void GPUPooledRoundedStorageManager::Alloc(Storage::Handle* handle) { } void GPUPooledRoundedStorageManager::Free(Storage::Handle handle) { + // Do nothing if dptr is nullptr. Otherwise, nullptr may be reused + // which can cause illegal memory access error. + if (handle.dptr == nullptr) return; + std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); int bucket = get_bucket(handle.size); auto&& reuse_pool = memory_pool_[bucket]; diff --git a/src/storage/storage.cc b/src/storage/storage.cc index 911d30cc3f05..7484e699d388 100644 --- a/src/storage/storage.cc +++ b/src/storage/storage.cc @@ -127,6 +127,10 @@ void StorageImpl::Alloc(Storage::Handle* handle) { } void StorageImpl::Free(Storage::Handle handle) { + // Do nothing if dtpr is nullptr because the handle may have already + // been freed or have not been allocated memory yet. + if (handle.dptr == nullptr) return; + const Context &ctx = handle.ctx; auto&& device = storage_managers_.at(ctx.dev_type); std::shared_ptr manager = device.Get( @@ -140,6 +144,10 @@ void StorageImpl::Free(Storage::Handle handle) { } void StorageImpl::DirectFree(Storage::Handle handle) { + // Do nothing if dtpr is nullptr because the handle may have already + // been freed or have not been allocated memory yet. + if (handle.dptr == nullptr) return; + const Context &ctx = handle.ctx; auto&& device = storage_managers_.at(ctx.dev_type); std::shared_ptr manager = device.Get( diff --git a/src/storage/storage_manager.h b/src/storage/storage_manager.h index 15a2c7ecffcb..d17dc91dc2fc 100644 --- a/src/storage/storage_manager.h +++ b/src/storage/storage_manager.h @@ -39,20 +39,17 @@ class StorageManager { public: /*! * \brief Allocation. - * \param size Size to allocate. - * \return Pointer to the storage. + * \param handle Handle struct. */ virtual void Alloc(Storage::Handle* handle) = 0; /*! * \brief Deallocation. - * \param ptr Pointer to deallocate. - * \param size Size of the storage. + * \param handle Handle struct. */ virtual void Free(Storage::Handle handle) = 0; /*! - * \brief Direct de-allocation. - * \param ptr Pointer to deallocate. - * \param size Size of the storage. + * \brief Direct deallocation. + * \param handle Handle struct. */ virtual void DirectFree(Storage::Handle handle) = 0; /*! diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index aec3ddc5a59b..e0caddbcd027 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -70,11 +70,9 @@ class BlobMemory { return handle_.dptr; } void Free() { - if (handle_.dptr) { - Storage *storage = mxnet::Storage::Get(); - storage->DirectFree(handle_); - handle_.dptr = nullptr; - } + mxnet::Storage::Get()->DirectFree(handle_); + handle_.dptr = nullptr; + handle_.size = 0; } size_t Size() const { return handle_.size; diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc index 026c3660f326..ce8d4ebd7a71 100644 --- a/tests/cpp/storage/storage_test.cc +++ b/tests/cpp/storage/storage_test.cc @@ -36,10 +36,15 @@ TEST(Storage, Basic_CPU) { EXPECT_EQ(handle.ctx, context_cpu); EXPECT_EQ(handle.size, kSize); storage->Free(handle); + handle = storage->Alloc(kSize, context_cpu); EXPECT_EQ(handle.ctx, context_cpu); EXPECT_EQ(handle.size, kSize); storage->Free(handle); + + handle = storage->Alloc(0, context_cpu); + EXPECT_EQ(handle.dptr, nullptr); + storage->Free(handle); } #if MXNET_USE_CUDA @@ -47,6 +52,7 @@ TEST(Storage_GPU, Basic_GPU) { if (mxnet::test::unitTestsWithCuda) { putenv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF=20"); putenv("MXNET_GPU_MEM_POOL_TYPE=Round"); + auto &&storage = mxnet::Storage::Get(); mxnet::Context context_gpu = mxnet::Context::GPU(0); auto &&handle = storage->Alloc(32, context_gpu); @@ -71,6 +77,11 @@ TEST(Storage_GPU, Basic_GPU) { EXPECT_EQ(handle2.size, 3145728); EXPECT_EQ(handle2.dptr, ptr2); storage->Free(handle2); + + handle = storage->Alloc(0, context_gpu); + EXPECT_EQ(handle.dptr, nullptr); + storage->Free(handle); + unsetenv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF"); unsetenv("MXNET_GPU_MEM_POOL_TYPE"); } @@ -88,6 +99,10 @@ TEST(Storage_GPU, Basic_GPU) { EXPECT_EQ(handle.size, kSize); EXPECT_EQ(handle.dptr, ptr); storage->Free(handle); + + handle = storage->Alloc(0, context_gpu); + EXPECT_EQ(handle.dptr, nullptr); + storage->Free(handle); } } #endif // MXNET_USE_CUDA