Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[storage] putting things together
Browse files Browse the repository at this point in the history
  • Loading branch information
hotpxl committed Aug 11, 2015
1 parent 2214ce3 commit 84bf795
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ endif
BIN = test/api_registry_test
OBJ = storage.o narray_op_cpu.o static_operator.o static_operator_cpu.o
# add threaded engine after it is done
OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o operator.o fully_connect_op_cpu.o cpu_storage.o gpu_storage.o
OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o operator.o fully_connect_op_cpu.o cpu_storage.o gpu_storage.o storage.o
CUOBJ =
SLIB = lib/libmxnet.so
ALIB = lib/libmxnet.a
Expand Down
10 changes: 5 additions & 5 deletions include/mxnet/narray.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,14 @@ class NArray {
/*! \brief the real data chunk that backs NArray */
struct Chunk {
/*! \brief storage handle from storage engine */
StorageManager::Handle shandle;
Storage::Handle shandle;
/*! \brief variable from DAG engine */
DAGEngine::Variable var;
/*! \brief holds the data content */
TBlob data;
/*!
* \brief if this is true, this means the data do not come
* from StorageManager, and do not need to be freed
* from Storage, and do not need to be freed
*/
bool static_data;
/*! \brief whether allocation is delayed */
Expand Down Expand Up @@ -161,7 +161,7 @@ class NArray {
/*! \brief check if delay alloc is on, do alloc if not yet done */
inline void CheckAndAlloc(void) {
if (delay_alloc) {
shandle = StorageManager::Get()->Alloc(data.shape_.Size() * sizeof(real_t), shandle.ctx);
shandle = Storage::Get()->Alloc(data.shape_.Size() * sizeof(real_t), shandle.ctx);
data = TBlob(static_cast<real_t*>(shandle.dptr), data.shape_, shandle.ctx.dev_mask);
delay_alloc = false;
}
Expand All @@ -172,9 +172,9 @@ class NArray {
DAGEngine::Get()->PushDelete([](RunContext s) {}, shandle.ctx, var);
} else {
CHECK(!delay_alloc) << "deleted before allocation";
StorageManager::Handle h = this->shandle;
Storage::Handle h = this->shandle;
DAGEngine::Get()->PushDelete([h](RunContext s) {
StorageManager::Get()->Free(h);
Storage::Get()->Free(h);
}, shandle.ctx, var);
}
}
Expand Down
56 changes: 38 additions & 18 deletions include/mxnet/storage.h
Original file line number Diff line number Diff line change
@@ -1,49 +1,69 @@
/*!
* Copyright (c) 2015 by Contributors
* \file storage.h
* \brief the memory allocator that manages the memory across multiple devices
* \brief Storage manager across multiple devices.
*/
#ifndef MXNET_STORAGE_H_
#define MXNET_STORAGE_H_

#include <memory>
#include "./base.h"
#include "./tensor_blob.h"

namespace mxnet {

/*! \brief memory allocator of storage */
class StorageManager {
/*!
* \brief Storage manager across multiple devices.
*/
class Storage {
public:
/*!
* \brief storage handle the represents storage information
* \brief Storage handle.
*/
struct Handle {
/*! \brief pointer to the data */
/*!
* \brief Pointer to the data.
*/
void* dptr;
/*! \brief context information about device and deviceID */
/*!
* \brief Size of the storage.
*/
size_t size;
/*!
* \brief Context information about device and ID.
*/
Context ctx;
};
/*!
* \brief allocate a new contiguous memory for a given size
* \param size the total size of memory in bytes
* \param ctx context information about the device and deviceID
* \return Handle struct
* \brief Allocate a new contiguous memory for a given size.
* \param size Total size of memory in bytes.
* \param ctx Context information about the device and ID.
* \return Handle struct.
*/
Handle Alloc(size_t size, Context ctx);
/*!
* \brief free the space represened the handle
* \param handle the handle to memory to be freed
* \brief Free storage.
* \param handle Handle struect.
*/
void Free(Handle handle);
/*! \return storage manager singleton */
static StorageManager* Get();
/*!
* \brief Destructor.
*/
~Storage();
/*!
* \return Storage singleton.
*/
static Storage* Get();

private:
/*!
* \brief disabled constructors
* \brief Hidden constructors.
*/
StorageManager() {}
DISALLOW_COPY_AND_ASSIGN(StorageManager);
}; // class StorageManager
Storage();
struct Impl;
std::unique_ptr<Impl> impl_;
DISALLOW_COPY_AND_ASSIGN(Storage);
}; // class Storage

} // namespace mxnet

Expand Down
2 changes: 1 addition & 1 deletion src/storage/cpu_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class CpuStorage {
* \brief Alignment of allocation.
*/
static constexpr size_t alignment_ = 16;
};
}; // class CpuStorage

} // namespace storage
} // namespace mxnet
Expand Down
2 changes: 1 addition & 1 deletion src/storage/gpu_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class GpuStorage {
* \param ptr Pointer to deallocate.
*/
static void Free(void* ptr);
};
}; // class GpuStorage

} // namespace storage
} // namespace mxnet
Expand Down
6 changes: 3 additions & 3 deletions src/storage/naive_storage_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ class NaiveStorageManager final : public StorageManager {

private:
DISALLOW_COPY_AND_ASSIGN(NaiveStorageManager);
};
}; // class NaiveStorageManager

template <class DeviceStorage>
void* NaiveStorageManager::Alloc(size_t size) {
void* NaiveStorageManager<DeviceStorage>::Alloc(size_t size) {
return DeviceStorage::Alloc(size);
}

template <class DeviceStorage>
void NaiveStorageManager::Free(void* ptr) {
void NaiveStorageManager<DeviceStorage>::Free(void* ptr, size_t) {
DeviceStorage::Free(ptr);
}

Expand Down
13 changes: 7 additions & 6 deletions src/storage/pooled_storage_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ class PooledStorageManager final : public StorageManager {
size_t used_memory_ = 0;
std::unordered_map<size_t, std::vector<void*>> memory_pool_;
DISALLOW_COPY_AND_ASSIGN(PooledStorageManager);
};
}; // class PooledStorageManager

templace <class DeviceStorage, size_t kThreshold>
template <class DeviceStorage, size_t kThreshold>
void* PooledStorageManager<DeviceStorage, kThreshold>::Alloc(size_t size) {
auto&& reuse_it = memory_pool_.find(size);
if (reuse_it == memory_pool_.end() || reuse_it->second.size() == 0) {
if (kThreshold <= used_memory_) {
ReleaseAll();
ReleaseAll();
}
used_memory_ += size;
return DeviceStorage::Alloc(size);
Expand All @@ -55,8 +55,9 @@ void* PooledStorageManager<DeviceStorage, kThreshold>::Alloc(size_t size) {
}
}

templace <class DeviceStorage, size_t kThreshold>
void PooledStorageManager<DeviceStorage, kThreshold>::Free(void* ptr, size_t size) {
// Return a chunk to the size-keyed free list instead of releasing it to the
// device; the memory is only truly freed by ReleaseAll().
// NOTE(review): used_memory_ stays charged here and is only decremented in
// ReleaseAll(), so pooled-but-unused bytes still count toward kThreshold.
template <class DeviceStorage, size_t kThreshold>
void PooledStorageManager<DeviceStorage, kThreshold>::Free(void* ptr,
size_t size) {
auto&& reuse_pool = memory_pool_[size];
reuse_pool.push_back(ptr);
}
Expand All @@ -65,7 +66,7 @@ template <class DeviceStorage, size_t kThreshold>
void PooledStorageManager<DeviceStorage, kThreshold>::ReleaseAll() {
for (auto&& i : memory_pool_) {
for (auto&& j : i.second) {
DeviceStorage::Free(i.second);
DeviceStorage::Free(j);
used_memory_ -= i.first;
}
}
Expand Down
99 changes: 74 additions & 25 deletions src/storage/storage.cc
Original file line number Diff line number Diff line change
@@ -1,44 +1,93 @@
/*!
* Copyright (c) 2015 by Contributors
*/
#include "mxnet/storage.h"
#include <mshadow/tensor.h>
#include <mxnet/storage.h>
#include <dmlc/logging.h>
#include "./storage_manager.h"
#include "./naive_storage_manager.h"
#include "./pooled_storage_manager.h"
#include "./cpu_storage.h"
#include "./gpu_storage.h"
#include "mxnet/cuda_utils.h"

namespace mxnet {

// class NaiveStorageManager : public StorageManager {
// public:
// virtual Handle Alloc(size_t size, Context ctx);
// virtual void Free(Handle handle);
// };
struct Storage::Impl {
static constexpr size_t kPoolThreshold = 4096 * 1024 * 1024ul;

template <class DeviceStorage>
using CurrentStorageManager = storage::PooledStorageManager<DeviceStorage, kPoolThreshold>;

// Make the device described by `ctx` current before an alloc/free call.
// CPU needs no activation; GPU requires selecting the target device ID.
static void ActivateDevice(Context ctx) {
switch (ctx.dev_mask) {
case cpu::kDevMask:
// Host memory: nothing to activate.
break;
case gpu::kDevMask:
#if MXNET_USE_CUDA
// Bind the calling thread to the handle's GPU before cudaMalloc/cudaFree.
CUDA_CALL(cudaSetDevice(ctx.dev_id));
#else  // MXNET_USE_CUDA
LOG(FATAL) << "Please compile with CUDA enabled";
#endif  // MXNET_USE_CUDA
break;
default:
LOG(FATAL) << "Unimplemented device";
}
}

std::unordered_map<
int, std::unordered_map<int, std::unique_ptr<storage::StorageManager>>>

This comment has been minimized.

Copy link
@tqchen

tqchen Aug 12, 2015

Member

consider switch two unordered_map to vector. Since the device id is likely to be consecutive and regular, having two hash maps here may not be ideal for quick lookups.

This comment has been minimized.

Copy link
@hotpxl

hotpxl Aug 12, 2015

Author Contributor

What if the user uses GPU 0, 1, 6, 7 with a gap in between?

This comment has been minimized.

Copy link
@tqchen

tqchen Aug 12, 2015

Member

It is OK to have nullptr unique ptr ?

This comment has been minimized.

Copy link
@hotpxl

hotpxl Aug 12, 2015

Author Contributor

OK. That will work. Also on the threshold of the pool. What do you think is the best way to specify it?

This comment has been minimized.

Copy link
@hotpxl

hotpxl Aug 12, 2015

Author Contributor

I'm thinking a percentage of the total memory of a device

This comment has been minimized.

Copy link
@tqchen

tqchen Aug 12, 2015

Member

I think current way is OK, as long as it can be configured via ENV variable

storage_managers;
}; // struct Storage::Impl

StorageManager::Handle StorageManager::Alloc(size_t size, Context ctx) {
Storage::Handle Storage::Alloc(size_t size, Context ctx) {
Handle hd;
hd.ctx = ctx;
if (ctx.dev_mask == cpu::kDevMask) {
hd.dptr = calloc(size, sizeof(real_t));
} else {
#if MXNET_USE_CUDA
cudaMalloc(&hd.dptr, size);
#endif
auto&& device = impl_->storage_managers[ctx.dev_mask];
auto&& device_id_it = device.find(ctx.dev_id);
// Allocate device if necessary.
if (device_id_it == device.end()) {
switch (ctx.dev_mask) {
case cpu::kDevMask:
device_id_it =
device.emplace(std::make_pair(
ctx.dev_id,
std::unique_ptr<storage::StorageManager>{
new Storage::Impl::CurrentStorageManager<
storage::CpuStorage>{}})).first;
break;
case gpu::kDevMask:
device_id_it =
device.emplace(std::make_pair(
ctx.dev_id,
std::unique_ptr<storage::StorageManager>{
new Storage::Impl::CurrentStorageManager<
storage::GpuStorage>{}})).first;
break;
default:
LOG(FATAL) << "Unimplemented device";
}
}
Impl::ActivateDevice(ctx);
hd.dptr = device_id_it->second->Alloc(size);
hd.size = size;
return hd;
}

void StorageManager::Free(StorageManager::Handle handle) {
if (handle.ctx.dev_mask == cpu::kDevMask) {
free(handle.dptr);
handle.dptr = NULL;
// cudaFreeHost(handle.dptr);
} else {
#if MXNET_USE_CUDA
cudaFree(handle.dptr);
#endif
}
// Free storage previously returned by Alloc(). Routes the pointer (with its
// recorded size) back to the per-device storage manager that allocated it.
void Storage::Free(Storage::Handle handle) {
// Select the handle's device so the device-specific free targets it.
Impl::ActivateDevice(handle.ctx);
// NOTE(review): .at() throws std::out_of_range if no Alloc ever happened on
// this (dev_mask, dev_id) — i.e. Free of a handle not produced by Alloc.
impl_->storage_managers.at(handle.ctx.dev_mask)
.at(handle.ctx.dev_id)
->Free(handle.dptr, handle.size);
}

StorageManager* StorageManager::Get() {
static StorageManager inst;
Storage::~Storage() = default;

// Process-wide Storage singleton, constructed lazily on first use.
Storage* Storage::Get() {
static Storage singleton;
return &singleton;
}

Storage::Storage() : impl_{new Impl{}} {}

} // namespace mxnet
4 changes: 2 additions & 2 deletions src/storage/storage_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ class StorageManager {
* \param ptr Pointer to deallocate.
* \param size Size of the storage.
*/
virtual void* Free(void* ptr, size_t size) = 0;
virtual void Free(void* ptr, size_t size) = 0;
/*!
* \brief Destructor.
*/
virtual ~StorageManager() = default;
};
};  // class StorageManager

} // namespace storage
} // namespace mxnet
Expand Down

0 comments on commit 84bf795

Please sign in to comment.