diff --git a/include/tvm/runtime/vm/memory_manager.h b/include/tvm/runtime/vm/memory_manager.h index fb2354bca4ec..feafc01f63d9 100644 --- a/include/tvm/runtime/vm/memory_manager.h +++ b/include/tvm/runtime/vm/memory_manager.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,8 @@ struct Buffer { void* data{nullptr}; /*! \brief The size of the block. */ size_t size{0}; + /*! \brief The shape of the tensor. */ + std::vector shape; /*! \brief The context of the allocated buffers. */ Device device; }; @@ -72,6 +75,15 @@ class Allocator { * \return A sized allocation in the form of a buffer. */ virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; + /*! \brief Allocate a buffer given a shape and type. + * \param ndims The rank of the tensor. + * \param shape The shape of the tensor. + * \param type_hint A type hint to the allocator. + * \param mem_scope A memory scope of the buffer. + * \return A sized allocation in the form of a buffer. + */ + virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope = "") = 0; /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. 
*/ @@ -81,6 +93,10 @@ class Allocator { */ virtual size_t UsedMemory() const = 0; + protected: + virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope); + private: AllocatorType type_; }; @@ -105,7 +121,7 @@ class MemoryManager { private: MemoryManager() {} - private: + protected: std::mutex mu_; std::unordered_map> allocators_; }; diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/vm/memory_manager.cc index 22afcce6a01e..2855722a4cf4 100644 --- a/src/runtime/vm/memory_manager.cc +++ b/src/runtime/vm/memory_manager.cc @@ -170,6 +170,24 @@ NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice return NDArray(GetObjectPtr(container)); } +Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) { + if (mem_scope.empty() || mem_scope == "global") { + // by default, we can always redirect to the flat memory allocations + std::vector s; + for (int i = 0; i < ndims; ++i) { + s.push_back(shape[i]); + } + NDArray::Container container(nullptr, s, type_hint, dev); + size_t size = GetDataSize(container.dl_tensor); + size_t alignment = GetDataAlignment(container.dl_tensor); + return Alloc(size, alignment, type_hint); + } + LOG(FATAL) << "Allocator cannot allocate data space with " + << "specified memory scope: " << mem_scope; + return {}; +} + } // namespace vm } // namespace runtime } // namespace tvm diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/vm/naive_allocator.h index 9fce66f60669..799f16ad60bc 100644 --- a/src/runtime/vm/naive_allocator.h +++ b/src/runtime/vm/naive_allocator.h @@ -27,6 +27,7 @@ #include #include +#include namespace tvm { namespace runtime { @@ -46,6 +47,31 @@ class NaiveAllocator final : public Allocator { return buf; } + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + Buffer buf; + size_t nbytes = 1; + for (int i = 0; i < ndims; ++i) { + 
buf.shape.push_back(shape[i]); + nbytes *= static_cast(shape[i]); + } + nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8; + buf.device = device_; + if (mem_scope.empty() || mem_scope == "global") { + auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + buf.size = tmp_buf.size; + buf.data = tmp_buf.data; + return buf; + } + + buf.size = nbytes; + buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, type_hint, + String(mem_scope)); + used_memory_.fetch_add(nbytes, std::memory_order_relaxed); + DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B"; + return buf; + } + void Free(const Buffer& buffer) override { DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data); used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed); diff --git a/src/runtime/vm/pooled_allocator.h b/src/runtime/vm/pooled_allocator.h index 9c11c783011e..ea6059e0c64c 100644 --- a/src/runtime/vm/pooled_allocator.h +++ b/src/runtime/vm/pooled_allocator.h @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -71,6 +72,15 @@ class PooledAllocator final : public Allocator { return buf; } + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + if (mem_scope.empty() || mem_scope == "global") { + return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + } + LOG(FATAL) << "This alloc should be implemented"; + return {}; + } + void Free(const Buffer& buffer) override { std::lock_guard lock(mu_); if (memory_pool_.find(buffer.size) == memory_pool_.end()) { diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/vm/memory_manager_tests.cc new file mode 100644 index 000000000000..ac1ff201cf34 --- /dev/null +++ b/tests/cpp/runtime/vm/memory_manager_tests.cc @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include + +#include "../../../../src/runtime/vm/pooled_allocator.h" + +namespace tvm { +namespace runtime { +namespace vm { + +// MemoryManagerWrapper is necessary because in class MemoryManager we don't have access to its +// protected members. In this class we add a new method which allows us to clear internal state of +// the global memory manager. 
+class MemoryManagerWrapper : public MemoryManager {
+ public:
+  static MemoryManagerWrapper* Global() {  // view the global manager through the wrapper type
+    return static_cast<MemoryManagerWrapper*>(MemoryManager::Global());
+  }
+  void clear() { allocators_.clear(); }  // drop every allocator registered so far
+};
+
+class TvmVMMemoryManagerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Clear allocators from previous tests
+    MemoryManagerWrapper::Global()->clear();
+  }
+};
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto buff = allocator->Alloc(64, 32, DataType::Float(32));
+  EXPECT_EQ(allocator->UsedMemory(), 64);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocBasic) {
+  Device dev = {kDLCPU, 0};
+  size_t nbytes = 64;
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;  // round up to whole pages
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto buff = allocator->Alloc(nbytes, 32, DataType::Float(32));
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);  // usage is expected to be unchanged by Free()
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  {
+    auto ndarray = allocator->Empty(shape, dt, dev);
+    EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  }
+  EXPECT_EQ(allocator->UsedMemory(), 0);  // checked after ndarray has gone out of scope
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  {
+    auto ndarray = allocator->Empty(shape, dt, dev);
+    EXPECT_EQ(allocator->UsedMemory(), size);
+  }
+  EXPECT_EQ(allocator->UsedMemory(), size);
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture");
+    FAIL();
+  } catch (const std::exception& e) {
+    std::string pattern =
+        "Device does not support allocate data space with specified memory scope: global.texture";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture");
+    FAIL();
+  } catch (const std::exception& e) {
+    std::string pattern = "This alloc should be implemented";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) {
+  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
+  if (!enabled) {
+    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n";
+    return;
+  }
+  Device dev = {kDLOpenCL, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+
+  auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture");
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(texture);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) {
+  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
+  if (!enabled) {
+    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n";
+    return;
+  }
+  Device dev = {kDLOpenCL, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture");
+    FAIL();
+  } catch (const std::exception& e) {
+    std::string pattern = "This alloc should be implemented";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+}  // namespace vm
+}  // namespace runtime
+}  // namespace tvm