diff --git a/tech_reports/memory/allocator.md b/tech_reports/memory/allocator.md index 135baf4b3f9..b1651bc0a12 100644 --- a/tech_reports/memory/allocator.md +++ b/tech_reports/memory/allocator.md @@ -75,6 +75,9 @@ These reports can be enabled from C++ and Python. // API to dump state of memory for a given device. Optional prefix will be prepended to the report. DumpDeviceMemoryState(const Device *device, std::string prefix=""); +// API to get dram memory view for a given device +GetMemoryView(const IDevice* device, const BufferType& buffer_type); + // APIs to enable/disable memory reports for each Program's compile EnableMemoryReports(); DisableMemoryReports(); diff --git a/tt_metal/detail/reports/memory_reporter.cpp b/tt_metal/detail/reports/memory_reporter.cpp index ce948932235..34068a4cc70 100644 --- a/tt_metal/detail/reports/memory_reporter.cpp +++ b/tt_metal/detail/reports/memory_reporter.cpp @@ -148,6 +148,23 @@ void DumpDeviceMemoryState(const IDevice* device, const std::string& prefix) { MemoryReporter::inst().dump_memory_usage_state(device, std::move(prefix)); } +MemoryView MemoryReporter::get_memory_view(const IDevice* device, const BufferType& buffer_type) const { + auto stats = device->get_memory_allocation_statistics(buffer_type); + auto num_banks_ = device->num_banks(buffer_type); + + return MemoryView{ + .num_banks = num_banks_, + .total_bytes_per_bank = stats.total_allocatable_size_bytes, + .total_bytes_allocated_per_bank = stats.total_allocated_bytes, + .total_bytes_free_per_bank = stats.total_free_bytes, + .largest_contiguous_bytes_free_per_bank = stats.largest_free_block_bytes, + .block_table = device->get_memory_block_table(buffer_type)}; +} + +MemoryView GetMemoryView(const IDevice* device, const BufferType& buffer_type) { + return MemoryReporter::inst().get_memory_view(device, buffer_type); +} + bool MemoryReporter::enabled() { return is_enabled_; } void MemoryReporter::toggle(bool state) { is_enabled_ = state; } diff --git a/tt_metal/detail/reports/memory_reporter.hpp b/tt_metal/detail/reports/memory_reporter.hpp index ebcdd97fdfe..d8321da2f26 100644 --- a/tt_metal/detail/reports/memory_reporter.hpp +++ b/tt_metal/detail/reports/memory_reporter.hpp @@ -8,6 +8,11 @@ #include #include #include +#include +#include + +#include "tt_metal/impl/allocator/allocator.hpp" + namespace tt::tt_metal { inline namespace v0 { @@ -16,6 +21,7 @@ class IDevice; } // namespace v0 namespace detail { +struct MemoryView; /** * Enable generation of reports for memory allocation statistics. @@ -60,6 +66,32 @@ void DisableMemoryReports(); * */ void DumpDeviceMemoryState(const IDevice* device, const std::string& prefix = ""); +/** + * Populates MemoryView for BufferType [dram, l1, l1 small, trace]. Used when storing to disk is not an option. + * + * num_banks: total number of BufferType banks for given device + * total_bytes_per_bank: total allocatable size per bank of BufferType in bytes + * total_bytes_allocated_per_bank: currently allocated size per bank of BufferType in bytes + * total_bytes_free_per_bank: total free size per bank of BufferType in bytes + * largest_contiguous_bytes_free_per_bank: largest contiguous free block of BufferType in bytes + * block_table: list of all blocks in BufferType (blockID, address, size, prevID, nextID, allocated) + * + * | Argument | Description | Type | Valid Range | Required | + * |---------------|---------------------------------------------------|-----------------|--------------------------------------------------------|----------| + * | device | The device for which memory stats will be dumped. | const IDevice * | | True | + * | buffer_type | The type of buffer to populate the memory view. | const BufferType& | | True | + * */ +MemoryView GetMemoryView(const IDevice* device, const BufferType& buffer_type); + +struct MemoryView { + std::uint64_t num_banks = 0; + size_t total_bytes_per_bank = 0; + size_t total_bytes_allocated_per_bank = 0; + size_t total_bytes_free_per_bank = 0; + size_t largest_contiguous_bytes_free_per_bank = 0; + MemoryBlockTable block_table; +}; + class MemoryReporter { public: MemoryReporter& operator=(const MemoryReporter&) = delete; @@ -71,6 +103,8 @@ class MemoryReporter { void dump_memory_usage_state(const IDevice* device, const std::string& prefix = "") const; + MemoryView get_memory_view(const IDevice* device, const BufferType& buffer_type) const; + static void toggle(bool state); static MemoryReporter& inst(); static bool enabled(); diff --git a/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp b/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp index 6eec0150db8..0d64cd0d9cb 100644 --- a/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp +++ b/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp @@ -65,6 +65,8 @@ class Algorithm { virtual void dump_blocks(std::ostream& out) const = 0; + virtual MemoryBlockTable get_memory_block_table() const = 0; + virtual void shrink_size(DeviceAddr shrink_size, bool bottom_up = true) = 0; virtual void reset_size() = 0; diff --git a/tt_metal/impl/allocator/algorithms/free_list.cpp b/tt_metal/impl/allocator/algorithms/free_list.cpp index adc75c102e0..b4265844682 100644 --- a/tt_metal/impl/allocator/algorithms/free_list.cpp +++ b/tt_metal/impl/allocator/algorithms/free_list.cpp @@ -446,6 +446,24 @@ void FreeList::dump_blocks(std::ostream& out) const { out << "\n"; } +MemoryBlockTable FreeList::get_memory_block_table() const { + MemoryBlockTable blocks; + boost::local_shared_ptr curr_block = this->block_head_; + + while (curr_block != nullptr) { + std::unordered_map block_entry; + + block_entry["address"] = std::to_string(curr_block->address + this->offset_bytes_); // bytes + block_entry["size"] = std::to_string(curr_block->size); // bytes + block_entry["allocated"] = this->is_allocated(curr_block) ? "yes" : "no"; + + blocks.push_back(block_entry); + curr_block = curr_block->next_block; + } + + return blocks; +} + void FreeList::shrink_size(DeviceAddr shrink_size, bool bottom_up) { if (shrink_size == 0) { return; diff --git a/tt_metal/impl/allocator/algorithms/free_list.hpp b/tt_metal/impl/allocator/algorithms/free_list.hpp index 467fc8c3ece..5a3af36782b 100644 --- a/tt_metal/impl/allocator/algorithms/free_list.hpp +++ b/tt_metal/impl/allocator/algorithms/free_list.hpp @@ -38,6 +38,8 @@ class FreeList : public Algorithm { void dump_blocks(std::ostream& out) const; + MemoryBlockTable get_memory_block_table() const; + void shrink_size(DeviceAddr shrink_size, bool bottom_up = true); void reset_size(); diff --git a/tt_metal/impl/allocator/algorithms/free_list_opt.cpp b/tt_metal/impl/allocator/algorithms/free_list_opt.cpp index bd24c0c871b..19ca7d76b93 100644 --- a/tt_metal/impl/allocator/algorithms/free_list_opt.cpp +++ b/tt_metal/impl/allocator/algorithms/free_list_opt.cpp @@ -466,6 +466,28 @@ void FreeListOpt::dump_blocks(std::ostream& out) const { } } +MemoryBlockTable FreeListOpt::get_memory_block_table() const { + MemoryBlockTable blocks; + + for (size_t i = 0; i < block_address_.size(); i++) { + std::unordered_map block_entry; + + if (!meta_block_is_allocated_[i]) { + continue; + } + + block_entry["blockID"] = std::to_string(i); + block_entry["address"] = std::to_string(block_address_[i]); // bytes + block_entry["size"] = std::to_string(block_size_[i]); // bytes + block_entry["prevID"] = std::to_string(block_prev_block_[i]); + block_entry["nextID"] = std::to_string(block_next_block_[i]); + block_entry["allocated"] = block_is_allocated_[i] ? "yes" : "no"; + blocks.push_back(block_entry); + } + + return blocks; +} + void FreeListOpt::shrink_size(DeviceAddr shrink_size, bool bottom_up) { if (shrink_size == 0) { return; diff --git a/tt_metal/impl/allocator/algorithms/free_list_opt.hpp b/tt_metal/impl/allocator/algorithms/free_list_opt.hpp index 1a3a7f87405..341c96561f3 100644 --- a/tt_metal/impl/allocator/algorithms/free_list_opt.hpp +++ b/tt_metal/impl/allocator/algorithms/free_list_opt.hpp @@ -49,6 +49,8 @@ class FreeListOpt : public Algorithm { void dump_blocks(std::ostream& out) const override; + MemoryBlockTable get_memory_block_table() const override; + void shrink_size(DeviceAddr shrink_size, bool bottom_up = true) override; void reset_size() override; diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp index 059c209d7a0..c85294fce25 100644 --- a/tt_metal/impl/allocator/allocator.cpp +++ b/tt_metal/impl/allocator/allocator.cpp @@ -201,6 +201,15 @@ void BankManager::dump_blocks(std::ofstream& out) const { } } +MemoryBlockTable BankManager::get_memory_block_table() const { + if (this->allocator_) { + return this->allocator_->get_memory_block_table(); + } + + log_warning("allocator is not initialized, cannot get block table for memory"); + return {}; +} + void BankManager::shrink_size(DeviceAddr shrink_size, bool bottom_up) { if (this->allocator_) { this->allocator_->shrink_size(shrink_size, bottom_up); @@ -375,6 +384,18 @@ void dump_memory_blocks(const Allocator& allocator, const BufferType& buffer_typ } } +MemoryBlockTable get_memory_block_table(const Allocator& allocator, const BufferType& buffer_type) { + switch (buffer_type) { + case BufferType::DRAM: return allocator.dram_manager.get_memory_block_table(); + case BufferType::L1: return allocator.l1_manager.get_memory_block_table(); + case BufferType::L1_SMALL: return allocator.l1_small_manager.get_memory_block_table(); + case BufferType::TRACE: return allocator.trace_buffer_manager.get_memory_block_table(); + default: { + TT_THROW("Unsupported buffer type!"); + } + } +} + std::optional lowest_occupied_l1_address(const Allocator& allocator, uint32_t bank_id) { // l1_manager always sits below l1_small_manager in the address space, so there is no need to check l1_small_manager return allocator.l1_manager.lowest_occupied_address(bank_id); diff --git a/tt_metal/impl/allocator/allocator.hpp b/tt_metal/impl/allocator/allocator.hpp index a3eb7f05ed0..647e4f3324f 100644 --- a/tt_metal/impl/allocator/allocator.hpp +++ b/tt_metal/impl/allocator/allocator.hpp @@ -76,6 +76,8 @@ class BankManager { void dump_blocks(std::ofstream& out) const; + MemoryBlockTable get_memory_block_table() const; + void shrink_size(DeviceAddr shrink_size, bool bottom_up = true); void reset_size(); @@ -122,6 +124,8 @@ Statistics get_statistics(const Allocator& allocator, const BufferType& buffer_t void dump_memory_blocks(const Allocator& allocator, const BufferType& buffer_type, std::ofstream& out); +MemoryBlockTable get_memory_block_table(const Allocator& allocator, const BufferType& buffer_type); + std::optional lowest_occupied_l1_address(const Allocator& allocator, uint32_t bank_id); DeviceAddr base_alloc( diff --git a/tt_metal/impl/allocator/allocator_types.hpp b/tt_metal/impl/allocator/allocator_types.hpp index b4ad6bf960c..7f30e4d1cf1 100644 --- a/tt_metal/impl/allocator/allocator_types.hpp +++ b/tt_metal/impl/allocator/allocator_types.hpp @@ -13,6 +13,13 @@ namespace tt::tt_metal { // Fwd declares +/* +MemoryBlockTable is a list of memory blocks in the following format: +[{"blockID": "0", "address": "0", "size": "0", "prevID": "0", "nextID": "0", "allocated": true}] +address: bytes +size: bytes +*/ +using MemoryBlockTable = std::vector>; struct Allocator; namespace allocator { class BankManager; diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index aed0658c120..391b6df1b14 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -1431,6 +1431,11 @@ void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &ou return allocator::dump_memory_blocks(*allocator, buffer_type, out); } +MemoryBlockTable Device::get_memory_block_table(const BufferType& buffer_type) const { + const auto& allocator = this->get_initialized_allocator(); + return allocator::get_memory_block_table(*allocator, buffer_type); +} + const std::unordered_set &Device::get_allocated_buffers() const { const auto& allocator = this->get_initialized_allocator(); return allocator::get_allocated_buffers(*allocator); diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp index 88bfdeb4b91..8ecae5c032e 100644 --- a/tt_metal/impl/device/device.hpp +++ b/tt_metal/impl/device/device.hpp @@ -147,6 +147,8 @@ class Device : public IDevice { void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const override; void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, SubDeviceId sub_device_id) const override; + MemoryBlockTable get_memory_block_table(const BufferType &buffer_type) const override; + // Set of logical ethernet core coordinates // core.x represents connectivity to one other chip, i.e. cores with all connect to same chip // core.y represents different channels along one diff --git a/tt_metal/include/tt_metal/device.hpp b/tt_metal/include/tt_metal/device.hpp index 3cb6a375706..64c71b5607a 100644 --- a/tt_metal/include/tt_metal/device.hpp +++ b/tt_metal/include/tt_metal/device.hpp @@ -26,6 +26,13 @@ namespace tt { namespace tt_metal { +/* +MemoryBlockTable is a list of memory blocks in the following format: +[{"blockID": "0", "address": "0", "size": "0", "prevID": "0", "nextID": "0", "allocated": true}] +address: bytes +size: bytes +*/ +using MemoryBlockTable = std::vector>; enum class BufferType; inline namespace v0 { @@ -162,6 +169,8 @@ class IDevice { virtual void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const = 0; virtual void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, SubDeviceId sub_device_id) const = 0; + virtual MemoryBlockTable get_memory_block_table(const BufferType& buffer_type) const = 0; + // Set of logical ethernet core coordinates // core.x represents connectivity to one other chip, i.e. cores with all connect to same chip // core.y represents different channels along one diff --git a/ttnn/cpp/pybind11/device.cpp b/ttnn/cpp/pybind11/device.cpp index 0c8f118dd04..ffbff2172a6 100644 --- a/ttnn/cpp/pybind11/device.cpp +++ b/ttnn/cpp/pybind11/device.cpp @@ -108,6 +108,18 @@ void py_device_module_types(py::module& m_device) { py::class_(m_device, "SubDeviceId", "ID of a sub-device."); py::class_(m_device, "SubDeviceManagerId", "ID of a sub-device manager."); + + py::class_( + m_device, "MemoryView", "Class representing view of the memory (dram, l1, l1_small, trace) of a device.") + .def_readonly("num_banks", &tt::tt_metal::detail::MemoryView::num_banks) + .def_readonly("total_bytes_per_bank", &tt::tt_metal::detail::MemoryView::total_bytes_per_bank) + .def_readonly( + "total_bytes_allocated_per_bank", &tt::tt_metal::detail::MemoryView::total_bytes_allocated_per_bank) + .def_readonly("total_bytes_free_per_bank", &tt::tt_metal::detail::MemoryView::total_bytes_free_per_bank) + .def_readonly( + "largest_contiguous_bytes_free_per_bank", + &tt::tt_metal::detail::MemoryView::largest_contiguous_bytes_free_per_bank) + .def_readonly("block_table", &tt::tt_metal::detail::MemoryView::block_table); } void device_module(py::module& m_device) { @@ -537,6 +549,22 @@ void device_module(py::module& m_device) { +------------------+----------------------------------+-----------------------+-------------+----------+ )doc"); + m_device.def( + "GetMemoryView", + &tt::tt_metal::detail::GetMemoryView, + py::arg().noconvert(), + py::arg().noconvert(), + R"doc( + Populates MemoryView for BufferType [dram, l1, l1 small, trace]. Used when storing to disk is not an option. + + +------------------+----------------------------------+-----------------------+-------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +==================+==================================+=======================+=============+==========+ + | device | Device to dump memory state for | ttnn.Device | | Yes | + | buffer_type | Type of buffer for memory view | ttnn.BufferType | | Yes | + +------------------+----------------------------------+-----------------------+-------------+----------+ + )doc"); + m_device.def( "synchronize_device", [](IDevice* device, const std::optional cq_id, const std::vector& sub_device_ids) { diff --git a/ttnn/cpp/pybind11/tensor.cpp b/ttnn/cpp/pybind11/tensor.cpp index 70c1efd8008..dd60bb08cc7 100644 --- a/ttnn/cpp/pybind11/tensor.cpp +++ b/ttnn/cpp/pybind11/tensor.cpp @@ -80,7 +80,8 @@ void tensor_mem_config_module_types(py::module& m_tensor) { py::enum_(m_tensor, "BufferType") .value("DRAM", BufferType::DRAM) .value("L1", BufferType::L1) - .value("L1_SMALL", BufferType::L1_SMALL); + .value("L1_SMALL", BufferType::L1_SMALL) + .value("TRACE", BufferType::TRACE); tt_serializable_class(m_tensor, "CoreCoord", R"doc( Class defining core coordinate diff --git a/ttnn/ttnn/__init__.py b/ttnn/ttnn/__init__.py index 5c981565257..91e55d35c29 100644 --- a/ttnn/ttnn/__init__.py +++ b/ttnn/ttnn/__init__.py @@ -174,6 +174,7 @@ def manage_config(name, value): manage_device, synchronize_device, dump_device_memory_state, + get_memory_view, GetPCIeDeviceID, GetNumPCIeDevices, GetNumAvailableDevices, diff --git a/ttnn/ttnn/device.py b/ttnn/ttnn/device.py index 6cbfaa85ead..09aa5a22929 100644 --- a/ttnn/ttnn/device.py +++ b/ttnn/ttnn/device.py @@ -134,6 +134,10 @@ def dump_device_memory_state(device, prefix=""): ttnn._ttnn.device.DumpDeviceMemoryState(device, prefix) +def get_memory_view(device, buffer_type): + return ttnn._ttnn.device.GetMemoryView(device, buffer_type) + + def is_wormhole_b0(device=None): if device is not None: return device.arch() == ttnn._ttnn.device.Arch.WORMHOLE_B0