From d8bbeb0320d36c6103841928140088d1bdc8de7a Mon Sep 17 00:00:00 2001 From: Tapasvi Patel Date: Tue, 14 Jan 2025 23:14:09 +0000 Subject: [PATCH] #1683: Updated memory collection and report of dram and l1 using new metal APIs --- docs/src/ttrt.md | 7 +- runtime/include/tt/runtime/detail/ttmetal.h | 3 + runtime/include/tt/runtime/detail/ttnn.h | 3 + runtime/include/tt/runtime/runtime.h | 13 + runtime/include/tt/runtime/types.h | 23 ++ runtime/lib/runtime.cpp | 17 ++ runtime/lib/ttmetal/runtime.cpp | 43 +++ runtime/lib/ttnn/runtime.cpp | 42 +++ runtime/tools/python/ttrt/common/callback.py | 245 +++++++----------- runtime/tools/python/ttrt/common/run.py | 64 +---- runtime/tools/python/ttrt/runtime/__init__.py | 1 + runtime/tools/python/ttrt/runtime/module.cpp | 20 +- 12 files changed, 262 insertions(+), 219 deletions(-) diff --git a/docs/src/ttrt.md b/docs/src/ttrt.md index 0bd59cb02..6dc589287 100644 --- a/docs/src/ttrt.md +++ b/docs/src/ttrt.md @@ -174,7 +174,11 @@ ttrt run out.ttnn --save-artifacts --artifact-dir /path/to/some/dir ttrt run out.ttnn --load-kernels-from-disk ttrt run out.ttnn --enable-async-ttnn ttrt run out.ttnn --result-file result.json -ttrt run out.ttnn --golden +ttrt run out.ttnn --disable-golden +ttrt run out.ttnn --save-golden-tensors +ttrt run out.ttnn --debugger +ttrt run out.ttnn --memory --save-artifacts +ttrt run out.ttnn --memory --check-memory-leak ``` ### query @@ -219,6 +223,7 @@ ttrt perf /dir/of/flatbuffers --loops 10 --host-only ttrt perf /dir/of/flatbuffers --log-file ttrt.log --host-only ttrt perf --save-artifacts --artifact-dir /path/to/some/dir ttrt perf out.ttnn --result-file result.json +ttrt run out.ttnn --memory ``` To use the Tracy GUI, run the following instructions on your macbook. You can upload your .tracy file into the GUI to view the profiled dumps. 
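For illustration only (not part of the patch): a minimal sketch of how the new `get_memory_view` binding introduced below can be consumed from a user-defined ttrt callback. The `callback_runtime_config` fields and the callback signature mirror the built-in callbacks in `runtime/tools/python/ttrt/common/callback.py`; the function name `log_memory_usage` is hypothetical.

```python
# Sketch only, not included in this patch. Assumes the bindings added in
# runtime/tools/python/ttrt/runtime/module.cpp (MemoryView, MemoryBufferType,
# Device.get_memory_view) are available.
import ttrt.runtime


def log_memory_usage(callback_runtime_config, binary, program_context, op_context):
    device = callback_runtime_config.device
    logging = callback_runtime_config.logging

    # get_memory_view returns {MemoryBufferType: MemoryView} for one device.
    views = device.get_memory_view(device_id=0)
    dram = views[ttrt.runtime.MemoryBufferType.DRAM]
    l1 = views[ttrt.runtime.MemoryBufferType.L1]

    loc = ttrt.runtime.get_op_loc_info(op_context)
    logging.debug(
        f"op={loc} "
        f"dram_allocated_per_bank={dram.total_bytes_allocated_per_bank} "
        f"l1_free_per_bank={l1.total_bytes_free_per_bank} "
        f"l1_largest_contiguous_free={l1.largest_contiguous_bytes_free_per_bank}"
    )
```

This replaces the previous approach of dumping and re-parsing the CSV memory reports on disk: the per-bank numbers come straight from the `MemoryView` structs returned by the runtime, keyed by `DRAM`, `L1`, `L1_SMALL`, and `TRACE`.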
diff --git a/runtime/include/tt/runtime/detail/ttmetal.h b/runtime/include/tt/runtime/detail/ttmetal.h index aa7808bac..87e3c3604 100644 --- a/runtime/include/tt/runtime/detail/ttmetal.h +++ b/runtime/include/tt/runtime/detail/ttmetal.h @@ -44,6 +44,9 @@ void deallocateBuffers(Device device); void dumpMemoryReport(Device device); +std::unordered_map +getMemoryView(Device device, int deviceID = 0); + void wait(Event event); void wait(Tensor tensor); diff --git a/runtime/include/tt/runtime/detail/ttnn.h b/runtime/include/tt/runtime/detail/ttnn.h index 081ef02fe..e9ed590b9 100644 --- a/runtime/include/tt/runtime/detail/ttnn.h +++ b/runtime/include/tt/runtime/detail/ttnn.h @@ -95,6 +95,9 @@ void deallocateBuffers(Device device); void dumpMemoryReport(Device device); +std::unordered_map +getMemoryView(Device device, int deviceID = 0); + void wait(Event event); void wait(Tensor tensor); diff --git a/runtime/include/tt/runtime/runtime.h b/runtime/include/tt/runtime/runtime.h index 7725e4b56..1a668918c 100644 --- a/runtime/include/tt/runtime/runtime.h +++ b/runtime/include/tt/runtime/runtime.h @@ -20,6 +20,19 @@ std::pair getCurrentSystemDesc(); namespace detail { void deallocateBuffers(Device device); void dumpMemoryReport(Device device); + +/* +This function get the memory view per device + { + "DRAM": MemoryView, + "L1": MemoryView, + "L1Small": MemoryView, + "Trace": MemoryView + } +*/ +std::unordered_map +getMemoryView(Device device, int deviceID = 0); + } // namespace detail DeviceRuntime getCurrentRuntime(); diff --git a/runtime/include/tt/runtime/types.h b/runtime/include/tt/runtime/types.h index cc2791e23..154501512 100644 --- a/runtime/include/tt/runtime/types.h +++ b/runtime/include/tt/runtime/types.h @@ -18,6 +18,20 @@ #pragma clang diagnostic pop namespace tt::runtime { +/* +MemoryBlockTable is a list of memory blocks in the following format: +[{"blockID": "0", "address": "0", "size": "0", "prevID": "0", "nextID": "0", +"allocated": true}] address: bytes size: bytes +*/ +using MemoryBlockTable = + std::vector>; + +enum class MemoryBufferType { + DRAM, + L1, + L1_SMALL, + TRACE, +}; enum class DeviceRuntime { Disabled, @@ -146,6 +160,15 @@ struct OpContext : public detail::RuntimeCheckedObjectImpl { using detail::RuntimeCheckedObjectImpl::RuntimeCheckedObjectImpl; }; +struct MemoryView { + std::uint64_t numBanks = 0; + size_t totalBytesPerBank = 0; + size_t totalBytesAllocatedPerBank = 0; + size_t totalBytesFreePerBank = 0; + size_t largestContiguousBytesFreePerBank = 0; + MemoryBlockTable blockTable; +}; + } // namespace tt::runtime #endif diff --git a/runtime/lib/runtime.cpp b/runtime/lib/runtime.cpp index b0ac1ee43..f4fa49f2a 100644 --- a/runtime/lib/runtime.cpp +++ b/runtime/lib/runtime.cpp @@ -60,6 +60,23 @@ void dumpMemoryReport(Device device) { LOG_FATAL("runtime is not enabled"); } + +std::unordered_map +getMemoryView(Device device, int deviceID) { +#if defined(TT_RUNTIME_ENABLE_TTNN) + if (getCurrentRuntime() == DeviceRuntime::TTNN) { + return ::tt::runtime::ttnn::getMemoryView(device, deviceID); + } +#endif + +#if defined(TT_RUNTIME_ENABLE_TTMETAL) + if (getCurrentRuntime() == DeviceRuntime::TTMetal) { + return ::tt::runtime::ttmetal::getMemoryView(device, deviceID); + } +#endif + + LOG_FATAL("runtime is not enabled"); +} } // namespace detail DeviceRuntime getCurrentRuntime() { diff --git a/runtime/lib/ttmetal/runtime.cpp b/runtime/lib/ttmetal/runtime.cpp index f2702b52a..ab20c3cc3 100644 --- a/runtime/lib/ttmetal/runtime.cpp +++ b/runtime/lib/ttmetal/runtime.cpp @@ -33,6 
+33,19 @@ static Tensor createNullTensor() { return Tensor(nullptr, nullptr, DeviceRuntime::TTMetal); } +static tt::runtime::MemoryView +createMemoryView(tt::tt_metal::detail::MemoryView const &memoryView) { + return tt::runtime::MemoryView{ + .numBanks = memoryView.num_banks, + .totalBytesPerBank = memoryView.total_bytes_per_bank, + .totalBytesAllocatedPerBank = memoryView.total_bytes_allocated_per_bank, + .totalBytesFreePerBank = memoryView.total_bytes_free_per_bank, + .largestContiguousBytesFreePerBank = + memoryView.largest_contiguous_bytes_free_per_bank, + .blockTable = memoryView.block_table, + }; +} + Tensor createTensor(std::shared_ptr data, std::vector const &shape, std::vector const &stride, @@ -116,6 +129,36 @@ void dumpMemoryReport(Device deviceHandle) { } } +std::unordered_map +getMemoryView(Device deviceHandle, int deviceID) { + std::unordered_map + memoryMap; + ::tt::tt_metal::distributed::MeshDevice &meshDevice = + deviceHandle.as<::tt::tt_metal::distributed::MeshDevice>( + DeviceRuntime::TTMetal); + + auto device = meshDevice.get_device(deviceID); + + auto dramMemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::DRAM); + auto l1MemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::L1); + auto l1SmallMemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::L1_SMALL); + auto traceMemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::TRACE); + + memoryMap[tt::runtime::MemoryBufferType::DRAM] = + createMemoryView(dramMemoryView); + memoryMap[tt::runtime::MemoryBufferType::L1] = createMemoryView(l1MemoryView); + memoryMap[tt::runtime::MemoryBufferType::L1_SMALL] = + createMemoryView(l1SmallMemoryView); + memoryMap[tt::runtime::MemoryBufferType::TRACE] = + createMemoryView(traceMemoryView); + + return memoryMap; +} + void wait(Event event) { Events events = event.as(DeviceRuntime::TTMetal); for (auto e : events) { diff --git a/runtime/lib/ttnn/runtime.cpp b/runtime/lib/ttnn/runtime.cpp index 72378f881..593433581 100644 --- a/runtime/lib/ttnn/runtime.cpp +++ b/runtime/lib/ttnn/runtime.cpp @@ -94,6 +94,19 @@ static DeviceVariant getTargetDevice(::ttnn::MeshDevice &meshDevice) { return std::ref(meshDevice); } +static tt::runtime::MemoryView +createMemoryView(tt::tt_metal::detail::MemoryView const &memoryView) { + return tt::runtime::MemoryView{ + .numBanks = memoryView.num_banks, + .totalBytesPerBank = memoryView.total_bytes_per_bank, + .totalBytesAllocatedPerBank = memoryView.total_bytes_allocated_per_bank, + .totalBytesFreePerBank = memoryView.total_bytes_free_per_bank, + .largestContiguousBytesFreePerBank = + memoryView.largest_contiguous_bytes_free_per_bank, + .blockTable = memoryView.block_table, + }; +} + static ::tt::target::ttnn::TTNNBinary const *getBinary(Flatbuffer binary) { bool isTTNN = ::tt::target::ttnn::SizePrefixedTTNNBinaryBufferHasIdentifier( binary.handle.get()); @@ -225,6 +238,35 @@ void dumpMemoryReport(Device deviceHandle) { } } +std::unordered_map +getMemoryView(Device deviceHandle, int deviceID) { + std::unordered_map + memoryMap; + ::ttnn::MeshDevice &meshDevice = + deviceHandle.as<::ttnn::MeshDevice>(DeviceRuntime::TTNN); + + auto device = meshDevice.get_device(deviceID); + + auto dramMemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::DRAM); + auto l1MemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::L1); + auto l1SmallMemoryView = 
::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::L1_SMALL); + auto traceMemoryView = ::tt::tt_metal::detail::GetMemoryView( + device, tt::tt_metal::BufferType::TRACE); + + memoryMap[tt::runtime::MemoryBufferType::DRAM] = + createMemoryView(dramMemoryView); + memoryMap[tt::runtime::MemoryBufferType::L1] = createMemoryView(l1MemoryView); + memoryMap[tt::runtime::MemoryBufferType::L1_SMALL] = + createMemoryView(l1SmallMemoryView); + memoryMap[tt::runtime::MemoryBufferType::TRACE] = + createMemoryView(traceMemoryView); + + return memoryMap; +} + void wait(Event event) { // Nothing to do for ttnn runtime LOG_ASSERT(event.matchesRuntime(DeviceRuntime::TTNN)); diff --git a/runtime/tools/python/ttrt/common/callback.py b/runtime/tools/python/ttrt/common/callback.py index 56223224f..93a9af267 100644 --- a/runtime/tools/python/ttrt/common/callback.py +++ b/runtime/tools/python/ttrt/common/callback.py @@ -62,6 +62,49 @@ def save_memory_report(self, memory_report_path): self.logging.debug(f"Saved memory report to={memory_report_path}") + def check_pcc(self): + for loc, golden_data in self.golden_report.items(): + if golden_data["actual_pcc"] < golden_data["expected_pcc"]: + raise Exception( + f"Failed: golden comparison failed, actual_pcc={golden_data['actual_pcc']} < expected_pcc={golden_data['expected_pcc']}" + ) + + def check_memory_leak(self): + num_items = 0 + for key, value in self.memory_report.items(): + num_items += 1 + + if num_items == 0: + self.logging.warning(f"No memory data found") + else: + # query initial memory usage + dram_initial_size_per_device = self.memory_report[0]["dram"] + l1_initial_size_per_device = self.memory_report[0]["l1"] + + # query final memory usage and ensure no memory leaks + dram_final_size_per_device = self.memory_report[num_items - 1]["dram"] + l1_final_size_per_device = self.memory_report[num_items - 1]["l1"] + + for key, value in dram_initial_size_per_device.items(): + dram_initial_size = value["total_bytes_allocated_per_bank"] + dram_final_size = dram_final_size_per_device[key][ + "total_bytes_allocated_per_bank" + ] + + if dram_final_size > dram_initial_size: + raise Exception(f"Memory leak detected in DRAM for device={key}") + + for key, value in l1_initial_size_per_device.items(): + l1_initial_size = value["total_bytes_allocated_per_bank"] + l1_final_size = l1_final_size_per_device[key][ + "total_bytes_allocated_per_bank" + ] + + if l1_final_size > l1_initial_size: + raise Exception( + f"Memory leak detected in L1 cache for device={key}" + ) + """ -----------------------GOLDEN CALLBACK----------------------- @@ -234,151 +277,20 @@ def golden(callback_runtime_config, binary, program_context, op_context): """ -def add_key(dram_memory_usage, l1_memory_usage, current_section, key, value): - if current_section == "DRAM": - dram_memory_usage[key] = value - elif current_section == "L1": - l1_memory_usage[key] = value - - -def parse_detailed_memory_usage_file(dram_memory_usage, l1_memory_usage, file_path): - current_section = None - - with open(file_path, "r") as file: - reader = csv.reader(file) - blocks = [] - - for row in reader: - if not any(row): - continue - - if row[1].strip() == "DRAM": - current_section = "DRAM" - elif row[1].strip() == "L1": - current_section = "L1" - elif "Total" in row[1]: - if row[1].strip() == "Total allocatable (B):": - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_allocatable (bytes) : total_allocatable/bank * num_banks", - row[2].strip(), - ) - elif row[1].strip() == "Total 
allocated (B):": - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_allocated (bytes) : total_allocated/bank * num_banks", - row[2].strip(), - ) - elif row[1].strip() == "Total free (B):": - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_free (bytes) : total_allocatable - total_allocated", - row[2].strip(), - ) - elif "Blocks" in row[2]: - blocks = [] - else: - block = {} - block["address (bytes)"] = row[3].strip() - block["size (bytes)"] = row[4].strip() - block["allocated (y/n)"] = row[5].strip() - - blocks.append(block) - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "blocks", - blocks, - ) - - -def parse_memory_usage_summary_file(dram_memory_usage, l1_memory_usage, file_path): - with open(file_path, "r") as file: - reader = csv.reader(file) - current_section = "DRAM" - - for row in reader: - if not any(row): - continue - - if "Total Allocatable Size" in row[1]: - continue - - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_allocatable (bytes) : per bank", - row[1].strip(), - ) - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_allocated (bytes): per bank", - row[2].strip(), - ) - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "total_free (bytes) : per bank", - row[3].strip(), - ) - add_key( - dram_memory_usage, - l1_memory_usage, - current_section, - "largest_free_block (bytes) : per bank", - row[4].strip(), - ) - - if current_section == "DRAM": - current_section = "L1" - +def create_memory_dictionary(memory_view): + memory_dict = {} + memory_dict["num_banks"] = memory_view.num_banks + memory_dict["total_bytes_per_bank"] = memory_view.total_bytes_per_bank + memory_dict[ + "total_bytes_allocated_per_bank" + ] = memory_view.total_bytes_allocated_per_bank + memory_dict["total_bytes_free_per_bank"] = memory_view.total_bytes_free_per_bank + memory_dict[ + "largest_contiguous_bytes_free_per_bank" + ] = memory_view.largest_contiguous_bytes_free_per_bank + memory_dict["block_table"] = memory_view.block_table -def parse_l1_usage_summary_file(dram_memory_usage, l1_memory_usage, file_path): - with open(file_path, "r") as file: - reader = csv.reader(file) - dram_row = True - - for index, row in enumerate(reader): - if index == 2: - add_key( - dram_memory_usage, - l1_memory_usage, - "L1", - "largest_contiguous_free_block (bytes) : per bank", - row[1].strip(), - ) - - -def parse_memory_csv_files( - detailed_memory_usage_file_path, - memory_usage_summary_file_path, - l1_usage_summary_file_path, -): - dram_memory_usage = {} - l1_memory_usage = {} - - parse_detailed_memory_usage_file( - dram_memory_usage, l1_memory_usage, detailed_memory_usage_file_path - ) - parse_memory_usage_summary_file( - dram_memory_usage, l1_memory_usage, memory_usage_summary_file_path - ) - parse_l1_usage_summary_file( - dram_memory_usage, l1_memory_usage, l1_usage_summary_file_path - ) - - return dram_memory_usage, l1_memory_usage + return memory_dict def memory(callback_runtime_config, binary, program_context, op_context): @@ -390,22 +302,43 @@ def memory(callback_runtime_config, binary, program_context, op_context): logging.debug("executing memory dump") loc = ttrt.runtime.get_op_loc_info(op_context) debug_str = ttrt.runtime.get_op_debug_str(op_context) + device_id = 0 - device.dump_memory_report() - memory_dump_dir_path = f"{get_ttrt_metal_home_path()}/generated/reports" - - # read generated memory reports and store in condensed memory_report - dram_memory_usage, 
l1_memory_usage = parse_memory_csv_files( - f"{memory_dump_dir_path}/detailed_memory_usage.csv", - f"{memory_dump_dir_path}/memory_usage_summary.csv", - f"{memory_dump_dir_path}/l1_usage_summary.csv", - ) + memory_views = device.get_memory_view(device_id) + dram_memory_view = memory_views[ttrt.runtime.MemoryBufferType.DRAM] + l1_memory_view = memory_views[ttrt.runtime.MemoryBufferType.L1] + l1_small_memory_view = memory_views[ttrt.runtime.MemoryBufferType.L1_SMALL] + trace_memory_view = memory_views[ttrt.runtime.MemoryBufferType.TRACE] op_memory_report = {} op_memory_report["loc"] = loc op_memory_report["debug_str"] = debug_str - op_memory_report["dram"] = dram_memory_usage - op_memory_report["l1"] = l1_memory_usage + + dram_op_device_memory_report = {} + dram_op_device_memory_report["device_" + str(device_id)] = create_memory_dictionary( + dram_memory_view + ) + + l1_op_device_memory_report = {} + l1_op_device_memory_report["device_" + str(device_id)] = create_memory_dictionary( + l1_memory_view + ) + + l1_small_op_device_memory_report = {} + l1_small_op_device_memory_report[ + "device_" + str(device_id) + ] = create_memory_dictionary(l1_small_memory_view) + + trace_op_device_memory_report = {} + trace_op_device_memory_report[ + "device_" + str(device_id) + ] = create_memory_dictionary(trace_memory_view) + + op_memory_report["dram"] = dram_op_device_memory_report + op_memory_report["l1"] = l1_op_device_memory_report + op_memory_report["l1_small"] = l1_small_op_device_memory_report + op_memory_report["trace"] = trace_op_device_memory_report + callback_runtime_config.memory_report[ callback_runtime_config.callback_counter() ] = op_memory_report @@ -418,6 +351,8 @@ def memory(callback_runtime_config, binary, program_context, op_context): def debugger(callback_runtime_config, binary, program_context, op_context): import pdb + import ttrt.runtime + import ttrt.binary device = callback_runtime_config.device logging = callback_runtime_config.logging diff --git a/runtime/tools/python/ttrt/common/run.py b/runtime/tools/python/ttrt/common/run.py index b83c5d390..d5f468739 100644 --- a/runtime/tools/python/ttrt/common/run.py +++ b/runtime/tools/python/ttrt/common/run.py @@ -589,17 +589,7 @@ def _execute(binaries): f"{self.artifacts.get_binary_folder_path(bin)}/run/program_{program_index}/golden_results.json" ) - for ( - loc, - golden_data, - ) in callback_runtime_config.golden_report.items(): - if ( - golden_data["actual_pcc"] - < golden_data["expected_pcc"] - ): - raise Exception( - f"Failed: golden comparison failed for program={program_index}, actual_pcc={golden_data['actual_pcc']} < expected_pcc={golden_data['expected_pcc']}" - ) + callback_runtime_config.check_pcc() if self["--memory"]: if self["--save-artifacts"]: @@ -608,57 +598,7 @@ def _execute(binaries): ) if self["--check-memory-leak"]: - num_items = 0 - for ( - key, - value, - ) in callback_runtime_config.memory_report.items(): - num_items += 1 - - if num_items == 0: - self.logging.warning(f"No memory data found") - else: - # query initial memory usage - dram_initial_size = callback_runtime_config.memory_report[ - 0 - ][ - "dram" - ][ - "total_allocated (bytes) : total_allocated/bank * num_banks" - ] - l1_initlal_size = callback_runtime_config.memory_report[ - 0 - ][ - "l1" - ][ - "total_allocated (bytes) : total_allocated/bank * num_banks" - ] - - # query final memory usage and ensure no memory leaks - dram_final_size = callback_runtime_config.memory_report[ - num_items - 1 - ][ - "dram" - ][ - "total_allocated (bytes) : 
total_allocated/bank * num_banks" - ] - l1_final_size = callback_runtime_config.memory_report[ - num_items - 1 - ][ - "l1" - ][ - "total_allocated (bytes) : total_allocated/bank * num_banks" - ] - - if dram_final_size > dram_initial_size: - raise Exception( - "Memory leak detected in DRAM" - ) - - if l1_final_size > l1_initlal_size: - raise Exception( - "Memory leak detected in L1 cache" - ) + callback_runtime_config.check_memory_leak() except Exception as e: test_result = { diff --git a/runtime/tools/python/ttrt/runtime/__init__.py b/runtime/tools/python/ttrt/runtime/__init__.py index 5ff953da5..b394dd901 100644 --- a/runtime/tools/python/ttrt/runtime/__init__.py +++ b/runtime/tools/python/ttrt/runtime/__init__.py @@ -7,6 +7,7 @@ Device, Event, Tensor, + MemoryBufferType, DataType, DeviceRuntime, DebugEnv, diff --git a/runtime/tools/python/ttrt/runtime/module.cpp b/runtime/tools/python/ttrt/runtime/module.cpp index f55b6b81e..ff1d9b8d0 100644 --- a/runtime/tools/python/ttrt/runtime/module.cpp +++ b/runtime/tools/python/ttrt/runtime/module.cpp @@ -21,14 +21,32 @@ namespace py = pybind11; PYBIND11_MODULE(_C, m) { m.doc() = "ttrt.runtime python extension for interacting with the " "Tenstorrent devices"; + py::class_(m, "MemoryView") + .def_readonly("num_banks", &tt::runtime::MemoryView::numBanks) + .def_readonly("total_bytes_per_bank", + &tt::runtime::MemoryView::totalBytesPerBank) + .def_readonly("total_bytes_allocated_per_bank", + &tt::runtime::MemoryView::totalBytesAllocatedPerBank) + .def_readonly("total_bytes_free_per_bank", + &tt::runtime::MemoryView::totalBytesFreePerBank) + .def_readonly("largest_contiguous_bytes_free_per_bank", + &tt::runtime::MemoryView::largestContiguousBytesFreePerBank) + .def_readonly("block_table", &tt::runtime::MemoryView::blockTable); py::class_(m, "Device") .def("deallocate_buffers", &tt::runtime::detail::deallocateBuffers) - .def("dump_memory_report", &tt::runtime::detail::dumpMemoryReport); + .def("dump_memory_report", &tt::runtime::detail::dumpMemoryReport) + .def("get_memory_view", &tt::runtime::detail::getMemoryView, + py::arg("device_id") = 0); py::class_(m, "Event"); py::class_(m, "Tensor"); py::class_(m, "Layout"); py::class_(m, "OpContext"); py::class_(m, "CallbackContext"); + py::enum_(m, "MemoryBufferType") + .value("DRAM", tt::runtime::MemoryBufferType::DRAM) + .value("L1", tt::runtime::MemoryBufferType::L1) + .value("L1_SMALL", tt::runtime::MemoryBufferType::L1_SMALL) + .value("TRACE", tt::runtime::MemoryBufferType::TRACE); py::enum_<::tt::target::DataType>(m, "DataType") .value("Float32", ::tt::target::DataType::Float32) .value("Float16", ::tt::target::DataType::Float16)