Skip to content

Commit

Permalink
Fix Post From-Device Copy Processing Issue
Browse files Browse the repository at this point in the history
This change fixes an issue in the processing of output tensors after they are
pulled off the device when checking golden-ness. The returned tensor
had its first 32 bytes overwritten with seemingly uninitialized memory,
causing nondeterministic failures during similarity checks. Now,
the unadulterated tensor is returned.

This change also fixes a minor bug in the binding of `ceil` and
includes a convenience change to `.gitignore` that ignores top-level
`.mlir` files, which are often used during debugging.
Fixes #1795
  • Loading branch information
ctodTT authored Jan 16, 2025
1 parent e4f3ce0 commit 9278f5d
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ test/lit.site.cfg.py
# TTNN and TTMetal flatbuffers
*.ttnn
*.ttm

# Root level mlir files (often used for testing, none should be at root)
/*.mlir
2 changes: 1 addition & 1 deletion python/test_infra/ttir_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def bitwise_not(self, in0: Operand) -> OpView:
return self.eltwise_proxy(torch.bitwise_not, ttir.BitwiseNotOp, [in0])

def ceil(self, in0: Operand) -> OpView:
return self.eltwise_proxy(torch.log, ttir.CeilOp, [in0])
return self.eltwise_proxy(torch.ceil, ttir.CeilOp, [in0])

def sin(self, in0: Operand) -> OpView:
return self.eltwise_proxy(torch.sin, ttir.SinOp, [in0])
Expand Down
21 changes: 5 additions & 16 deletions runtime/lib/ttnn/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,23 +510,12 @@ Tensor getOpOutputTensor(OpContext opContextHandle,
return createNullTensor();
}

::ttnn::Tensor hostTensor = ::ttnn::from_device(*outPtr);
::ttnn::Tensor outCopy =
::ttnn::to_layout(hostTensor, ::ttnn::ROW_MAJOR_LAYOUT, std::nullopt,
std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));

void *src = ::tt::tt_metal::get_raw_host_data_ptr(outCopy);
std::uint32_t outCopySize = outCopy.volume() * outCopy.element_size();
std::shared_ptr<void> data = ::tt::runtime::utils::malloc_shared(outCopySize);
std::memcpy(data.get(), src, outCopySize);

auto tensor = std::make_shared<::ttnn::Tensor>(
ttnn::createStorage<BorrowedStorage>(data.get(), outCopy.volume(),
::tt::target::DataType::Float32),
outCopy.shape().value, ::ttnn::DataType::FLOAT32,
::ttnn::Layout::ROW_MAJOR);
std::shared_ptr<::ttnn::Tensor> hostTensor =
std::make_shared<::ttnn::Tensor>(::ttnn::to_layout(
::ttnn::from_device(*outPtr), ::ttnn::Layout::ROW_MAJOR, std::nullopt,
std::nullopt, static_cast<::ttnn::IDevice *>(nullptr)));

return Tensor(std::static_pointer_cast<void>(tensor), nullptr,
return Tensor(std::static_pointer_cast<void>(hostTensor), nullptr,
DeviceRuntime::TTNN);
}

Expand Down

0 comments on commit 9278f5d

Please sign in to comment.