Fix Post From-Device Copy Processing Issue

This change fixes an issue with processing output tensors after they are pulled off the device when checking golden-ness. The resulting tensor had its first 32 bytes overwritten with seemingly uninitialized memory, resulting in nondeterministic failures during similarity checks. Now, the unadulterated tensor is returned. This change also fixes a minor bug in the binding of `ceil` (it was bound to `torch.log` instead of `torch.ceil`) and includes a convenience change to `.gitignore` that ignores top-level `.mlir` files often used during debugging. Fixes #1795
tenstorrent · Jan 16, 2025 · 9278f5d · 9278f5d
1 parent e4f3ce0
commit 9278f5d
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 17 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,6 @@ test/lit.site.cfg.py
 # TTNN and TTMetal flatbuffers
 *.ttnn
 *.ttm
+
+# Root level mlir files (often used for testing, none should be at root)
+/*.mlir
diff --git a/python/test_infra/ttir_builder.py b/python/test_infra/ttir_builder.py
@@ -404,7 +404,7 @@ def bitwise_not(self, in0: Operand) -> OpView:
         return self.eltwise_proxy(torch.bitwise_not, ttir.BitwiseNotOp, [in0])
 
     def ceil(self, in0: Operand) -> OpView:
-        return self.eltwise_proxy(torch.log, ttir.CeilOp, [in0])
+        return self.eltwise_proxy(torch.ceil, ttir.CeilOp, [in0])
 
     def sin(self, in0: Operand) -> OpView:
         return self.eltwise_proxy(torch.sin, ttir.SinOp, [in0])

diff --git a/runtime/lib/ttnn/runtime.cpp b/runtime/lib/ttnn/runtime.cpp
@@ -510,23 +510,12 @@ Tensor getOpOutputTensor(OpContext opContextHandle,
     return createNullTensor();
   }
 
-  ::ttnn::Tensor hostTensor = ::ttnn::from_device(*outPtr);
-  ::ttnn::Tensor outCopy =
-      ::ttnn::to_layout(hostTensor, ::ttnn::ROW_MAJOR_LAYOUT, std::nullopt,
-                        std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
-
-  void *src = ::tt::tt_metal::get_raw_host_data_ptr(outCopy);
-  std::uint32_t outCopySize = outCopy.volume() * outCopy.element_size();
-  std::shared_ptr<void> data = ::tt::runtime::utils::malloc_shared(outCopySize);
-  std::memcpy(data.get(), src, outCopySize);
-
-  auto tensor = std::make_shared<::ttnn::Tensor>(
-      ttnn::createStorage<BorrowedStorage>(data.get(), outCopy.volume(),
-                                           ::tt::target::DataType::Float32),
-      outCopy.shape().value, ::ttnn::DataType::FLOAT32,
-      ::ttnn::Layout::ROW_MAJOR);
+  std::shared_ptr<::ttnn::Tensor> hostTensor =
+      std::make_shared<::ttnn::Tensor>(::ttnn::to_layout(
+          ::ttnn::from_device(*outPtr), ::ttnn::Layout::ROW_MAJOR, std::nullopt,
+          std::nullopt, static_cast<::ttnn::IDevice *>(nullptr)));
 
-  return Tensor(std::static_pointer_cast<void>(tensor), nullptr,
+  return Tensor(std::static_pointer_cast<void>(hostTensor), nullptr,
                 DeviceRuntime::TTNN);
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,3 +18,6 @@ test/lit.site.cfg.py @@
     # TTNN and TTMetal flatbuffers
     *.ttnn
     *.ttm
+    # Root level mlir files (often used for testing, none should be at root)
+    /*.mlir