Merge remote-tracking branch 'origin/main' into video-writer-async-std
edgarriba committed Sep 28, 2024
2 parents f54d0bd + eeabb12 commit f08a7be
Showing 5 changed files with 138 additions and 23 deletions.
7 changes: 4 additions & 3 deletions crates/kornia-core/src/allocator.rs
@@ -1,4 +1,5 @@
-use std::alloc::{GlobalAlloc, Layout, System};
+use std::alloc;
+use std::alloc::Layout;

use thiserror::Error;

@@ -55,7 +56,7 @@ impl TensorAllocator for CpuAllocator {
///
/// A non-null pointer to the allocated memory if successful, otherwise an error.
fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
-let ptr = unsafe { System.alloc(layout) };
+let ptr = unsafe { alloc::alloc(layout) };
if ptr.is_null() {
Err(TensorAllocatorError::NullPointer)?
}
@@ -74,7 +75,7 @@ impl TensorAllocator for CpuAllocator {
/// The pointer must be non-null and the layout must be correct.
#[allow(clippy::not_unsafe_ptr_arg_deref)]
fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-unsafe { System.dealloc(ptr, layout) }
+unsafe { alloc::dealloc(ptr, layout) }
}
}

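Note: the change above, from `System.alloc`/`System.dealloc` to `std::alloc::alloc`/`std::alloc::dealloc`, means `CpuAllocator` now routes through the process-wide global allocator, so a binary that installs its own `#[global_allocator]` also sees kornia's tensor allocations. A minimal, self-contained sketch of that effect; the `CountingAlloc` type below is illustrative and not part of this repository:

use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

/// Illustrative global allocator that tracks live bytes.
struct CountingAlloc;

static LIVE_BYTES: AtomicUsize = AtomicUsize::new(0);

unsafe impl GlobalAlloc for CountingAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        LIVE_BYTES.fetch_add(layout.size(), Ordering::Relaxed);
        System.alloc(layout)
    }
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        LIVE_BYTES.fetch_sub(layout.size(), Ordering::Relaxed);
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: CountingAlloc = CountingAlloc;

fn main() {
    // Every allocation that goes through `std::alloc::alloc` is counted here.
    // After this commit, `CpuAllocator::alloc` is such an allocation; the old
    // `System.alloc` call bypassed the global allocator.
    let buf = vec![0u8; 1024];
    println!("live bytes: {}", LIVE_BYTES.load(Ordering::Relaxed));
    drop(buf);
}
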
5 changes: 3 additions & 2 deletions crates/kornia-core/src/serde.rs
@@ -7,9 +7,10 @@ use crate::{
use serde::ser::SerializeStruct;
use serde::Deserialize;

-impl<T, const N: usize, A: TensorAllocator> serde::Serialize for Tensor<T, N, A>
+impl<T, const N: usize, A> serde::Serialize for Tensor<T, N, A>
where
T: serde::Serialize + SafeTensorType,
+A: TensorAllocator + 'static,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -23,7 +24,7 @@ where
}
}

-impl<'de, T, const N: usize, A: TensorAllocator + Default> serde::Deserialize<'de>
+impl<'de, T, const N: usize, A: TensorAllocator + Default + 'static> serde::Deserialize<'de>
for Tensor<T, N, A>
where
T: serde::Deserialize<'de> + SafeTensorType,
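One reading of the recurring `+ 'static` bounds in this commit (added here and in storage.rs, tensor.rs, and view.rs below): the allocator now ends up inside an `Arc` that is handed to the Arrow buffer as a shared, type-erased allocation owner, and type erasure behind an `Arc<dyn ...>` requires the concrete type to own all of its data. A minimal standard-library sketch of that constraint; `erase_owner` is illustrative and not from the diff:

use std::any::Any;
use std::sync::Arc;

// Type-erasing a value into a shared owner (as `Arc::new(owner)` does when it
// is passed to `Buffer::from_custom_allocation`) requires `'static`: the
// erased object may outlive the scope that created it, so it must not borrow
// anything. `Any` itself carries that `'static` requirement.
fn erase_owner<A: Any + Send + Sync>(owner: A) -> Arc<dyn Any + Send + Sync> {
    Arc::new(owner)
}

fn main() {
    let token = erase_owner(vec![1u8, 2, 3]);
    println!("strong count: {}", Arc::strong_count(&token));
}
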
141 changes: 127 additions & 14 deletions crates/kornia-core/src/storage.rs
@@ -18,6 +18,30 @@ impl SafeTensorType for i64 {}
impl SafeTensorType for f32 {}
impl SafeTensorType for f64 {}

/// Represents the owner of custom Arrow Buffer memory allocations.
///
/// This struct is used to facilitate the automatic deallocation of the memory it owns,
/// using the `Drop` trait.
pub struct TensorCustomAllocationOwner<A: TensorAllocator> {
/// The allocator used to allocate the tensor storage.
alloc: Arc<A>,
/// The layout used for the allocation.
layout: Layout,
/// The pointer to the allocated memory
ptr: NonNull<u8>,
}

// SAFETY: TensorCustomAllocationOwner is never modified from multiple threads.
impl<A: TensorAllocator> std::panic::RefUnwindSafe for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Sync for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Send for TensorCustomAllocationOwner<A> {}

impl<A: TensorAllocator> Drop for TensorCustomAllocationOwner<A> {
fn drop(&mut self) {
self.alloc.dealloc(self.ptr.as_ptr(), self.layout);
}
}

/// Represents a contiguous memory region that can be shared with other buffers and across thread boundaries.
///
/// This struct provides methods to create, access, and manage tensor storage using a custom allocator.
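
The `TensorCustomAllocationOwner` type introduced in the hunk above ties the lifetime of a custom allocation to the Arrow buffer: the owner is stored alongside the buffer, and its `Drop` impl frees the memory once the last reference is gone. A stripped-down sketch of the same pattern using only the standard library; the `Owner` type below is illustrative, not kornia code:

use std::alloc::{self, Layout};
use std::ptr::NonNull;
use std::sync::Arc;

// Illustrative stand-in for TensorCustomAllocationOwner: it remembers how the
// memory was allocated and releases it in Drop.
struct Owner {
    ptr: NonNull<u8>,
    layout: Layout,
}

// SAFETY (for this sketch): the owner is never mutated after construction, so
// sharing it across threads is sound.
unsafe impl Send for Owner {}
unsafe impl Sync for Owner {}

impl Drop for Owner {
    fn drop(&mut self) {
        // SAFETY: `ptr` was allocated with exactly this `layout` in `main`.
        unsafe { alloc::dealloc(self.ptr.as_ptr(), self.layout) };
    }
}

fn main() {
    let layout = Layout::array::<u8>(64).expect("valid layout");
    let ptr = NonNull::new(unsafe { alloc::alloc(layout) }).expect("allocation failed");
    let owner = Arc::new(Owner { ptr, layout });

    // Each clone models another buffer sharing the same allocation; the memory
    // is deallocated only when the last Arc is dropped.
    let shared = Arc::clone(&owner);
    drop(owner);
    drop(shared); // dealloc happens here
}
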
@@ -32,12 +56,13 @@ where
/// The buffer containing the tensor storage.
data: ScalarBuffer<T>,
/// The allocator used to allocate the tensor storage.
-alloc: A,
+alloc: Arc<A>,
}

-impl<T, A: TensorAllocator> TensorStorage<T, A>
+impl<T, A> TensorStorage<T, A>
where
T: SafeTensorType + Clone,
+A: TensorAllocator + 'static,
{
/// Creates a new tensor storage with the given length and allocator.
///
@@ -51,17 +76,19 @@ where
/// A new tensor storage if successful, otherwise an error.
pub fn new(len: usize, alloc: A) -> Result<Self, TensorAllocatorError> {
// allocate memory for tensor storage
-let ptr =
-alloc.alloc(Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?)?;
+let layout = Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?;
+let ptr = NonNull::new(alloc.alloc(layout)?).ok_or(TensorAllocatorError::NullPointer)?;
+let alloc = Arc::new(alloc);
+let owner = TensorCustomAllocationOwner {
+alloc: alloc.clone(),
+layout,
+ptr,
+};

// create the buffer
let buffer = unsafe {
// SAFETY: `ptr` is non-null and properly aligned, and `len` is the correct size.
-Buffer::from_custom_allocation(
-NonNull::new_unchecked(ptr),
-len * std::mem::size_of::<T>(),
-Arc::new(Vec::<T>::with_capacity(len)),
-)
+Buffer::from_custom_allocation(ptr, len * std::mem::size_of::<T>(), Arc::new(owner))
};

Ok(Self {
@@ -99,7 +126,7 @@ where
// create tensor storage
Self {
data: buffer.into(),
-alloc,
+alloc: Arc::new(alloc),
}
}

@@ -223,13 +250,13 @@ where
let buffer = Buffer::from_custom_allocation(
NonNull::new_unchecked(ptr as *mut u8),
len * std::mem::size_of::<T>(),
-Arc::new(Vec::<T>::with_capacity(len)),
+Arc::new(()),
);

// create tensor storage
Self {
data: buffer.into(),
-alloc: alloc.clone(),
+alloc: Arc::new(alloc.clone()),
}
}
}
@@ -238,10 +265,10 @@ where
impl<T, A> Clone for TensorStorage<T, A>
where
T: SafeTensorType + Clone,
-A: TensorAllocator + Clone,
+A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
-let mut new_storage = Self::new(self.len(), self.alloc.clone())
+let mut new_storage = Self::new(self.len(), (*self.alloc).clone())
.expect("Failed to allocate memory for cloned TensorStorage");
new_storage.as_mut_slice().clone_from_slice(self.as_slice());
new_storage
@@ -253,6 +280,8 @@ mod tests {
use super::*;
use crate::allocator::CpuAllocator;
use std::alloc::Layout;
+use std::cell::RefCell;
+use std::rc::Rc;

#[test]
fn test_tensor_storage() -> Result<(), TensorAllocatorError> {
@@ -365,4 +394,88 @@ mod tests {
assert_eq!(result_vec.capacity(), original_vec_capacity);
assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
}

#[test]
fn test_tensor_storage_allocator() -> Result<(), TensorAllocatorError> {
// A test TensorAllocator that keeps a count of the bytes that are allocated but not yet
// deallocated via the allocator.
#[derive(Clone)]
struct TestAllocator {
bytes_allocated: Rc<RefCell<i32>>,
}
impl TensorAllocator for TestAllocator {
fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
*self.bytes_allocated.borrow_mut() += layout.size() as i32;
CpuAllocator.alloc(layout)
}
fn dealloc(&self, ptr: *mut u8, layout: Layout) {
*self.bytes_allocated.borrow_mut() -= layout.size() as i32;
CpuAllocator.dealloc(ptr, layout)
}
}

let allocator = TestAllocator {
bytes_allocated: Rc::new(RefCell::new(0)),
};
let len = 1024;

// TensorStorage::new()
// Deallocation should happen when `storage` goes out of scope.
{
let _storage = TensorStorage::<u8, _>::new(len, allocator.clone())?;
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::new() -> TensorStorage::into_vec()
// TensorStorage::into_vec() consumes the storage and creates a copy (in this case).
// This should cause deallocation of the original memory.
{
let storage = TensorStorage::<u8, _>::new(len, allocator.clone())?;
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);

let _vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_vec() -> TensorStorage::into_vec()
// TensorStorage::from_vec() currently does not use the custom allocator, so the
// bytes_allocated value should not change.
{
let original_vec = Vec::<u8>::with_capacity(len);
let original_vec_ptr = original_vec.as_ptr();
let original_vec_capacity = original_vec.capacity();

let storage = TensorStorage::<u8, _>::from_vec(original_vec, allocator.clone());
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

let result_vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

assert_eq!(result_vec.capacity(), original_vec_capacity);
assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_ptr()
// TensorStorage::from_ptr() does not take ownership of buffer. So the memory should not be
// deallocated when the TensorStorage goes out of scope.
// In this case, the memory will be deallocated when the vector goes out of scope.
{
let mut original_vec = Vec::<u8>::with_capacity(len);
let original_ptr = original_vec.as_ptr();
{
let storage = unsafe {
TensorStorage::<u8, _>::from_ptr(original_vec.as_mut_ptr(), len, &allocator)
};
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

assert_eq!(storage.as_ptr(), original_ptr);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}

Ok(())
}
}
6 changes: 3 additions & 3 deletions crates/kornia-core/src/tensor.rs
@@ -90,7 +90,7 @@ where
impl<T, const N: usize, A> Tensor<T, N, A>
where
T: SafeTensorType,
-A: TensorAllocator,
+A: TensorAllocator + 'static,
{
/// Create a new `Tensor` with uninitialized data.
///
@@ -875,7 +875,7 @@ where
impl<T, const N: usize, A> Clone for Tensor<T, N, A>
where
T: SafeTensorType + Clone,
-A: TensorAllocator + Clone,
+A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
Self {
@@ -889,7 +889,7 @@ where
impl<T, const N: usize, A> std::fmt::Display for Tensor<T, N, A>
where
T: SafeTensorType + std::fmt::Display + std::fmt::LowerExp,
-A: TensorAllocator,
+A: TensorAllocator + 'static,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let width = self
2 changes: 1 addition & 1 deletion crates/kornia-core/src/view.rs
@@ -14,7 +14,7 @@ pub struct TensorView<'a, T: SafeTensorType, const N: usize, A: TensorAllocator>
pub strides: [usize; N],
}

-impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator> TensorView<'a, T, N, A> {
+impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator + 'static> TensorView<'a, T, N, A> {
/// Returns the data slice of the tensor.
#[inline]
pub fn as_slice(&self) -> &[T] {
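For context, a minimal usage sketch of the storage API touched by this commit. `TensorStorage`, `CpuAllocator`, and `TensorAllocatorError` appear in the diffs above; the import paths below are assumptions about how kornia-core exposes them:

// Assumed module paths; adjust to the crate's actual re-exports.
use kornia_core::allocator::{CpuAllocator, TensorAllocatorError};
use kornia_core::storage::TensorStorage;

fn main() -> Result<(), TensorAllocatorError> {
    // Allocation goes through CpuAllocator, i.e. std::alloc::alloc after this commit,
    // and is released by TensorCustomAllocationOwner::drop when the storage is dropped.
    let storage = TensorStorage::<u8, _>::new(16, CpuAllocator)?;
    assert_eq!(storage.len(), 16);
    Ok(())
}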
