apache · jorgecarleitao · Feb 14, 2021 · Feb 14, 2021 · Feb 14, 2021 · alamb
diff --git a/rust/arrow/src/alloc/alignment.rs b/rust/arrow/src/alloc/alignment.rs
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE: Below code is written for spatial/temporal prefetcher optimizations. Memory allocation
+// should align well with usage pattern of cache access and block sizes on layers of storage levels from
+// registers to non-volatile memory. These alignments are all cache aware alignments incorporated
+// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach mimicks Intel TBB's
+// cache_aligned_allocator which exploits cache locality and minimizes prefetch signals
+// resulting in less round trip time between the layers of storage.
+// For further info: https://software.intel.com/en-us/node/506094
+
+// 32-bit architecture and things other than netburst microarchitecture are using 64 bytes.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Intel x86_64:
+// L2D streamer from L1:
+// Loads data or instructions from memory to the second-level cache. To use the streamer,
+// organize the data or instructions in blocks of 128 bytes, aligned on 128 bytes.
+// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86_64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// 24Kc:
+// Data Line Size
+// - https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
+// - https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips64")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for powerpc
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for the ppc 64
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// e.g.: sifive
+// - https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
+// in general all of them are the same.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "riscv")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// This size is same across all hardware for this architecture.
+// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "s390x")]
+pub const ALIGNMENT: usize = 1 << 8;
+
+// This size is same across all hardware for this architecture.
+// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// On ARM cache line sizes are fixed. both v6 and v7.
+// Need to add board specific or platform specific things later.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv6")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv7")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Operating Systems cache size determines this.
+// Currently no way to determine this without runtime inference.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "wasm32")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Same as v6 and v7.
+// List goes like that:
+// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "arm")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Combined from 4 sectors. Volta says 128.
+// Prevent chunk optimizations better to go to the default size.
+// If you have smaller data with less padded functionality then use 32 with force option.
+// - https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx")]
+pub const ALIGNMENT: usize = 1 << 7;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// This size is same across all hardware for this architecture.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "aarch64")]
+pub const ALIGNMENT: usize = 1 << 6;
diff --git a/rust/arrow/src/alloc/mod.rs b/rust/arrow/src/alloc/mod.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines memory-related functions, such as allocate/deallocate/reallocate memory
+//! regions, cache and allocation alignments.
+
+use std::mem::size_of;
+use std::ptr::NonNull;
+use std::{
+    alloc::{handle_alloc_error, Layout},
+    sync::atomic::AtomicIsize,
+};
-use std::{
-    alloc::{handle_alloc_error, Layout},
-    sync::atomic::AtomicIsize,
-};
+use std::alloc::{handle_alloc_error, Layout};
+use std::sync::atomic::*;
-use std::{
-    alloc::{handle_alloc_error, Layout},
-    sync::atomic::AtomicIsize,
-};
+use std::alloc::{handle_alloc_error, Layout};
+use std::sync::atomic::*;
+
+mod alignment;
+mod types;
+
+pub use alignment::ALIGNMENT;
+pub use types::NativeType;
+
+// If this number is not zero after all objects have been `drop`, there is a memory leak
+pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
-// If this number is not zero after all objects have been `drop`, there is a memory leak
-pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
+// If this number is not zero after all objects have been `drop`, there is a memory leak
+#[cfg(feature = "memory-check")]
+pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
-// If this number is not zero after all objects have been `drop`, there is a memory leak
-pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
+// If this number is not zero after all objects have been `drop`, there is a memory leak
+#[cfg(feature = "memory-check")]
+pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
+
+#[inline]
+unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
+    NonNull::new_unchecked(ALIGNMENT as *mut T)
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
+/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
+/// an unknown or non-zero value and is semantically similar to `malloc`.
+pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
+    unsafe {
+        if size == 0 {
+            null_pointer()
+        } else {
+            let size = size * size_of::<T>();
+            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
+            #[cfg(feature = "memory-check")]
+            ALLOCATIONS.fetch_add(size as isize, Ordering::Relaxed);
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
+            #[cfg(feature = "memory-check")]
+            ALLOCATIONS.fetch_add(size as isize, Ordering::Relaxed);
+
+            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+            let raw_ptr = std::alloc::alloc(layout) as *mut T;
+            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+        }
+    }
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
+/// This is more performant than using [allocate_aligned] and setting all bytes to zero
+/// and is semantically similar to `calloc`.
+pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
+    unsafe {
+        if size == 0 {
+            null_pointer()
+        } else {
+            let size = size * size_of::<T>();
+            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
+            #[cfg(feature = "memory-check")]
+            ALLOCATIONS.fetch_add(size as isize, Ordering::Relaxed);
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
+            #[cfg(feature = "memory-check")]
+            ALLOCATIONS.fetch_add(size as isize, Ordering::Relaxed);
+
+            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+            let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
+            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+        }
+    }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the caller does not ensure all
+/// of the following:
+///
+/// * ptr must denote a block of memory currently allocated via this allocator,
-/// * ptr must denote a block of memory currently allocated via this allocator,
+/// * ptr must denote a block of memory previously returned from `allocate_aligned` or
+/// `allocate_aligned_zeroed`
-/// * ptr must denote a block of memory currently allocated via this allocator,
+/// * ptr must denote a block of memory previously returned from `allocate_aligned` or
+/// `allocate_aligned_zeroed`
+///
+/// * size must be the same size that was used to allocate that block of memory,
+pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
+    if ptr != null_pointer() {
+        let size = size * size_of::<T>();
+        ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst);
-        ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst);
+        #[cfg(feature = "memory-check")]
+        ALLOCATIONS.fetch_sub(size as isize, Ordering::Relaxed);
-        ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst);
+        #[cfg(feature = "memory-check")]
+        ALLOCATIONS.fetch_sub(size as isize, Ordering::Relaxed);
+        std::alloc::dealloc(
+            ptr.as_ptr() as *mut u8,
+            Layout::from_size_align_unchecked(size, ALIGNMENT),
+        );
+    }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the caller does not ensure all
+/// of the following:
+///
+/// * ptr must be currently allocated via this allocator,
-/// * ptr must be currently allocated via this allocator,
+///  ptr must denote a block of memory previously returned from `allocate_aligned` or
+/// `allocate_aligned_zeroed`
-/// * ptr must be currently allocated via this allocator,
+///  ptr must denote a block of memory previously returned from `allocate_aligned` or
+/// `allocate_aligned_zeroed`
+///
+/// * new_size must be greater than zero.
+///
+/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
+/// the rounded value must be less than usize::MAX).
+pub unsafe fn reallocate<T: NativeType>(
+    ptr: NonNull<T>,
+    old_size: usize,
+    new_size: usize,
+) -> NonNull<T> {
+    let old_size = old_size * size_of::<T>();
+    let new_size = new_size * size_of::<T>();
+    if ptr == null_pointer() {
+        return allocate_aligned(new_size);
+    }
+
+    if new_size == 0 {
+        free_aligned(ptr, old_size);
+        return null_pointer();
+    }
+
+    ALLOCATIONS.fetch_add(
+        new_size as isize - old_size as isize,
+        std::sync::atomic::Ordering::SeqCst,
+    );
-
-    ALLOCATIONS.fetch_add(
-        new_size as isize - old_size as isize,
-        std::sync::atomic::Ordering::SeqCst,
-    );
+
+    #[cfg(feature = "memory-check")]
+    ALLOCATIONS.fetch_add(
+        new_size.checked_sub(old_size).unwrap() as isize,
+        Ordering::Relaxed,
+    );
-
-    ALLOCATIONS.fetch_add(
-        new_size as isize - old_size as isize,
-        std::sync::atomic::Ordering::SeqCst,
-    );
+
+    #[cfg(feature = "memory-check")]
+    ALLOCATIONS.fetch_add(
+        new_size.checked_sub(old_size).unwrap() as isize,
+        Ordering::Relaxed,
+    );
+    let raw_ptr = std::alloc::realloc(
+        ptr.as_ptr() as *mut u8,
+        Layout::from_size_align_unchecked(old_size, ALIGNMENT),
+        new_size,
+    ) as *mut T;
+    NonNull::new(raw_ptr).unwrap_or_else(|| {
+        handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
+    })
+}