diff --git a/ci/scripts/rust_test.sh b/ci/scripts/rust_test.sh index 254ea43d295..8aeb68b8a3d 100755 --- a/ci/scripts/rust_test.sh +++ b/ci/scripts/rust_test.sh @@ -42,6 +42,34 @@ cargo run --example read_csv cargo run --example read_csv_infer_schema popd +# Install cross +cargo install cross + +# Linker issues exist with the triples below. +# They need to be resolved by shipping a triple-specific toolchain inside the containers: +# +# armv7-unknown-linux-musleabi +# armv7-unknown-linux-gnueabi + +# Define targets +export TARGETS="armv7-unknown-linux-musleabihf \ +arm-unknown-linux-musleabihf \ +arm-unknown-linux-musleabi \ +armv7-unknown-linux-gnueabihf \ +arm-unknown-linux-gnueabihf \ +arm-unknown-linux-gnueabi" + +# Run on targets +pushd arrow +for target in $TARGETS; do + # Target specific runs + cross run --target ${target} --example builders + cross run --target ${target} --example dynamic_types + cross run --target ${target} --example read_csv + cross run --target ${target} --example read_csv_infer_schema +done +popd + # test datafusion examples pushd datafusion cargo run --example csv_sql diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 2ba1bc98d08..53a42da3ed1 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -257,6 +257,7 @@ r/inst/include/cpp11/*.hpp ruby/red-arrow/.yardopts rust/arrow/test/data/*.csv rust/rust-toolchain +rust/arrow/Cross.toml rust/arrow-flight/src/arrow.flight.protocol.rs julia/Arrow/Project.toml julia/Arrow/README.md diff --git a/rust/arrow/Cross.toml b/rust/arrow/Cross.toml new file mode 100644 index 00000000000..098c8cfafe5 --- /dev/null +++ b/rust/arrow/Cross.toml @@ -0,0 +1,7 @@ +[build.env] +passthrough = [ + "RUST_BACKTRACE", + "RUST_LOG", + "ARROW_TEST_DATA", + "PARQUET_TEST_DATA" +] diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index d1014dbb940..2e7b811495a 100644 --- 
a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -246,21 +246,21 @@ impl fmt::Debug for PrimitiveArray { write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?; print_long_array(self, f, |array, index, f| match T::DATA_TYPE { DataType::Date32(_) | DataType::Date64(_) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.value(index).cast().unwrap(); match as_date::(v) { Some(date) => write!(f, "{:?}", date), None => write!(f, "null"), } } DataType::Time32(_) | DataType::Time64(_) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.value(index).cast().unwrap(); match as_time::(v) { Some(time) => write!(f, "{:?}", time), None => write!(f, "null"), } } DataType::Timestamp(_, _) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.value(index).cast().unwrap(); match as_datetime::(v) { Some(datetime) => write!(f, "{:?}", datetime), None => write!(f, "null"), @@ -500,6 +500,7 @@ mod tests { use crate::buffer::Buffer; use crate::datatypes::DataType; + use crate::memory::POINTER_WIDTH; #[test] fn test_primitive_array_from_vec() { @@ -518,7 +519,7 @@ mod tests { } assert_eq!(64, arr.get_buffer_memory_size()); - let internals_of_primitive_array = 8 + 72; // RawPtrBox & Arc combined. + let internals_of_primitive_array = POINTER_WIDTH / 4 + POINTER_WIDTH; // RawPtrBox & Arc combined. assert_eq!( arr.get_buffer_memory_size() + internals_of_primitive_array, arr.get_array_memory_size() @@ -544,7 +545,8 @@ mod tests { } assert_eq!(128, arr.get_buffer_memory_size()); - let internals_of_primitive_array = 8 + 72 + 16; // RawPtrBox & Arc and it's null_bitmap combined. + let internals_of_primitive_array = + POINTER_WIDTH / 4 + POINTER_WIDTH + POINTER_WIDTH / 4; // RawPtrBox & Arc and its null_bitmap combined. 
assert_eq!( arr.get_buffer_memory_size() + internals_of_primitive_array, arr.get_array_memory_size() diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index a26404ff912..816de6ed44c 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -351,6 +351,7 @@ impl fmt::Debug for UnionArray { mod tests { use super::*; + use crate::memory::POINTER_WIDTH; use std::sync::Arc; use crate::array::*; @@ -419,7 +420,11 @@ mod tests { 4 * 8 * 4 * mem::size_of::(), union.get_buffer_memory_size() ); - let internals_of_union_array = (8 + 72) + (union.boxed_fields.len() * 144); // Arc & Vec combined. + let tagged_pointer_size = POINTER_WIDTH / 4 + POINTER_WIDTH; + let internals_of_union_array = tagged_pointer_size + + ((union.boxed_fields.len() * tagged_pointer_size) + + POINTER_WIDTH * 2 + + POINTER_WIDTH); // Arc & Vec combined. assert_eq!( union.get_buffer_memory_size() + internals_of_union_array, union.get_array_memory_size() diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs index 08c7cf1f21e..17534ac65fb 100644 --- a/rust/arrow/src/array/null.rs +++ b/rust/arrow/src/array/null.rs @@ -120,6 +120,7 @@ impl fmt::Debug for NullArray { #[cfg(test)] mod tests { use super::*; + use crate::memory::POINTER_WIDTH; #[test] fn test_null_array() { @@ -130,7 +131,7 @@ mod tests { assert_eq!(null_arr.is_valid(0), false); assert_eq!(0, null_arr.get_buffer_memory_size()); - let internals_of_null_array = 64; // Arc + let internals_of_null_array = POINTER_WIDTH; // Arc assert_eq!( null_arr.get_buffer_memory_size() + internals_of_null_array, null_arr.get_array_memory_size() diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 49049b0195e..e0ee090c16f 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -564,10 +564,7 @@ pub(super) fn buffer_bin_and( } } -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - not(any(feature = "simd", feature = "avx512")) 
-))] +#[cfg(not(any(feature = "simd", feature = "avx512")))] pub(super) fn buffer_bin_and( left: &Buffer, left_offset_in_bits: usize, @@ -674,10 +671,7 @@ pub(super) fn buffer_bin_or( } } -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - not(any(feature = "simd", feature = "avx512")) -))] +#[cfg(not(any(feature = "simd", feature = "avx512")))] pub(super) fn buffer_bin_or( left: &Buffer, left_offset_in_bits: usize, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 0a26d2e5fd2..28127e1efe9 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -39,6 +39,8 @@ use serde_json::{ json, Number, Value, Value::Number as VNumber, Value::String as VString, }; +use num::NumCast; + use crate::error::{ArrowError, Result}; use crate::util::bit_util; @@ -209,6 +211,14 @@ pub trait ArrowNativeType: fn to_usize(&self) -> Option { None } + + /// Cast native type to destination type + fn cast(self) -> Option + where + T: NumCast, + { + None + } } /// Trait indicating a primitive fixed-width type (bool, ints and floats). 
@@ -259,6 +269,13 @@ impl ArrowNativeType for i8 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for i16 { @@ -273,6 +290,13 @@ impl ArrowNativeType for i16 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for i32 { @@ -287,6 +311,13 @@ impl ArrowNativeType for i32 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for i64 { @@ -301,6 +332,13 @@ impl ArrowNativeType for i64 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for u8 { @@ -315,6 +353,13 @@ impl ArrowNativeType for u8 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for u16 { @@ -329,6 +374,13 @@ impl ArrowNativeType for u16 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for u32 { @@ -343,6 +395,13 @@ impl ArrowNativeType for u32 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for u64 { @@ -357,18 +416,39 @@ impl ArrowNativeType for u64 { fn to_usize(&self) -> Option { num::ToPrimitive::to_usize(self) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for f32 { fn into_json_value(self) -> Option { Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(VNumber) } + + fn cast(self) -> Option + where + T: 
NumCast, + { + NumCast::from(self) + } } impl ArrowNativeType for f64 { fn into_json_value(self) -> Option { Number::from_f64(self).map(VNumber) } + + fn cast(self) -> Option + where + T: NumCast, + { + NumCast::from(self) + } } // BooleanType is special: its bit-width is not the size of the primitive type, and its `index` diff --git a/rust/arrow/src/memory.rs b/rust/arrow/src/memory.rs index 802498abd22..2b1a898064a 100644 --- a/rust/arrow/src/memory.rs +++ b/rust/arrow/src/memory.rs @@ -22,6 +22,13 @@ use std::alloc::Layout; use std::mem::align_of; use std::ptr::NonNull; +/// Pointer width of the target platform, in bits +#[cfg(target_pointer_width = "64")] +pub(crate) const POINTER_WIDTH: usize = 64; +/// Pointer width of the target platform, in bits +#[cfg(target_pointer_width = "32")] +pub(crate) const POINTER_WIDTH: usize = 32; + // NOTE: Below code is written for spatial/temporal prefetcher optimizations. Memory allocation // should align well with usage pattern of cache access and block sizes on layers of storage levels from // registers to non-volatile memory. These alignments are all cache aware alignments incorporated @@ -239,8 +246,8 @@ mod tests { fn test_allocate() { for _ in 0..10 { let p = allocate_aligned(1024); - // make sure this is 64-byte aligned - assert_eq!(0, (p as usize) % 64); + // make sure the address is a multiple of POINTER_WIDTH + assert_eq!(0, (p as usize) % POINTER_WIDTH); } }