Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion rust/arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use crate::util::bit_util;
/// An generic representation of Arrow array data which encapsulates common attributes and
/// operations for Arrow array. Specific operations for different arrays types (e.g.,
/// primitive, list, struct) are implemented in `Array`.
#[derive(PartialEq, Debug, Clone)]
#[derive(Debug, Clone)]
pub struct ArrayData {
/// The data type for this array data
data_type: DataType,
Expand Down Expand Up @@ -209,6 +209,61 @@ impl ArrayData {
}
}

impl PartialEq for ArrayData {
fn eq(&self, other: &Self) -> bool {
assert_eq!(
self.data_type(),
other.data_type(),
"Data types not the same"
);
assert_eq!(self.len(), other.len(), "Lengths not the same");
// TODO: when adding tests for this, test that we can compare with arrays that have offsets
assert_eq!(self.offset(), other.offset(), "Offsets not the same");
assert_eq!(self.null_count(), other.null_count());
// compare buffers excluding padding
let self_buffers = self.buffers();
let other_buffers = other.buffers();
assert_eq!(self_buffers.len(), other_buffers.len());
self_buffers.iter().zip(other_buffers).for_each(|(s, o)| {
compare_buffer_regions(
s,
self.offset(), // TODO mul by data length
o,
other.offset(), // TODO mul by data len
);
});
// assert_eq!(self.buffers(), other.buffers());

assert_eq!(self.child_data(), other.child_data());
// null arrays can skip the null bitmap, thus only compare if there are no nulls
if self.null_count() != 0 || other.null_count() != 0 {
compare_buffer_regions(
self.null_buffer().unwrap(),
self.offset(),
other.null_buffer().unwrap(),
other.offset(),
)
}
true
}
}

/// A helper to compare buffer regions of 2 buffers.
/// Compares the length of the shorter buffer.
fn compare_buffer_regions(
left: &Buffer,
left_offset: usize,
right: &Buffer,
right_offset: usize,
) {
// for convenience, we assume that the buffer lengths are only unequal if one has padding,
// so we take the shorter length so we can discard the padding from the longer length
let shorter_len = left.len().min(right.len());
let s_sliced = left.bit_slice(left_offset, shorter_len);
let o_sliced = right.bit_slice(right_offset, shorter_len);
assert_eq!(s_sliced, o_sliced);
}

Comment on lines +212 to +266
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this is not correct, see this comment

/// Builder for `ArrayData` type
#[derive(Debug)]
pub struct ArrayDataBuilder {
Expand Down
8 changes: 7 additions & 1 deletion rust/arrow/src/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ impl From<ArrayDataRef> for NullArray {

impl fmt::Debug for NullArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "NullArray")
write!(f, "NullArray({})", self.len())
}
}

Expand Down Expand Up @@ -146,4 +146,10 @@ mod tests {
assert_eq!(array2.null_count(), 16);
assert_eq!(array2.offset(), 8);
}

#[test]
fn test_debug_null_array() {
let array = NullArray::new(1024 * 1024);
assert_eq!(format!("{:?}", array), "NullArray(1048576)");
}
}
81 changes: 50 additions & 31 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
(Timestamp(_, _), Date32(_)) => true,
(Timestamp(_, _), Date64(_)) => true,
// date64 to timestamp might not make sense,

// end temporal casts
(Null, Int32) => true,
(_, _) => false,
}
}
Expand Down Expand Up @@ -729,25 +728,31 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
// single integer operation, but need to avoid integer
// math rounding down to zero

if to_size > from_size {
let time_array = Date64Array::from(array.data());
Ok(Arc::new(multiply(
&time_array,
&Date64Array::from(vec![to_size / from_size; array.len()]),
)?) as ArrayRef)
} else if to_size < from_size {
let time_array = Date64Array::from(array.data());
Ok(Arc::new(divide(
&time_array,
&Date64Array::from(vec![from_size / to_size; array.len()]),
)?) as ArrayRef)
} else {
cast_array_data::<Date64Type>(array, to_type.clone())
match to_size.cmp(&from_size) {
std::cmp::Ordering::Less => {
let time_array = Date64Array::from(array.data());
Ok(Arc::new(divide(
&time_array,
&Date64Array::from(vec![from_size / to_size; array.len()]),
)?) as ArrayRef)
}
std::cmp::Ordering::Equal => {
cast_array_data::<Date64Type>(array, to_type.clone())
}
std::cmp::Ordering::Greater => {
let time_array = Date64Array::from(array.data());
Ok(Arc::new(multiply(
&time_array,
&Date64Array::from(vec![to_size / from_size; array.len()]),
)?) as ArrayRef)
}
}
}
// date64 to timestamp might not make sense,

// end temporal casts
// null to primitive/flat types
(Null, Int32) => Ok(Arc::new(Int32Array::from(vec![None; array.len()]))),

(_, _) => Err(ArrowError::ComputeError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
Expand Down Expand Up @@ -2476,44 +2481,44 @@ mod tests {

// Test casting TO StringArray
let cast_type = Utf8;
let cast_array = cast(&array, &cast_type).expect("cast to UTF-8 succeeded");
let cast_array = cast(&array, &cast_type).expect("cast to UTF-8 failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

// Test casting TO Dictionary (with different index sizes)

let cast_type = Dictionary(Box::new(Int16), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(Int32), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(Int64), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(UInt8), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(UInt16), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(UInt32), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

let cast_type = Dictionary(Box::new(UInt64), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);
}
Expand Down Expand Up @@ -2598,11 +2603,11 @@ mod tests {
let expected = vec!["1", "null", "3"];

// Test casting TO PrimitiveArray, different dictionary type
let cast_array = cast(&array, &Utf8).expect("cast to UTF-8 succeeded");
let cast_array = cast(&array, &Utf8).expect("cast to UTF-8 failed");
assert_eq!(array_to_strings(&cast_array), expected);
assert_eq!(cast_array.data_type(), &Utf8);

let cast_array = cast(&array, &Int64).expect("cast to int64 succeeded");
let cast_array = cast(&array, &Int64).expect("cast to int64 failed");
assert_eq!(array_to_strings(&cast_array), expected);
assert_eq!(cast_array.data_type(), &Int64);
}
Expand All @@ -2621,13 +2626,13 @@ mod tests {

// Cast to a dictionary (same value type, Int32)
let cast_type = Dictionary(Box::new(UInt8), Box::new(Int32));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);

// Cast to a dictionary (different value type, Int8)
let cast_type = Dictionary(Box::new(UInt8), Box::new(Int8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);
}
Expand All @@ -2646,11 +2651,25 @@ mod tests {

// Cast to a dictionary (same value type, Utf8)
let cast_type = Dictionary(Box::new(UInt8), Box::new(Utf8));
let cast_array = cast(&array, &cast_type).expect("cast succeeded");
let cast_array = cast(&array, &cast_type).expect("cast failed");
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(array_to_strings(&cast_array), expected);
}

#[test]
fn test_cast_null_array_to_int32() {
let array = Arc::new(NullArray::new(6)) as ArrayRef;

let expected = Int32Array::from(vec![None; 6]);

// Cast to a dictionary (same value type, Utf8)
let cast_type = DataType::Int32;
let cast_array = cast(&array, &cast_type).expect("cast failed");
let cast_array = as_primitive_array::<Int32Type>(&cast_array);
assert_eq!(cast_array.data_type(), &cast_type);
assert_eq!(cast_array, &expected);
}

/// Print the `DictionaryArray` `array` as a vector of strings
fn array_to_strings(array: &ArrayRef) -> Vec<String> {
(0..array.len())
Expand Down Expand Up @@ -2768,7 +2787,7 @@ mod tests {
)),
Arc::new(TimestampNanosecondArray::from_vec(
vec![1000, 2000],
Some(tz_name.clone()),
Some(tz_name),
)),
Arc::new(Date32Array::from(vec![1000, 2000])),
Arc::new(Date64Array::from(vec![1000, 2000])),
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ pub struct Field {
name: String,
data_type: DataType,
nullable: bool,
dict_id: i64,
dict_is_ordered: bool,
pub(crate) dict_id: i64,
pub(crate) dict_is_ordered: bool,
}

pub trait ArrowNativeType:
Expand Down
Loading