Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ jobs:

# Switch to arrow crate
cd arrow
# re-run tests on arrow crate with additional features
# re-run tests on arrow crate to ensure
# all arrays are created correctly
cargo test --features=force_validate
cargo test --features=prettyprint
# run test on arrow crate with minimal set of features
cargo test --no-default-features
Expand Down
4 changes: 4 additions & 0 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ prettyprint = ["comfy-table"]
# target without assuming an environment containing JavaScript.
test_utils = ["rand"]
pyarrow = ["pyo3"]
# force_validate runs full data validation for all arrays that are created
# this is not enabled by default as it is too computationally expensive
# but is run as part of our CI checks
force_validate = []

[dev-dependencies]
rand = "0.8"
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,9 @@ mod tests {
#[should_panic(
expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_fixed_size_binary_array_from_incorrect_list_array() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ mod tests {
#[test]
#[should_panic(expected = "BooleanArray data should contain a single buffer only \
(values buffer)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_boolean_array_invalid_buffer_len() {
let data = unsafe {
ArrayData::builder(DataType::Boolean)
Expand Down
12 changes: 12 additions & 0 deletions arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,9 @@ mod tests {
#[should_panic(
expected = "FixedSizeListArray child array length should be a multiple of 3"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_fixed_size_list_array_unequal_children() {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
Expand Down Expand Up @@ -1065,6 +1068,9 @@ mod tests {
#[should_panic(
expected = "ListArray data should contain a single buffer only (value offsets)"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
let value_data = unsafe {
ArrayData::builder(DataType::Int32)
Expand All @@ -1087,6 +1093,9 @@ mod tests {
#[should_panic(
expected = "ListArray should contain a single child array (values array)"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
let list_data_type =
Expand Down Expand Up @@ -1137,6 +1146,9 @@ mod tests {

#[test]
#[should_panic(expected = "memory is not aligned")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,9 @@ mod tests {
#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
(values buffer)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_invalid_buffer_len() {
let buffer = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4]);
let data = unsafe {
Expand Down
12 changes: 9 additions & 3 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ impl ArrayData {
/// Note: This is a low level API and most users of the arrow
/// crate should create arrays using the methods in the `array`
/// module.
#[allow(clippy::let_and_return)]
pub unsafe fn new_unchecked(
data_type: DataType,
len: usize,
Expand All @@ -286,15 +287,20 @@ impl ArrayData {
Some(null_count) => null_count,
};
let null_bitmap = null_bit_buffer.map(Bitmap::from);
Self {
let new_self = Self {
data_type,
len,
null_count,
offset,
buffers,
child_data,
null_bitmap,
}
};

// Provide a force_validate mode
#[cfg(feature = "force_validate")]
new_self.validate_full().unwrap();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the validation call

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

new_self
}

/// Create a new ArrayData, validating that the provided buffers
Expand Down Expand Up @@ -2340,7 +2346,7 @@ mod tests {

#[test]
#[should_panic(
expected = "child #0 invalid: Invalid argument error: Value at position 1 out of bounds: -1 (should be in [0, 1])"
expected = "Value at position 1 out of bounds: -1 (should be in [0, 1])"
)]
/// test that children are validated recursively (aka bugs in child data of struct also are flagged)
fn test_validate_recursive() {
Expand Down
6 changes: 6 additions & 0 deletions arrow/src/compute/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,9 @@ mod tests {
}

#[test]
// Fails when validation enabled
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😢

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at least we know there is a problem. So it is progress 🤷 we'll get it fixed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

going to fix it at #1567

// https://github.com/apache/arrow-rs/issues/1547
#[cfg(not(feature = "force_validate"))]
fn test_filter_union_array_sparse() {
let mut builder = UnionBuilder::new_sparse(3);
builder.append::<Int32Type>("A", 1).unwrap();
Expand All @@ -1703,6 +1706,9 @@ mod tests {
}

#[test]
// Fails when validation enabled
// https://github.com/apache/arrow-rs/issues/1547
#[cfg(not(feature = "force_validate"))]
fn test_filter_union_array_sparse_with_nulls() {
let mut builder = UnionBuilder::new_sparse(4);
builder.append::<Int32Type>("A", 1).unwrap();
Expand Down
10 changes: 9 additions & 1 deletion arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,23 @@ fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
///
/// # Warning
///
/// This function **might** return in invalid utf-8 format if the character length falls on a non-utf8 boundary.
/// This function **might** return in invalid utf-8 format if the
/// character length falls on a non-utf8 boundary, which we
/// [hope to fix](https://github.com/apache/arrow-rs/issues/1531)
/// in a future release.
///
/// ## Example of getting an invalid substring
/// ```
/// # // Doesn't pass due to https://github.com/apache/arrow-rs/issues/1531
/// # #[cfg(not(feature = "force_validate"))]
/// # {
/// # use arrow::array::StringArray;
/// # use arrow::compute::kernels::substring::substring;
/// let array = StringArray::from(vec![Some("E=mc²")]);
/// let result = substring(&array, -1, &None).unwrap();
/// let result = result.as_any().downcast_ref::<StringArray>().unwrap();
/// assert_eq!(result.value(0).as_bytes(), &[0x00B2]); // invalid utf-8 format
/// # }
/// ```
///
/// # Error
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/ipc/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,8 @@ mod tests {
}

#[test]
// https://github.com/apache/arrow-rs/issues/1548
#[cfg(not(feature = "force_validate"))]
fn projection_should_work() {
// complementary to the previous test
let testdata = crate::util::test_util::arrow_test_data();
Expand Down