diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs
index 15d6ccd0045..602fd409c95 100644
--- a/rust/arrow/src/array/array_binary.rs
+++ b/rust/arrow/src/array/array_binary.rs
@@ -358,6 +358,26 @@ impl FixedSizeBinaryArray {
     }
 }

+/// Builds a `FixedSizeBinaryArray` from a vector of equally sized byte vectors.
+///
+/// # Panics
+///
+/// Panics if `data` is empty or if the items do not all have the same length.
+impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
+    fn from(data: Vec<Vec<u8>>) -> Self {
+        let len = data.len();
+        assert!(len > 0);
+        let size = data[0].len();
+        assert!(data.iter().all(|item| item.len() == size));
+        let data = data.into_iter().flatten().collect::<Vec<u8>>();
+        let array_data = ArrayData::builder(DataType::FixedSizeBinary(size as i32))
+            .len(len)
+            .add_buffer(Buffer::from(&data))
+            .build();
+        FixedSizeBinaryArray::from(array_data)
+    }
+}
+
 impl From<ArrayDataRef> for FixedSizeBinaryArray {
     fn from(data: ArrayDataRef) -> Self {
         assert_eq!(
diff --git a/rust/arrow/src/array/transform/fixed_binary.rs b/rust/arrow/src/array/transform/fixed_binary.rs
new file mode 100644
index 00000000000..d287c0f7364
--- /dev/null
+++ b/rust/arrow/src/array/transform/fixed_binary.rs
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{array::ArrayData, datatypes::DataType};
+
+use super::{Extend, _MutableArrayData};
+
+/// Builds the closure that copies `len` fixed-size values, starting at
+/// element `start` of `array`, into the first buffer of the mutable array.
+///
+/// The element width comes from `DataType::FixedSizeBinary`; `values` is
+/// pre-sliced by `array.offset()` so `start` is relative to the array.
+pub(super) fn build_extend(array: &ArrayData) -> Extend {
+    let size = match array.data_type() {
+        DataType::FixedSizeBinary(i) => *i as usize,
+        _ => unreachable!(),
+    };
+
+    let values = &array.buffers()[0].data()[array.offset() * size..];
+    if array.null_count() == 0 {
+        // fast case where we can copy regions without null issues
+        Box::new(
+            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+                let buffer = &mut mutable.buffers[0];
+                buffer.extend_from_slice(&values[start * size..(start + len) * size]);
+            },
+        )
+    } else {
+        Box::new(
+            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+                // nulls present: append item by item, ignoring null entries
+                let values_buffer = &mut mutable.buffers[0];
+
+                (start..start + len).for_each(|i| {
+                    if array.is_valid(i) {
+                        // copy only the `size` bytes that belong to element `i`
+                        let bytes = &values[i * size..(i + 1) * size];
+                        values_buffer.extend_from_slice(bytes);
+                    } else {
+                        values_buffer.extend(size);
+                    }
+                })
+            },
+        )
+    }
+}
+
+/// Makes room for `len` null slots by extending the values buffer by
+/// `len * size`, where `size` is the `FixedSizeBinary` element width.
+pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
+    let size = match mutable.data_type {
+        DataType::FixedSizeBinary(i) => i as usize,
+        _ => unreachable!(),
+    };
+
+    let values_buffer = &mut mutable.buffers[0];
+    values_buffer.extend(len * size);
+}
diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
index 7b93e58e3fb..3930d241dcd 100644
--- a/rust/arrow/src/array/transform/mod.rs
+++ b/rust/arrow/src/array/transform/mod.rs
@@ -22,6 +22,7 @@ use crate::{buffer::MutableBuffer, datatypes::DataType, util::bit_util};
 use super::{ArrayData, ArrayDataRef};

 mod boolean;
+mod fixed_binary;
 mod list;
 mod primitive;
 mod structure;
@@ -219,10 +220,10 @@ fn build_extend(array: &ArrayData) -> Extend {
             _ => unreachable!(),
         },
         DataType::Struct(_) => structure::build_extend(array),
+        DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
         DataType::Float16 => unreachable!(),
         /*
         DataType::Null => {}
-        DataType::FixedSizeBinary(_) => {}
         DataType::FixedSizeList(_, _) => {}
         DataType::Struct(_) => {}
         DataType::Union(_) => {}
@@ -269,11 +270,10 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
             _ => unreachable!(),
         },
         DataType::Struct(_) => structure::extend_nulls,
-        //DataType::Struct(_) => structure::build_extend(array),
+        DataType::FixedSizeBinary(_) => fixed_binary::extend_nulls,
         DataType::Float16 => unreachable!(),
         /*
         DataType::Null => {}
-        DataType::FixedSizeBinary(_) => {}
         DataType::FixedSizeList(_, _) => {}
         DataType::Union(_) => {}
         */
@@ -351,6 +351,9 @@ impl<'a> MutableArrayData<'a> {
                 buffer.extend_from_slice(&[0i64].to_byte_slice());
                 vec![buffer]
             }
+            DataType::FixedSizeBinary(size) => {
+                vec![MutableBuffer::new(capacity * *size as usize)]
+            }
             DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
                 DataType::UInt8 => vec![MutableBuffer::new(capacity * size_of::<u8>())],
                 DataType::UInt16 => vec![MutableBuffer::new(capacity * size_of::<u16>())],
@@ -484,9 +487,10 @@ mod tests {
     use super::*;

     use crate::array::{
-        Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, Int16Array,
-        Int16Type, Int32Array, Int64Builder, ListBuilder, PrimitiveBuilder, StringArray,
-        StringDictionaryBuilder, StructArray, UInt8Array,
+        Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray,
+        FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Builder,
+        ListBuilder, PrimitiveBuilder, StringArray, StringDictionaryBuilder, StructArray,
+        UInt8Array,
     };
     use crate::{array::ListArray, error::Result};

@@ -842,4 +846,25 @@ mod tests {
             .unwrap();
         assert_eq!(array, expected)
     }
+
+    #[test]
+    fn test_binary_fixed_sized_offsets() {
+        let array =
+            FixedSizeBinaryArray::from(vec![vec![0, 0], vec![0, 1], vec![0, 2]]).data();
+        let array = array.slice(1, 2);
+        // = [[0, 1], [0, 2]] due to the offset = 1
+
+        let arrays = vec![&array];
+
+        let mut mutable = MutableArrayData::new(arrays, false, 0);
+
+        mutable.extend(0, 1, 2);
+        mutable.extend(0, 0, 1);
+
+        let result = mutable.freeze();
+        let result = FixedSizeBinaryArray::from(Arc::new(result));
+
+        let expected = FixedSizeBinaryArray::from(vec![vec![0, 2], vec![0, 1]]);
+        assert_eq!(result, expected);
+    }
 }