diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 9c4149ee5f2..7b93e58e3fb 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -24,6 +24,7 @@ use super::{ArrayData, ArrayDataRef}; mod boolean; mod list; mod primitive; +mod structure; mod utils; mod variable_size; @@ -217,6 +218,7 @@ fn build_extend(array: &ArrayData) -> Extend { DataType::Int64 => primitive::build_extend::(array), _ => unreachable!(), }, + DataType::Struct(_) => structure::build_extend(array), DataType::Float16 => unreachable!(), /* DataType::Null => {} @@ -266,6 +268,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { DataType::Int64 => primitive::extend_nulls::, _ => unreachable!(), }, + DataType::Struct(_) => structure::extend_nulls, //DataType::Struct(_) => structure::build_extend(array), DataType::Float16 => unreachable!(), /* @@ -285,10 +288,16 @@ impl<'a> MutableArrayData<'a> { /// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls /// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls]. /// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used. - pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) -> Self { + pub fn new(arrays: Vec<&'a ArrayData>, mut use_nulls: bool, capacity: usize) -> Self { let data_type = arrays[0].data_type(); use crate::datatypes::*; + // if any of the arrays has nulls, insertions from any array requires setting bits + // as there is at least one array with nulls. + if arrays.iter().any(|array| array.null_count() > 0) { + use_nulls = true; + }; + let buffers = match &data_type { DataType::Boolean => { let bytes = bit_util::ceil(capacity, 8); @@ -354,6 +363,7 @@ impl<'a> MutableArrayData<'a> { _ => unreachable!(), }, DataType::Float16 => unreachable!(), + DataType::Struct(_) => vec![], _ => { todo!("Take and filter operations still not supported for this datatype") } @@ -394,6 +404,15 @@ impl<'a> MutableArrayData<'a> { // the dictionary type just appends keys and clones the values. DataType::Dictionary(_, _) => vec![], DataType::Float16 => unreachable!(), + DataType::Struct(fields) => (0..fields.len()) + .map(|i| { + let child_arrays = arrays + .iter() + .map(|array| array.child_data()[i].as_ref()) + .collect::>(); + MutableArrayData::new(child_arrays, use_nulls, capacity) + }) + .collect::>(), _ => { todo!("Take and filter operations still not supported for this datatype") } @@ -460,12 +479,14 @@ impl<'a> MutableArrayData<'a> { #[cfg(test)] mod tests { + use std::convert::TryFrom; + use super::*; use crate::array::{ - Array, ArrayDataRef, BooleanArray, DictionaryArray, Int16Array, Int16Type, - Int64Builder, ListBuilder, PrimitiveBuilder, StringArray, - StringDictionaryBuilder, UInt8Array, + Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, Int16Array, + Int16Type, Int32Array, Int64Builder, ListBuilder, PrimitiveBuilder, StringArray, + StringDictionaryBuilder, StructArray, UInt8Array, }; use crate::{array::ListArray, error::Result}; @@ -614,6 +635,26 @@ mod tests { assert_eq!(result, expected); } + #[test] + fn test_multiple_with_nulls() { + let array1 = StringArray::from(vec!["hello", "world"]).data(); + let array2 = StringArray::from(vec![Some("1"), None]).data(); + + let arrays = vec![array1.as_ref(), array2.as_ref()]; + + let mut mutable = MutableArrayData::new(arrays, false, 5); + + mutable.extend(0, 0, 2); + mutable.extend(1, 0, 2); + + let result = mutable.freeze(); + let result = StringArray::from(Arc::new(result)); + + let expected = + StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]); + assert_eq!(result, expected); + } + #[test] fn test_string_null_offset_nulls() { let array = @@ -687,4 +728,118 @@ mod tests { let expected = Int16Array::from(vec![Some(1), None]); assert_eq!(result.keys(), &expected); } + + #[test] + fn test_struct() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![ + Some("joe"), + None, + None, + Some("mark"), + Some("doe"), + ])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + Some(3), + Some(4), + Some(5), + ])); + + let array = + StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) + .unwrap() + .data(); + let arrays = vec![array.as_ref()]; + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + let expected = StructArray::try_from(vec![ + ("f1", strings.slice(1, 2)), + ("f2", ints.slice(1, 2)), + ]) + .unwrap(); + assert_eq!(array, expected) + } + + #[test] + fn test_struct_nulls() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![ + Some("joe"), + None, + None, + Some("mark"), + Some("doe"), + ])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(4), + Some(5), + ])); + + let array = + StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) + .unwrap() + .data(); + let arrays = vec![array.as_ref()]; + + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + let expected_string = Arc::new(StringArray::from(vec![None, None])) as ArrayRef; + let expected_int = Arc::new(Int32Array::from(vec![Some(2), None])) as ArrayRef; + + let expected = + StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)]) + .unwrap(); + assert_eq!(array, expected) + } + + #[test] + fn test_struct_many() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![ + Some("joe"), + None, + None, + Some("mark"), + Some("doe"), + ])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(4), + Some(5), + ])); + + let array = + StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) + .unwrap() + .data(); + let arrays = vec![array.as_ref(), array.as_ref()]; + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + mutable.extend(1, 0, 2); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + let expected_string = + Arc::new(StringArray::from(vec![None, None, Some("joe"), None])) as ArrayRef; + let expected_int = + Arc::new(Int32Array::from(vec![Some(2), None, Some(1), Some(2)])) as ArrayRef; + + let expected = + StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)]) + .unwrap(); + assert_eq!(array, expected) + } } diff --git a/rust/arrow/src/array/transform/structure.rs b/rust/arrow/src/array/transform/structure.rs new file mode 100644 index 00000000000..5c41d76a7f1 --- /dev/null +++ b/rust/arrow/src/array/transform/structure.rs @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::array::ArrayData; + +use super::{Extend, _MutableArrayData}; + +pub(super) fn build_extend(array: &ArrayData) -> Extend { + if array.null_count() == 0 { + Box::new( + move |mutable: &mut _MutableArrayData, + index: usize, + start: usize, + len: usize| { + mutable + .child_data + .iter_mut() + .for_each(|child| child.extend(index, start, start + len)) + }, + ) + } else { + Box::new( + move |mutable: &mut _MutableArrayData, + index: usize, + start: usize, + len: usize| { + (start..start + len).for_each(|i| { + if array.is_valid(i) { + mutable + .child_data + .iter_mut() + .for_each(|child| child.extend(index, i, i + 1)) + } else { + mutable + .child_data + .iter_mut() + .for_each(|child| child.extend_nulls(1)) + } + }) + }, + ) + } +} + +pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { + mutable + .child_data + .iter_mut() + .for_each(|child| child.extend_nulls(len)) +}