Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 159 additions & 4 deletions rust/arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use super::{ArrayData, ArrayDataRef};
mod boolean;
mod list;
mod primitive;
mod structure;
mod utils;
mod variable_size;

Expand Down Expand Up @@ -217,6 +218,7 @@ fn build_extend(array: &ArrayData) -> Extend {
DataType::Int64 => primitive::build_extend::<i64>(array),
_ => unreachable!(),
},
DataType::Struct(_) => structure::build_extend(array),
DataType::Float16 => unreachable!(),
/*
DataType::Null => {}
Expand Down Expand Up @@ -266,6 +268,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
DataType::Int64 => primitive::extend_nulls::<i64>,
_ => unreachable!(),
},
DataType::Struct(_) => structure::extend_nulls,
//DataType::Struct(_) => structure::build_extend(array),
DataType::Float16 => unreachable!(),
/*
Expand All @@ -285,10 +288,16 @@ impl<'a> MutableArrayData<'a> {
/// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls
/// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls].
/// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used.
pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) -> Self {
pub fn new(arrays: Vec<&'a ArrayData>, mut use_nulls: bool, capacity: usize) -> Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#8848 appears to contain this same code -- though github seems to think merging will not be a problem

let data_type = arrays[0].data_type();
use crate::datatypes::*;

// if any of the arrays has nulls, insertions from any array requires setting bits
Copy link
Contributor

@Dandandan Dandandan Dec 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this code could move to the struct branch in the childdata match below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is unrelated to this PR: it was a commit from a bug fix, that this PR was build on top of. It is required also for non-struct arrays :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok 👍 better to keep it then maybe in that case.

// as there is at least one array with nulls.
if arrays.iter().any(|array| array.null_count() > 0) {
use_nulls = true;
};

let buffers = match &data_type {
DataType::Boolean => {
let bytes = bit_util::ceil(capacity, 8);
Expand Down Expand Up @@ -354,6 +363,7 @@ impl<'a> MutableArrayData<'a> {
_ => unreachable!(),
},
DataType::Float16 => unreachable!(),
DataType::Struct(_) => vec![],
_ => {
todo!("Take and filter operations still not supported for this datatype")
}
Expand Down Expand Up @@ -394,6 +404,15 @@ impl<'a> MutableArrayData<'a> {
// the dictionary type just appends keys and clones the values.
DataType::Dictionary(_, _) => vec![],
DataType::Float16 => unreachable!(),
DataType::Struct(fields) => (0..fields.len())
.map(|i| {
let child_arrays = arrays
.iter()
.map(|array| array.child_data()[i].as_ref())
.collect::<Vec<_>>();
MutableArrayData::new(child_arrays, use_nulls, capacity)
})
.collect::<Vec<_>>(),
_ => {
todo!("Take and filter operations still not supported for this datatype")
}
Expand Down Expand Up @@ -460,12 +479,14 @@ impl<'a> MutableArrayData<'a> {

#[cfg(test)]
mod tests {
use std::convert::TryFrom;

use super::*;

use crate::array::{
Array, ArrayDataRef, BooleanArray, DictionaryArray, Int16Array, Int16Type,
Int64Builder, ListBuilder, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, UInt8Array,
Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, Int16Array,
Int16Type, Int32Array, Int64Builder, ListBuilder, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, StructArray, UInt8Array,
};
use crate::{array::ListArray, error::Result};

Expand Down Expand Up @@ -614,6 +635,26 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn test_multiple_with_nulls() {
let array1 = StringArray::from(vec!["hello", "world"]).data();
let array2 = StringArray::from(vec![Some("1"), None]).data();

let arrays = vec![array1.as_ref(), array2.as_ref()];

let mut mutable = MutableArrayData::new(arrays, false, 5);

mutable.extend(0, 0, 2);
mutable.extend(1, 0, 2);

let result = mutable.freeze();
let result = StringArray::from(Arc::new(result));

let expected =
StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]);
assert_eq!(result, expected);
}

#[test]
fn test_string_null_offset_nulls() {
let array =
Expand Down Expand Up @@ -687,4 +728,118 @@ mod tests {
let expected = Int16Array::from(vec![Some(1), None]);
assert_eq!(result.keys(), &expected);
}

#[test]
fn test_struct() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Some("joe"),
None,
None,
Some("mark"),
Some("doe"),
]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![
Some(1),
Some(2),
Some(3),
Some(4),
Some(5),
]));

let array =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap()
.data();
let arrays = vec![array.as_ref()];
let mut mutable = MutableArrayData::new(arrays, false, 0);

mutable.extend(0, 1, 3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if ensuring the slice covers an actual string value would be useful (this slice just takes the two None, None values). It it it was like mutable.extend(0, 2, 4) that would also include Some("mark"). The same comment applies to test_struct_nulls as well

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sorry, I did not notice this comment. You are right, I will address it on a separate PR.

let data = mutable.freeze();
let array = StructArray::from(Arc::new(data));

let expected = StructArray::try_from(vec![
("f1", strings.slice(1, 2)),
("f2", ints.slice(1, 2)),
])
.unwrap();
assert_eq!(array, expected)
}

#[test]
fn test_struct_nulls() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Some("joe"),
None,
None,
Some("mark"),
Some("doe"),
]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![
Some(1),
Some(2),
None,
Some(4),
Some(5),
]));

let array =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap()
.data();
let arrays = vec![array.as_ref()];

let mut mutable = MutableArrayData::new(arrays, false, 0);

mutable.extend(0, 1, 3);
let data = mutable.freeze();
let array = StructArray::from(Arc::new(data));

let expected_string = Arc::new(StringArray::from(vec![None, None])) as ArrayRef;
let expected_int = Arc::new(Int32Array::from(vec![Some(2), None])) as ArrayRef;

let expected =
StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)])
.unwrap();
assert_eq!(array, expected)
}

#[test]
fn test_struct_many() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Some("joe"),
None,
None,
Some("mark"),
Some("doe"),
]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![
Some(1),
Some(2),
None,
Some(4),
Some(5),
]));

let array =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap()
.data();
let arrays = vec![array.as_ref(), array.as_ref()];
let mut mutable = MutableArrayData::new(arrays, false, 0);

mutable.extend(0, 1, 3);
mutable.extend(1, 0, 2);
let data = mutable.freeze();
let array = StructArray::from(Arc::new(data));

let expected_string =
Arc::new(StringArray::from(vec![None, None, Some("joe"), None])) as ArrayRef;
let expected_int =
Arc::new(Int32Array::from(vec![Some(2), None, Some(1), Some(2)])) as ArrayRef;

let expected =
StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)])
.unwrap();
assert_eq!(array, expected)
}
}
64 changes: 64 additions & 0 deletions rust/arrow/src/array/transform/structure.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::array::ArrayData;

use super::{Extend, _MutableArrayData};

pub(super) fn build_extend(array: &ArrayData) -> Extend {
if array.null_count() == 0 {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, start, start + len))
},
)
} else {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
(start..start + len).for_each(|i| {
if array.is_valid(i) {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, i, i + 1))
} else {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend_nulls(1))
}
})
},
)
}
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend_nulls(len))
}