Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,21 @@ impl FixedSizeBinaryArray {
}
}

/// Builds a [`FixedSizeBinaryArray`] from a vector of equally sized byte vectors.
///
/// The width of the resulting `DataType::FixedSizeBinary` is taken from the
/// length of the first item.
///
/// # Panics
///
/// Panics if `data` is empty, or if the items do not all have the same length.
impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
    fn from(data: Vec<Vec<u8>>) -> Self {
        // the value width is inferred from the first item, so one must exist
        let len = data.len();
        assert!(len > 0);
        let size = data[0].len();
        assert!(data.iter().all(|item| item.len() == size));

        // lay all items out back to back in a single contiguous byte region
        let bytes: Vec<u8> = data.concat();
        let values = Buffer::from(&bytes);

        let builder = ArrayData::builder(DataType::FixedSizeBinary(size as i32));
        FixedSizeBinaryArray::from(builder.len(len).add_buffer(values).build())
    }
}

impl From<ArrayDataRef> for FixedSizeBinaryArray {
fn from(data: ArrayDataRef) -> Self {
assert_eq!(
Expand Down
65 changes: 65 additions & 0 deletions rust/arrow/src/array/transform/fixed_binary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::{array::ArrayData, datatypes::DataType};

use super::{Extend, _MutableArrayData};

/// Builds the closure that copies fixed-size binary values out of `array` into a
/// [`_MutableArrayData`].
///
/// The returned closure receives the destination, an argument this
/// implementation ignores, and the `start` / `len` (both counted in items) of
/// the range of `array` to copy. The selected bytes are appended to
/// `mutable.buffers[0]`.
///
/// # Panics
///
/// Hits `unreachable!()` if `array` is not of type `DataType::FixedSizeBinary`.
pub(super) fn build_extend(array: &ArrayData) -> Extend {
    // byte width of a single item
    let size = match array.data_type() {
        DataType::FixedSizeBinary(i) => *i as usize,
        _ => unreachable!(),
    };

    // skip the array's offset once, so item `i` lives at `values[i * size..(i + 1) * size]`
    let values = &array.buffers()[0].data()[array.offset() * size..];
    if array.null_count() == 0 {
        // fast case where we can copy regions without null issues
        Box::new(
            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
                let buffer = &mut mutable.buffers[0];
                buffer.extend_from_slice(&values[start * size..(start + len) * size]);
            },
        )
    } else {
        Box::new(
            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
                // nulls present: append item by item so that every entry, valid or
                // null, contributes exactly `size` bytes to the output
                let values_buffer = &mut mutable.buffers[0];

                (start..start + len).for_each(|i| {
                    if array.is_valid(i) {
                        // append only the bytes of item `i`, not the whole
                        // requested range
                        let bytes = &values[i * size..(i + 1) * size];
                        values_buffer.extend_from_slice(bytes);
                    } else {
                        // null entry: grow by one item-sized slot
                        // NOTE(review): presumably `extend(n)` appends `n` bytes
                        // - confirm against `MutableBuffer`
                        values_buffer.extend(size);
                    }
                })
            },
        )
    }
}

/// Grows the values buffer of `mutable` by `len` item-sized slots, one per null
/// entry being appended.
///
/// # Panics
///
/// Hits `unreachable!()` if `mutable.data_type` is not
/// `DataType::FixedSizeBinary`.
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
    // byte width of a single item, taken from the data type
    let byte_width = if let DataType::FixedSizeBinary(width) = mutable.data_type {
        width as usize
    } else {
        unreachable!()
    };

    mutable.buffers[0].extend(len * byte_width);
}
37 changes: 31 additions & 6 deletions rust/arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::{buffer::MutableBuffer, datatypes::DataType, util::bit_util};
use super::{ArrayData, ArrayDataRef};

mod boolean;
mod fixed_binary;
mod list;
mod primitive;
mod structure;
Expand Down Expand Up @@ -219,10 +220,10 @@ fn build_extend(array: &ArrayData) -> Extend {
_ => unreachable!(),
},
DataType::Struct(_) => structure::build_extend(array),
DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
DataType::Float16 => unreachable!(),
/*
DataType::Null => {}
DataType::FixedSizeBinary(_) => {}
DataType::FixedSizeList(_, _) => {}
DataType::Struct(_) => {}
DataType::Union(_) => {}
Expand Down Expand Up @@ -269,11 +270,10 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
_ => unreachable!(),
},
DataType::Struct(_) => structure::extend_nulls,
//DataType::Struct(_) => structure::build_extend(array),
DataType::FixedSizeBinary(_) => fixed_binary::extend_nulls,
DataType::Float16 => unreachable!(),
/*
DataType::Null => {}
DataType::FixedSizeBinary(_) => {}
DataType::FixedSizeList(_, _) => {}
DataType::Union(_) => {}
*/
Expand Down Expand Up @@ -351,6 +351,9 @@ impl<'a> MutableArrayData<'a> {
buffer.extend_from_slice(&[0i64].to_byte_slice());
vec![buffer]
}
DataType::FixedSizeBinary(size) => {
vec![MutableBuffer::new(capacity * *size as usize)]
}
DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
DataType::UInt8 => vec![MutableBuffer::new(capacity * size_of::<u8>())],
DataType::UInt16 => vec![MutableBuffer::new(capacity * size_of::<u16>())],
Expand Down Expand Up @@ -484,9 +487,10 @@ mod tests {
use super::*;

use crate::array::{
Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, Int16Array,
Int16Type, Int32Array, Int64Builder, ListBuilder, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, StructArray, UInt8Array,
Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray,
FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Builder,
ListBuilder, PrimitiveBuilder, StringArray, StringDictionaryBuilder, StructArray,
UInt8Array,
};
use crate::{array::ListArray, error::Result};

Expand Down Expand Up @@ -842,4 +846,25 @@ mod tests {
.unwrap();
assert_eq!(array, expected)
}

#[test]
fn test_binary_fixed_sized_offsets() {
let array =
FixedSizeBinaryArray::from(vec![vec![0, 0], vec![0, 1], vec![0, 2]]).data();
let array = array.slice(1, 2);
// = [[0, 1], [0, 2]] due to the offset = 1

let arrays = vec![&array];

let mut mutable = MutableArrayData::new(arrays, false, 0);

mutable.extend(0, 1, 2);
mutable.extend(0, 0, 1);

let result = mutable.freeze();
let result = FixedSizeBinaryArray::from(Arc::new(result));

let expected = FixedSizeBinaryArray::from(vec![vec![0, 2], vec![0, 1]]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am having trouble figuring out why this is the expected output. I expected something like

        let expected = FixedSizeBinaryArray::from(vec![vec![0, 1], vec![0, 0]]);

As I read

        mutable.extend(0, 1, 2); // --> pick element 1 (aka vec[0, 1])
        mutable.extend(0, 0, 1); // --> pick element 0 (aka vec[0, 0])

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sorry, I should have documented the test better. I added a comment to it. Does it become understandable with the comment? The idea is to check that this works with arrays with offsets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, man, I missed the call to `let array = array.slice(1, 2);` 👍 Thanks @jorgecarleitao

assert_eq!(result, expected);
}
}