Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions parquet-variant-compute/src/arrow_to_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

use crate::type_conversion::{decimal_to_variant_decimal, CastOptions};
use arrow::array::{
Array, AsArray, GenericBinaryArray, GenericListArray, GenericListViewArray, GenericStringArray,
OffsetSizeTrait, PrimitiveArray,
Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
GenericStringArray, OffsetSizeTrait, PrimitiveArray,
};
use arrow::compute::kernels::cast;
use arrow::datatypes::{
Expand Down Expand Up @@ -82,6 +82,7 @@ pub(crate) enum ArrowToVariantRowBuilder<'a> {
LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
Struct(StructArrowToVariantBuilder<'a>),
Map(MapArrowToVariantBuilder<'a>),
Union(UnionArrowToVariantBuilder<'a>),
Expand Down Expand Up @@ -138,6 +139,7 @@ impl<'a> ArrowToVariantRowBuilder<'a> {
LargeList(b) => b.append_row(builder, index),
ListView(b) => b.append_row(builder, index),
LargeListView(b) => b.append_row(builder, index),
FixedSizeList(b) => b.append_row(builder, index),
Struct(b) => b.append_row(builder, index),
Map(b) => b.append_row(builder, index),
Union(b) => b.append_row(builder, index),
Expand Down Expand Up @@ -255,6 +257,10 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
array.as_list_view(),
options,
)?),
DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
array.as_fixed_size_list(),
options,
)?),
DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
array.as_struct(),
options,
Expand All @@ -281,11 +287,6 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
)));
}
},
dt => {
return Err(ArrowError::CastError(format!(
"Unsupported data type for casting to Variant: {dt}",
)));
}
};
Ok(builder)
}
Expand Down Expand Up @@ -523,7 +524,8 @@ impl NullArrowToVariantBuilder {
}
}

/// Generic list builder for List, LargeList, ListView, and LargeListView types
/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
/// and FixedSizeList
pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
list_array: &'a L,
values_builder: Box<ArrowToVariantRowBuilder<'a>>,
Expand Down Expand Up @@ -599,6 +601,18 @@ impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
}
}

// `ListLikeArray` adapter for `FixedSizeListArray`, so fixed-size lists can be driven by
// the same generic list builder as the offset-based list types.
impl ListLikeArray for FixedSizeListArray {
    /// Returns the child array holding the elements of every list.
    fn values(&self) -> &dyn Array {
        self.values()
    }

    /// Returns the half-open range of child-array positions belonging to row `index`.
    ///
    /// Every row spans exactly `value_length()` child elements, so row `index`
    /// begins at `index * value_length()`.
    fn element_range(&self, index: usize) -> Range<usize> {
        let width = self.value_length().as_usize();
        let start = index * width;
        let end = start + width;
        start..end
    }
}

/// Struct builder for StructArray
pub(crate) struct StructArrowToVariantBuilder<'a> {
struct_array: &'a arrow::array::StructArray,
Expand Down Expand Up @@ -645,8 +659,7 @@ impl<'a> StructArrowToVariantBuilder<'a> {

// Process each field
for (field_name, row_builder) in &mut self.field_builders {
let mut field_builder =
parquet_variant::ObjectFieldBuilder::new(field_name, &mut obj_builder);
let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
row_builder.append_row(&mut field_builder, index)?;
}

Expand Down
96 changes: 94 additions & 2 deletions parquet-variant-compute/src/cast_to_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ mod tests {
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, DurationMicrosecondArray,
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray,
FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder,
GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
FixedSizeBinaryBuilder, FixedSizeListBuilder, Float16Array, Float32Array, Float64Array,
GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeListArray,
LargeListViewBuilder, LargeStringArray, ListArray, ListViewBuilder, MapArray, NullArray,
StringArray, StringRunBuilder, StringViewArray, StructArray, Time32MillisecondArray,
Expand Down Expand Up @@ -1407,6 +1407,98 @@ mod tests {
);
}

#[test]
fn test_cast_to_variant_fixed_size_list() {
    // Input: four fixed-size lists, each holding two Int32 slots.
    let mut input_builder = FixedSizeListBuilder::new(Int32Array::builder(0), 2);

    // Row 0: [0, 1]
    input_builder.values().append_value(0);
    input_builder.values().append_value(1);
    input_builder.append(true);

    // Row 1: [null, 3]
    input_builder.values().append_null();
    input_builder.values().append_value(3);
    input_builder.append(true);

    // Row 2: null list; its two child slots are still populated
    input_builder.values().append_value(4);
    input_builder.values().append_null();
    input_builder.append(false);

    // Row 3: [null, null]
    input_builder.values().append_nulls(2);
    input_builder.append(true);

    let input = input_builder.finish();

    // Expected variant for row 0: [0, 1]
    let (metadata0, value0) = {
        let mut expected = VariantBuilder::new();
        let mut items = expected.new_list();
        items.append_value(0i32);
        items.append_value(1i32);
        items.finish();
        expected.finish()
    };
    let row0 = Variant::new(&metadata0, &value0);

    // Expected variant for row 1: [null, 3]
    let (metadata1, value1) = {
        let mut expected = VariantBuilder::new();
        let mut items = expected.new_list();
        items.append_null();
        items.append_value(3i32);
        items.finish();
        expected.finish()
    };
    let row1 = Variant::new(&metadata1, &value1);

    // Expected variant for row 3: [null, null]
    let (metadata3, value3) = {
        let mut expected = VariantBuilder::new();
        let mut items = expected.new_list();
        items.append_null();
        items.append_null();
        items.finish();
        expected.finish()
    };
    let row3 = Variant::new(&metadata3, &value3);

    // Row 2 is a null list, so no variant is expected for it.
    let expected_rows = vec![Some(row0), Some(row1), None, Some(row3)];
    run_test(Arc::new(input), expected_rows);
}

#[test]
fn test_cast_to_variant_sliced_fixed_size_list() {
    // Input: three fixed-size lists, each holding two Int64 slots.
    let mut input_builder = FixedSizeListBuilder::new(Int64Array::builder(0), 2);

    // Row 0: [0, 1]
    input_builder.values().append_value(0);
    input_builder.values().append_value(1);
    input_builder.append(true);

    // Row 1: [null, 3]
    input_builder.values().append_null();
    input_builder.values().append_value(3);
    input_builder.append(true);

    // Row 2: null list
    input_builder.values().append_value(4);
    input_builder.values().append_null();
    input_builder.append(false);

    let full_array = input_builder.finish();

    // slice(1, 2) keeps rows 1 and 2, so the only non-null expectation is [null, 3].
    let (expected_metadata, expected_value) = {
        let mut expected = VariantBuilder::new();
        let mut items = expected.new_list();
        items.append_null();
        items.append_value(3i64);
        items.finish();
        expected.finish()
    };
    let expected_row = Variant::new(&expected_metadata, &expected_value);

    let sliced = full_array.slice(1, 2);
    run_test(Arc::new(sliced), vec![Some(expected_row), None]);
}

#[test]
fn test_cast_to_variant_struct() {
// Test a simple struct with two fields: id (int64) and age (int32)
Expand Down
Loading