diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 96e92676051d..f60688dc3337 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -518,12 +518,37 @@ impl Codec { } Codec::List(converter) => { let values = match array.data_type() { - DataType::List(_) => as_list_array(array).values(), - DataType::LargeList(_) => as_large_list_array(array).values(), - DataType::FixedSizeList(_, _) => as_fixed_size_list_array(array).values(), + DataType::List(_) => { + let list_array = as_list_array(array); + let first_offset = list_array.offsets()[0] as usize; + let last_offset = + list_array.offsets()[list_array.offsets().len() - 1] as usize; + + // values can include more data than referenced in the ListArray, only encode + // the referenced values. + list_array + .values() + .slice(first_offset, last_offset - first_offset) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(array); + + let first_offset = list_array.offsets()[0] as usize; + let last_offset = + list_array.offsets()[list_array.offsets().len() - 1] as usize; + + // values can include more data than referenced in the LargeListArray, only encode + // the referenced values. + list_array + .values() + .slice(first_offset, last_offset - first_offset) + } + DataType::FixedSizeList(_, _) => { + as_fixed_size_list_array(array).values().clone() + } _ => unreachable!(), }; - let rows = converter.convert_columns(&[values.clone()])?; + let rows = converter.convert_columns(&[values])?; Ok(Encoder::List(rows)) } Codec::RunEndEncoded(converter) => { diff --git a/arrow-row/src/list.rs b/arrow-row/src/list.rs index e9dc38e0fbe3..91c788fc8f41 100644 --- a/arrow-row/src/list.rs +++ b/arrow-row/src/list.rs @@ -27,14 +27,16 @@ pub fn compute_lengths( rows: &Rows, array: &GenericListArray, ) { + let shift = array.value_offsets()[0].as_usize(); + let offsets = array.value_offsets().windows(2); lengths .iter_mut() .zip(offsets) .enumerate() .for_each(|(idx, (length, offsets))| { - let start = offsets[0].as_usize(); - let end = offsets[1].as_usize(); + let start = offsets[0].as_usize() - shift; + let end = offsets[1].as_usize() - shift; let range = array.is_valid(idx).then_some(start..end); *length += encoded_len(rows, range); }); @@ -61,14 +63,16 @@ pub fn encode( opts: SortOptions, array: &GenericListArray, ) { + let shift = array.value_offsets()[0].as_usize(); + offsets .iter_mut() .skip(1) .zip(array.value_offsets().windows(2)) .enumerate() .for_each(|(idx, (offset, offsets))| { - let start = offsets[0].as_usize(); - let end = offsets[1].as_usize(); + let start = offsets[0].as_usize() - shift; + let end = offsets[1].as_usize() - shift; let range = array.is_valid(idx).then_some(start..end); let out = &mut data[*offset..]; *offset += encode_one(out, rows, range, opts)