Skip to content
11 changes: 11 additions & 0 deletions rust/arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,22 @@ fn concatenate(array: PyObject, py: Python) -> PyResult<PyObject> {
to_py(array, py)
}

/// Converts to rust and back to python
#[pyfunction]
fn round_trip(array: PyObject, py: Python) -> PyResult<PyObject> {
// import
let array = to_rust(array, py)?;

// export
to_py(array, py)
}

#[pymodule]
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(substring))?;
m.add_wrapped(wrap_pyfunction!(concatenate))?;
m.add_wrapped(wrap_pyfunction!(round_trip))?;
Ok(())
}
19 changes: 19 additions & 0 deletions rust/arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,22 @@ def test_time32_python(self):
del expected
# No leak of C++ memory
self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())

def test_list_array(self):
"""
Python -> Rust -> Python
"""
old_allocated = pyarrow.total_allocated_bytes()
a = pyarrow.array([[], None, [1, 2], [4, 5, 6]], pyarrow.list_(pyarrow.int64()))
b = arrow_pyarrow_integration_testing.round_trip(a)

b.validate(full=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be interesting to add del a above this, to make sure that b keeps the data alive.

assert a.to_pylist() == b.to_pylist()
assert a.type == b.type
del a
del b
# No leak of C++ memory
self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())



56 changes: 49 additions & 7 deletions rust/arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,23 @@ use crate::{
};

use super::ArrayData;
use crate::datatypes::DataType;
use crate::ffi::ArrowArray;

impl TryFrom<ffi::ArrowArray> for ArrayData {
type Error = ArrowError;

fn try_from(value: ffi::ArrowArray) -> Result<Self> {
let data_type = value.data_type()?;
let child_data = value.children()?;

let child_type = if !child_data.is_empty() {
Some(child_data[0].data_type().clone())
} else {
None
};

let data_type = value.data_type(child_type)?;

let len = value.len();
let offset = value.offset();
let null_count = value.null_count();
Expand All @@ -44,9 +55,7 @@ impl TryFrom<ffi::ArrowArray> for ArrayData {
null_bit_buffer,
offset,
buffers,
// this is empty because ffi still does not support it.
// this is ok because FFI only supports datatypes without childs
vec![],
child_data,
))
}
}
Expand All @@ -55,11 +64,45 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
type Error = ArrowError;

fn try_from(value: ArrayData) -> Result<Self> {
// If parent is nullable, then children also must be nullable
// so we pass this nullable to the creation of hte child data
let nullable = match value.data_type() {
DataType::List(field) => field.is_nullable(),
DataType::LargeList(field) => field.is_nullable(),
_ => false,
};

let len = value.len();
let offset = value.offset() as usize;
let null_count = value.null_count();
let buffers = value.buffers().to_vec();
let null_buffer = value.null_buffer().cloned();
let child_data = value
.child_data()
.iter()
.map(|arr| {
let len = arr.len();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand: why isn't try_from called recursively?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because I otherwise cannot pass nullable: bool information from the parent. If should split this up in a function separate from try_from to make this more explicit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you'll need to handle recursive types more generally anyway. Think list(list(int8)).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that would not work like this. I will fix that.

let offset = arr.offset() as usize;
let null_count = arr.null_count();
let buffers = arr.buffers().to_vec();
let null_buffer = arr.null_buffer().cloned();

// Note: the nullable comes from the parent data.
unsafe {
ArrowArray::try_new(
arr.data_type(),
len,
null_count,
null_buffer,
offset,
buffers,
vec![],
nullable,
)
.expect("infallible")
}
})
.collect::<Vec<_>>();

unsafe {
ffi::ArrowArray::try_new(
Expand All @@ -69,9 +112,8 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
null_buffer,
offset,
buffers,
// this is empty because ffi still does not support it.
// this is ok because FFI only supports datatypes without childs
vec![],
child_data,
nullable,
)
}
}
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ impl Field {
};
Ok(Field {
name,
nullable,
data_type,
nullable,
dict_id,
dict_is_ordered,
metadata,
Expand Down
Loading