Skip to content

Commit

Permalink
Update py03 from 0.20 to 0.21 (#5566)
Browse files Browse the repository at this point in the history
* Update py03 from 0.20 to 0.21

* Bump pyo3 in arrow-pyarrow-integration-testing

* Update pyarrow API to align with pyo3 0.21 changes

* Fix arrow-pyarrow-integration-testing clippy

* Minor

* Fix typo

* Use PyBackedStr when extracting

* Bump to pyo3 0.21.1

* Trigger
  • Loading branch information
Jefffrey authored Apr 5, 2024
1 parent bc2a73d commit 1b0ef02
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 61 deletions.
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
pyo3 = { version = "0.20", features = ["extension-module"] }
pyo3 = { version = "0.21.1", features = ["extension-module"] }
25 changes: 9 additions & 16 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr {

/// Returns `array + array` of an int64 array.
#[pyfunction]
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
// import
let array = make_array(ArrayData::from_pyarrow(array)?);
let array = make_array(ArrayData::from_pyarrow_bound(&array)?);

// perform some operation
let array = array
Expand All @@ -60,15 +60,15 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
/// calls a lambda function that receives and returns an array
/// whose result must be the array multiplied by two
#[pyfunction]
fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
// create
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;

// to py
let pyarray = array.to_data().to_pyarrow(py)?;
let pyarray = lambda.call1((pyarray,))?;
let array = make_array(ArrayData::from_pyarrow(pyarray)?);
let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);

Ok(array == expected)
}
Expand All @@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyO

/// Returns the substring
#[pyfunction]
fn substring(
array: PyArrowType<ArrayData>,
start: i64,
) -> PyResult<PyArrowType<ArrayData>> {
fn substring(array: PyArrowType<ArrayData>, start: i64) -> PyResult<PyArrowType<ArrayData>> {
// import
let array = make_array(array.0);

// substring
let array =
kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;

Ok(array.to_data().into())
}
Expand All @@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject>
let array = make_array(array.0);

// concat
let array =
kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;

array.to_data().to_pyarrow(py)
}
Expand All @@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayDa
}

#[pyfunction]
fn round_trip_record_batch(
obj: PyArrowType<RecordBatch>,
) -> PyResult<PyArrowType<RecordBatch>> {
fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) -> PyResult<PyArrowType<RecordBatch>> {
Ok(obj)
}

Expand Down Expand Up @@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
}

#[pymodule]
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
Expand Down
2 changes: 1 addition & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ arrow-select = { workspace = true }
arrow-string = { workspace = true }

rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
pyo3 = { version = "0.20", default-features = false, optional = true }
pyo3 = { version = "0.21.1", default-features = false, optional = true }

[package.metadata.docs.rs]
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
Expand Down
95 changes: 54 additions & 41 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::ffi::Py_uintptr_t;
use pyo3::import_exception;
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;
use pyo3::types::{PyCapsule, PyList, PyTuple};

use crate::array::{make_array, ArrayData};
Expand All @@ -82,7 +83,12 @@ fn to_py_err(err: ArrowError) -> PyErr {
}

pub trait FromPyArrow: Sized {
fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
#[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")]
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
Self::from_pyarrow_bound(&value.as_borrowed())
}

fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self>;
}

/// Create a new PyArrow object from a arrow-rs type.
Expand All @@ -101,15 +107,17 @@ impl<T: ToPyArrow> IntoPyArrow for T {
}
}

fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
let pyarrow = PyModule::import(value.py(), "pyarrow")?;
fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?;
if !value.is_instance(class)? {
let expected_module = class.getattr("__module__")?.extract::<&str>()?;
let expected_name = class.getattr("__name__")?.extract::<&str>()?;
if !value.is_instance(&class)? {
let expected_module = class.getattr("__module__")?.extract::<PyBackedStr>()?;
let expected_name = class.getattr("__name__")?.extract::<PyBackedStr>()?;
let found_class = value.get_type();
let found_module = found_class.getattr("__module__")?.extract::<&str>()?;
let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
let found_module = found_class
.getattr("__module__")?
.extract::<PyBackedStr>()?;
let found_name = found_class.getattr("__name__")?.extract::<PyBackedStr>()?;
return Err(PyTypeError::new_err(format!(
"Expected instance of {}.{}, got {}.{}",
expected_module, expected_name, found_module, found_name
Expand All @@ -118,7 +126,7 @@ fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
Ok(())
}

fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
let capsule_name = capsule.name()?;
if capsule_name.is_none() {
return Err(PyValueError::new_err(
Expand All @@ -138,13 +146,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
}

impl FromPyArrow for DataType {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -166,21 +174,21 @@ impl ToPyArrow for DataType {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("DataType")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Field {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -202,21 +210,21 @@ impl ToPyArrow for Field {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Field")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Schema {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -238,15 +246,15 @@ impl ToPyArrow for Schema {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Schema")?;
let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(schema.into())
}
}

impl FromPyArrow for ArrayData {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -259,8 +267,10 @@ impl FromPyArrow for ArrayData {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule = tuple.get_item(0)?;
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
let array_capsule = array_capsule.downcast::<PyCapsule>()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -296,7 +306,7 @@ impl ToPyArrow for ArrayData {
let array = FFI_ArrowArray::new(self);
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;

let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
Expand All @@ -310,9 +320,9 @@ impl ToPyArrow for ArrayData {
}

impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
let list = value.downcast::<PyList>()?;
list.iter().map(|x| T::from_pyarrow(x)).collect()
list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
}
}

Expand All @@ -327,7 +337,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
}

impl FromPyArrow for RecordBatch {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -340,8 +350,10 @@ impl FromPyArrow for RecordBatch {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule = tuple.get_item(0)?;
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
let array_capsule = array_capsule.downcast::<PyCapsule>()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -370,12 +382,13 @@ impl FromPyArrow for RecordBatch {
validate_class("RecordBatch", value)?;
// TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches
let schema = value.getattr("schema")?;
let schema = Arc::new(Schema::from_pyarrow(schema)?);
let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?);

let arrays = value.getattr("columns")?.downcast::<PyList>()?;
let arrays = value.getattr("columns")?;
let arrays = arrays
.downcast::<PyList>()?
.iter()
.map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
.map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
.collect::<PyResult<_>>()?;

let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
Expand All @@ -395,13 +408,13 @@ impl ToPyArrow for RecordBatch {

/// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader].
impl FromPyArrow for ArrowArrayStreamReader {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_array_stream")?;

let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
Expand All @@ -421,7 +434,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]);
value.call_method1("_export_to_c", args)?;

let stream_reader = ArrowArrayStreamReader::try_new(stream)
Expand All @@ -439,9 +452,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
let mut stream = FFI_ArrowArrayStream::new(self);

let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;

Ok(PyObject::from(reader))
Expand All @@ -463,8 +476,8 @@ impl IntoPyArrow for ArrowArrayStreamReader {
pub struct PyArrowType<T>(pub T);

impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
fn extract(value: &'source PyAny) -> PyResult<Self> {
Ok(Self(T::from_pyarrow(value)?))
fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
Ok(Self(T::from_pyarrow_bound(value)?))
}
}

Expand Down
4 changes: 2 additions & 2 deletions arrow/tests/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn test_to_pyarrow() {

let res = Python::with_gil(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
let py_records = records.to_pyarrow(py)?;
RecordBatch::from_pyarrow(py_records.as_ref(py))
RecordBatch::from_pyarrow_bound(py_records.bind(py))
})
.unwrap();

Expand Down

0 comments on commit 1b0ef02

Please sign in to comment.