diff --git a/h3ronpy/CHANGES.rst b/h3ronpy/CHANGES.rst index 40484c4..5c80506 100644 --- a/h3ronpy/CHANGES.rst +++ b/h3ronpy/CHANGES.rst @@ -13,6 +13,7 @@ Unreleased ---------- - The minimum supported python version is now 3.9. +- Migrate to the Arrow PyCapsule Interface, supported by pyarrow, polars (v1.2+), pandas (v2.2+), nanoarrow, ... 0.21.1 - 2024-10-04 ------------------- diff --git a/h3ronpy/Cargo.toml b/h3ronpy/Cargo.toml index 4a39a44..1bfed0d 100644 --- a/h3ronpy/Cargo.toml +++ b/h3ronpy/Cargo.toml @@ -14,7 +14,7 @@ name = "h3ronpy" crate-type = ["cdylib"] [dependencies] -arrow = { workspace = true, features = ["pyarrow"] } +arrow = { workspace = true} env_logger = "^0.11" geo-types = { workspace = true } geo = { workspace = true } @@ -26,5 +26,6 @@ numpy = "0.21" ordered-float = ">=2.0.1" py_geo_interface = { version = "0.8", features = ["f64", "wkb"] } pyo3 = { version = "^0.21", features = ["extension-module", "abi3", "abi3-py39"] } +pyo3-arrow = "0.2" rasterh3 = { version = "^0.8", features = ["rayon"] } rayon = { workspace = true } diff --git a/h3ronpy/pyproject.toml b/h3ronpy/pyproject.toml index 412f92e..79b9de1 100644 --- a/h3ronpy/pyproject.toml +++ b/h3ronpy/pyproject.toml @@ -21,7 +21,7 @@ readme = "../README.rst" dependencies = [ "numpy<2", - "pyarrow>=17.0" + "arro3-core" ] classifiers = [ "Programming Language :: Python :: 3", @@ -32,11 +32,15 @@ classifiers = [ [project.optional-dependencies] polars = [ - "polars>=1" + "polars>=1.2" ] pandas = [ + "pyarrow>=17.0", "geopandas>=1" ] +pyarrow = [ + "pyarrow>=17.0" +] test = [ "rasterio", "Shapely>=1.7", diff --git a/h3ronpy/python/h3ronpy/arrow/__init__.py b/h3ronpy/python/h3ronpy/arrow/__init__.py index c99f61a..26c2e6a 100644 --- a/h3ronpy/python/h3ronpy/arrow/__init__.py +++ b/h3ronpy/python/h3ronpy/arrow/__init__.py @@ -25,9 +25,6 @@ def _to_arrow_array(arr, dtype) -> pa.Array: return converted -def _to_uint64_array(arr) -> pa.Array: - return _to_arrow_array(arr, pa.uint64()) - def change_resolution(arr, resolution: int) -> pa.Array: """ @@ -38,7 +35,7 @@ def change_resolution(arr, resolution: int) -> pa.Array: Invalid/empty values are omitted. """ - return op.change_resolution(_to_uint64_array(arr), resolution) + return op.change_resolution(arr, resolution) def change_resolution_list(arr, resolution: int) -> pa.Array: @@ -50,7 +47,7 @@ def change_resolution_list(arr, resolution: int) -> pa.Array: Invalid/empty values are preserved as such. """ - return op.change_resolution_list(_to_uint64_array(arr), resolution) + return op.change_resolution_list(arr, resolution) def change_resolution_paired(arr, resolution: int) -> pa.Table: @@ -61,7 +58,7 @@ def change_resolution_paired(arr, resolution: int) -> pa.Table: This can be helpful when joining data in different resolutions via dataframe libraries """ - return op.change_resolution_paired(_to_uint64_array(arr), resolution) + return op.change_resolution_paired(arr, resolution) def cells_resolution(arr) -> pa.Array: @@ -72,7 +69,7 @@ def cells_resolution(arr) -> pa.Array: :param arr: :return: """ - return op.cells_resolution(_to_uint64_array(arr)) + return op.cells_resolution(arr) def cells_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: @@ -121,7 +118,7 @@ def compact(arr, mixed_resolutions: bool = False) -> pa.Array: The cells are expected to be of the same resolution, otherwise this operation will fail unless `mixed_resolutions` is set to True. Setting this may lead to slight slow-downs. """ - return op.compact(_to_uint64_array(arr), mixed_resolutions=mixed_resolutions) + return op.compact(arr, mixed_resolutions=mixed_resolutions) def uncompact(arr, target_resolution: int) -> pa.Array: @@ -131,12 +128,12 @@ def uncompact(arr, target_resolution: int) -> pa.Array: All higher resolution cells contained in the input array than the given `target_resolution` will be omitted from the output. """ - return op.uncompact(_to_uint64_array(arr), target_resolution) + return op.uncompact(arr, target_resolution) def _make_h3index_valid_wrapper(fn, h3index_name, wrapper_name): def valid_wrapper(arr, booleanarray: bool = False) -> pa.Array: - return fn(_to_uint64_array(arr), booleanarray=booleanarray) + return fn(arr, booleanarray=booleanarray) valid_wrapper.__doc__ = f""" Validate an array of potentially invalid {h3index_name} values by returning a new @@ -155,46 +152,46 @@ def valid_wrapper(arr, booleanarray: bool = False) -> pa.Array: def grid_disk(cellarray, k: int, flatten: bool = False) -> Union[pa.ListArray, pa.Array]: - return op.grid_disk(_to_uint64_array(cellarray), k, flatten=flatten) + return op.grid_disk(cellarray, k, flatten=flatten) def grid_disk_distances(cellarray, k: int, flatten: bool = False) -> pa.Table: - return op.grid_disk_distances(_to_uint64_array(cellarray), k, flatten=flatten) + return op.grid_disk_distances(cellarray, k, flatten=flatten) def grid_disk_aggregate_k(cellarray, k: int, aggregation_method: str) -> pa.Table: """ Valid values for `aggregation_method` are `"min"` and `"max"`. """ - return op.grid_disk_aggregate_k(_to_uint64_array(cellarray), k, aggregation_method) + return op.grid_disk_aggregate_k(cellarray, k, aggregation_method) def grid_ring_distances(cellarray, k_min: int, k_max: int, flatten: bool = False) -> pa.Table: - return op.grid_ring_distances(_to_uint64_array(cellarray), k_min, k_max, flatten=flatten) + return op.grid_ring_distances(cellarray, k_min, k_max, flatten=flatten) def cells_area_m2(cellarray) -> pa.Array: - return op.cells_area_m2(_to_uint64_array(cellarray)) + return op.cells_area_m2(cellarray) def cells_area_km2(cellarray) -> pa.Array: - return op.cells_area_km2(_to_uint64_array(cellarray)) + return op.cells_area_km2(cellarray) def cells_area_rads2(cellarray) -> pa.Array: - return op.cells_area_rads2(_to_uint64_array(cellarray)) + return op.cells_area_rads2(cellarray) def cells_to_string(cellarray) -> pa.Array: - return op.cells_to_string(_to_uint64_array(cellarray)) + return op.cells_to_string(cellarray) def vertexes_to_string(vertexesarray) -> pa.Array: - return op.vertexes_to_string(_to_uint64_array(vertexesarray)) + return op.vertexes_to_string(vertexesarray) def directededges_to_string(directededgearray) -> pa.Array: - return op.directededges_to_string(_to_uint64_array(directededgearray)) + return op.directededges_to_string(directededgearray) def cells_to_localij(cellarray, anchor, set_failing_to_invalid: bool = False) -> pa.Table: @@ -213,9 +210,7 @@ def cells_to_localij(cellarray, anchor, set_failing_to_invalid: bool = False) -> successfully. When `set_failing_to_invalid` is set to True, only the failing positions of the output arrays will be set to null. """ - if type(anchor) is not int: - anchor = _to_uint64_array(anchor) - return op.cells_to_localij(_to_uint64_array(cellarray), anchor, set_failing_to_invalid=set_failing_to_invalid) + return op.cells_to_localij(cellarray, anchor, set_failing_to_invalid=set_failing_to_invalid) def localij_to_cells(anchor, i, j, set_failing_to_invalid: bool = False) -> pa.Array: @@ -226,8 +221,6 @@ def localij_to_cells(anchor, i, j, set_failing_to_invalid: bool = False) -> pa.A successfully. When `set_failing_to_invalid` is set to True, only the failing positions of the output arrays will be set to null. """ - if type(anchor) is not int: - anchor = _to_uint64_array(anchor) return op.localij_to_cells( anchor, _to_arrow_array(i, pa.int32()), diff --git a/h3ronpy/python/h3ronpy/arrow/raster.py b/h3ronpy/python/h3ronpy/arrow/raster.py index 1dbb03b..081c705 100644 --- a/h3ronpy/python/h3ronpy/arrow/raster.py +++ b/h3ronpy/python/h3ronpy/arrow/raster.py @@ -30,7 +30,7 @@ from h3ronpy.h3ronpyrs import raster from .. import DEFAULT_CELL_COLUMN_NAME -from . import _to_uint64_array, _to_arrow_array +from . import _to_arrow_array from .vector import cells_to_wkb_polygons, cells_bounds import numpy as np import pyarrow as pa @@ -95,7 +95,7 @@ def raster_to_dataframe( :param h3_resolution: Target h3 resolution :param compact: Return compacted h3 indexes (see H3 docs). This results in mixed H3 resolutions, but also can reduce the amount of required memory. - :return: Tuple of arrow arrays + :return: arrow table """ dtype = in_raster.dtype @@ -123,10 +123,7 @@ def raster_to_dataframe( else: raise NotImplementedError(f"no raster_to_h3 implementation for dtype {dtype.name}") - return pa.Table.from_arrays( - arrays=func(in_raster, _get_transform(transform), h3_resolution, axis_order, compact, nodata_value), - names=["value", DEFAULT_CELL_COLUMN_NAME], - ) + return func(in_raster, _get_transform(transform), h3_resolution, axis_order, compact, nodata_value) def rasterize_cells( @@ -149,7 +146,6 @@ def rasterize_cells( from rasterio.features import rasterize import shapely - cells = _to_uint64_array(cells) values = _to_arrow_array(values, None) if len(cells) != len(values): @@ -190,7 +186,7 @@ def rasterize_cells( # linking cells should speed up rendering in case of large homogenous areas polygons = cells_to_wkb_polygons(cells, link_cells=True) - polygons = [shapely.from_wkb(polygon.as_py()) for polygon in polygons.filter(polygons.is_valid())] + polygons = [shapely.from_wkb(polygon.as_py()) for polygon in polygons if polygon] # draw rasterize( diff --git a/h3ronpy/python/h3ronpy/arrow/vector.py b/h3ronpy/python/h3ronpy/arrow/vector.py index c8a3275..6195365 100644 --- a/h3ronpy/python/h3ronpy/arrow/vector.py +++ b/h3ronpy/python/h3ronpy/arrow/vector.py @@ -1,6 +1,6 @@ from h3ronpy.h3ronpyrs import vector from .. import ContainmentMode -from . import _to_uint64_array, _HAS_POLARS, _to_arrow_array +from . import _HAS_POLARS, _to_arrow_array from typing import Optional, Tuple, Union import pyarrow as pa @@ -9,7 +9,7 @@ def cells_to_coordinates(arr, radians: bool = False) -> pa.Table: """ convert to point coordinates in degrees """ - return vector.cells_to_coordinates(_to_uint64_array(arr), radians=radians) + return vector.cells_to_coordinates(arr, radians=radians) def coordinates_to_cells(latarray, lngarray, resarray, radians: bool = False) -> pa.Array: @@ -35,7 +35,7 @@ def cells_bounds(arr) -> Optional[Tuple]: """ Bounds of the complete array as a tuple `(minx, miny, maxx, maxy)`. """ - return vector.cells_bounds(_to_uint64_array(arr)) + return vector.cells_bounds(arr) def cells_bounds_arrays(arr) -> pa.Table: @@ -43,7 +43,7 @@ def cells_bounds_arrays(arr) -> pa.Table: Build a table/dataframe with the columns `minx`, `miny`, `maxx` and `maxy` containing the bounds of the individual cells from the input array. """ - return vector.cells_bounds_arrays(_to_uint64_array(arr)) + return vector.cells_bounds_arrays(arr) def cells_to_wkb_polygons(arr, radians: bool = False, link_cells: bool = False) -> pa.Array: @@ -57,7 +57,7 @@ def cells_to_wkb_polygons(arr, radians: bool = False, link_cells: bool = False) :param radians: Generate geometries using radians instead of degrees :param link_cells: Combine neighboring cells into a single polygon geometry. """ - return vector.cells_to_wkb_polygons(_to_uint64_array(arr), radians=radians, link_cells=link_cells) + return vector.cells_to_wkb_polygons(arr, radians=radians, link_cells=link_cells) def cells_to_wkb_points(arr, radians: bool = False) -> pa.Array: @@ -69,7 +69,7 @@ def cells_to_wkb_points(arr, radians: bool = False) -> pa.Array: :param: arr: The cell array :param radians: Generate geometries using radians instead of degrees """ - return vector.cells_to_wkb_points(_to_uint64_array(arr), radians=radians) + return vector.cells_to_wkb_points(arr, radians=radians) def vertexes_to_wkb_points(arr, radians: bool = False) -> pa.Array: @@ -81,7 +81,7 @@ def vertexes_to_wkb_points(arr, radians: bool = False) -> pa.Array: :param: arr: The vertex array :param radians: Generate geometries using radians instead of degrees """ - return vector.vertexes_to_wkb_points(_to_uint64_array(arr), radians=radians) + return vector.vertexes_to_wkb_points(arr, radians=radians) def directededges_to_wkb_linestrings(arr, radians: bool = False) -> pa.Array: @@ -93,7 +93,7 @@ def directededges_to_wkb_linestrings(arr, radians: bool = False) -> pa.Array: :param: arr: The directed edge array :param radians: Generate geometries using radians instead of degrees """ - return vector.directededges_to_wkb_linestrings(_to_uint64_array(arr), radians=radians) + return vector.directededges_to_wkb_linestrings(arr, radians=radians) def wkb_to_cells( diff --git a/h3ronpy/python/h3ronpy/pandas/__init__.py b/h3ronpy/python/h3ronpy/pandas/__init__.py index 99af347..b2bf87c 100644 --- a/h3ronpy/python/h3ronpy/pandas/__init__.py +++ b/h3ronpy/python/h3ronpy/pandas/__init__.py @@ -11,17 +11,23 @@ from .. import arrow as _arrow import pyarrow as pa from functools import wraps +from arro3.core import Array, Table import pandas as pd +import pyarrow as pa def _wrap(func, ret_type=None): @wraps(func) def wrapper(*args, **kw): result = func(*args, **kw) - if isinstance(result, pa.Table): - return result.to_pandas(split_blocks=True, self_destruct=True) - elif isinstance(result, pa.Array): - return result.to_pandas() + if isinstance(result, Array): + return pa.array(result).to_pandas() + elif isinstance(result, Table): + return pa.table(result).to_pandas(split_blocks=True, self_destruct=True) + #elif isinstance(result, pa.Table): + # return result.to_pandas(split_blocks=True, self_destruct=True) + #elif isinstance(result, pa.Array): + # return result.to_pandas() return result if ret_type: diff --git a/h3ronpy/python/h3ronpy/pandas/raster.py b/h3ronpy/python/h3ronpy/pandas/raster.py index 0cbea5f..27a73a6 100644 --- a/h3ronpy/python/h3ronpy/pandas/raster.py +++ b/h3ronpy/python/h3ronpy/pandas/raster.py @@ -1,6 +1,7 @@ import geopandas as gpd import numpy as np import pandas as pd +import pyarrow as pa import typing from ..arrow import raster as arrow_raster @@ -39,9 +40,9 @@ def raster_to_dataframe( :return: pandas `DataFrame` or `GeoDataFrame` """ - df = arrow_raster.raster_to_dataframe( + df = pa.table(arrow_raster.raster_to_dataframe( in_raster, transform, h3_resolution, nodata_value=nodata_value, axis_order=axis_order, compact=compact - ).to_pandas() + )).to_pandas() if geo: return cells_dataframe_to_geodataframe(df) diff --git a/h3ronpy/python/h3ronpy/pandas/vector.py b/h3ronpy/python/h3ronpy/pandas/vector.py index 75e320d..71ec6bf 100644 --- a/h3ronpy/python/h3ronpy/pandas/vector.py +++ b/h3ronpy/python/h3ronpy/pandas/vector.py @@ -60,7 +60,7 @@ def wrapper(*args, **kw): @wraps(wkb_to_cells) def geoseries_to_cells(geoseries: gpd.GeoSeries, *args, **kw): - return _av.wkb_to_cells(geoseries.to_wkb(), *args, **kw).to_pandas() + return pa.array(_av.wkb_to_cells(geoseries.to_wkb(), *args, **kw)).to_pandas() geoseries_to_cells.__name__ = "geoseries_to_cells" diff --git a/h3ronpy/python/h3ronpy/polars/__init__.py b/h3ronpy/python/h3ronpy/polars/__init__.py index a0fe60a..56e7d1e 100644 --- a/h3ronpy/python/h3ronpy/polars/__init__.py +++ b/h3ronpy/python/h3ronpy/polars/__init__.py @@ -11,7 +11,7 @@ from functools import wraps import typing import polars as pl -import pyarrow as pa +from arro3.core import Array, Table from .. import arrow as _arrow @@ -19,8 +19,10 @@ def _wrap(func, ret_type=None): @wraps(func, updated=()) def wrapper(*args, **kw): result = func(*args, **kw) - if isinstance(result, pa.Table) or isinstance(result, pa.Array): - return pl.from_arrow(result) + if isinstance(result, Array): + return pl.Series(result) + elif isinstance(result, Table): + return pl.DataFrame(result) return result if ret_type: diff --git a/h3ronpy/src/arrow_interop.rs b/h3ronpy/src/arrow_interop.rs index 058f91b..4714747 100644 --- a/h3ronpy/src/arrow_interop.rs +++ b/h3ronpy/src/arrow_interop.rs @@ -1,38 +1,20 @@ -use arrow::array::{make_array, Array, ArrayData, UInt64Array}; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow}; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, UInt64Array, UInt8Array}; +use arrow::compute::concat; +use arrow::datatypes::{Field, FieldRef}; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::{PyArray, PyChunkedArray}; use std::any::{type_name, Any}; +use std::sync::Arc; -use h3arrow::array::{ - CellIndexArray, DirectedEdgeIndexArray, H3Array, H3IndexArrayValue, VertexIndexArray, -}; +use h3arrow::array::{CellIndexArray, DirectedEdgeIndexArray, ResolutionArray, VertexIndexArray}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::Python; use crate::error::{IntoPyErr, IntoPyResult}; -pub(crate) fn with_pyarrow(f: F) -> PyResult -where - F: FnOnce(Python, Bound) -> PyResult, -{ - Python::with_gil(|py| { - let pyarrow = py.import_bound("pyarrow")?; - f(py, pyarrow) - }) -} - -#[inline] -pub fn h3array_to_pyarray(h3array: H3Array, py: Python) -> PyResult -where - IX: H3IndexArrayValue, -{ - let pa: UInt64Array = h3array.into(); - pa.into_data().into_pyarrow(py) -} - -pub(crate) fn pyarray_to_native(obj: &Bound) -> PyResult { - let array = make_array(ArrayData::from_pyarrow_bound(obj)?); - +pub(crate) fn pyarray_to_native(array: PyArray) -> PyResult { + let array = array.array(); let array = array .as_any() .downcast_ref::() @@ -49,29 +31,113 @@ pub(crate) fn pyarray_to_native(obj: &Bound) -> P Ok(array) } -pub(crate) fn pyarray_to_cellindexarray(obj: &Bound) -> PyResult { +/* +pub(crate) fn pyarray_to_cellindexarray(obj: PyArray) -> PyResult { pyarray_to_h3array::(obj) } -pub(crate) fn pyarray_to_vertexindexarray(obj: &Bound) -> PyResult { +pub(crate) fn pyarray_to_vertexindexarray(obj: PyArray) -> PyResult { pyarray_to_h3array::(obj) } -pub(crate) fn pyarray_to_directededgeindexarray( - obj: &Bound, -) -> PyResult { +pub(crate) fn pyarray_to_directededgeindexarray(obj: PyArray) -> PyResult { pyarray_to_h3array::(obj) } + */ -pub(crate) fn pyarray_to_uint64array(obj: &Bound) -> PyResult { - pyarray_to_native::(obj) +pub(crate) fn pyarray_to_uint64array(array: PyArray) -> PyResult { + pyarray_to_native::(array) } #[inline] -fn pyarray_to_h3array(obj: &Bound) -> PyResult +fn pyarray_to_h3array(obj: PyArray) -> PyResult where T: TryFrom, >::Error: IntoPyErr, { T::try_from(pyarray_to_uint64array(obj)?).into_pyresult() } + +pub(crate) fn array_to_arro3>( + py: Python<'_>, + array: A, + name: S, + nullable: bool, +) -> PyArrowResult { + let data_type = array.data_type().clone(); + Ok(PyArray::new( + Arc::new(array), + Field::new(name, data_type, nullable).into(), + ) + .to_arro3(py)?) +} + +/// Workaround to avoid implementing all algorithms for &[ArrayRef] / chunked arrays. +/// +/// This comes at the cost of having to copy data arround and should be improved. +pub struct PyConcatedArray(PyArray); + +impl PyConcatedArray { + #[allow(unused)] + pub fn array(&self) -> &ArrayRef { + self.0.array() + } + + pub fn field(&self) -> &FieldRef { + self.0.field() + } + + pub fn into_cellindexarray(self) -> PyResult { + pyarray_to_h3array::(self.0) + } + + pub fn into_vertexindexarray(self) -> PyResult { + pyarray_to_h3array::(self.0) + } + + pub fn into_directededgeindexarray(self) -> PyResult { + pyarray_to_h3array::(self.0) + } + + pub fn into_int32array(self) -> PyResult { + pyarray_to_native::(self.0) + } + + pub fn into_uint64array(self) -> PyResult { + pyarray_to_native::(self.0) + } + + pub fn into_float64array(self) -> PyResult { + pyarray_to_native::(self.0) + } + + pub fn into_resolutionarray(self) -> PyResult { + ResolutionArray::try_from(pyarray_to_native::(self.0)?).into_pyresult() + } +} + +impl From for PyArray { + fn from(value: PyConcatedArray) -> Self { + value.0 + } +} + +impl<'a> FromPyObject<'a> for PyConcatedArray { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { + if let Ok(array) = ob.extract::() { + Ok(Self(array)) + } else if let Ok(chunked_array) = ob.extract::() { + let (arrays, field) = chunked_array.into_inner(); + let array_refs: Vec<_> = arrays.iter().map(|a| a.as_ref()).collect(); + Ok(Self( + PyArray::try_new(concat(array_refs.as_ref()).into_pyresult()?, field) + .into_pyresult()?, + )) + // TODO: numpy support. is available in pyo3-arrow, but not available in the rust api + } else { + Err(PyValueError::new_err( + "Expected object with __arrow_c_array__ method, __arrow_c_stream__ method or implementing buffer protocol.", + )) + } + } +} diff --git a/h3ronpy/src/op/compact.rs b/h3ronpy/src/op/compact.rs index 37b2333..e5d575c 100644 --- a/h3ronpy/src/op/compact.rs +++ b/h3ronpy/src/op/compact.rs @@ -1,28 +1,44 @@ +use arrow::array::PrimitiveArray; use h3arrow::algorithm::CompactOp; use h3arrow::export::h3o::Resolution; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; use crate::arrow_interop::*; use crate::error::IntoPyResult; #[pyfunction] #[pyo3(signature = (cellarray, mixed_resolutions = false))] -pub(crate) fn compact(cellarray: &Bound, mixed_resolutions: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; - let compacted = if mixed_resolutions { - cellindexarray.compact_mixed_resolutions() - } else { - cellindexarray.compact() - } - .into_pyresult()?; +pub(crate) fn compact( + py: Python<'_>, + cellarray: PyConcatedArray, + mixed_resolutions: bool, +) -> PyArrowResult { + let name = cellarray.field().name().to_string(); + let cellindexarray = cellarray.into_cellindexarray()?; + let compacted = py + .allow_threads(|| { + if mixed_resolutions { + cellindexarray.compact_mixed_resolutions() + } else { + cellindexarray.compact() + } + }) + .into_pyresult()?; - Python::with_gil(|py| h3array_to_pyarray(compacted, py)) + array_to_arro3(py, PrimitiveArray::from(compacted), name, true) } #[pyfunction] #[pyo3(signature = (cellarray, target_resolution))] -pub(crate) fn uncompact(cellarray: &Bound, target_resolution: u8) -> PyResult { +pub(crate) fn uncompact( + py: Python<'_>, + cellarray: PyConcatedArray, + target_resolution: u8, +) -> PyArrowResult { + let name = cellarray.field().name().to_string(); let target_resolution = Resolution::try_from(target_resolution).into_pyresult()?; - let out = pyarray_to_cellindexarray(cellarray)?.uncompact(target_resolution); - Python::with_gil(|py| h3array_to_pyarray(out, py)) + let cellindexarray = cellarray.into_cellindexarray()?; + let out = py.allow_threads(|| cellindexarray.uncompact(target_resolution)); + array_to_arro3(py, PrimitiveArray::from(out), name, true) } diff --git a/h3ronpy/src/op/localij.rs b/h3ronpy/src/op/localij.rs index 5ddcbee..ee4001f 100644 --- a/h3ronpy/src/op/localij.rs +++ b/h3ronpy/src/op/localij.rs @@ -1,78 +1,84 @@ -use crate::arrow_interop::{ - h3array_to_pyarray, pyarray_to_cellindexarray, pyarray_to_native, with_pyarrow, -}; +use crate::arrow_interop::{array_to_arro3, PyConcatedArray}; use crate::error::IntoPyResult; -use arrow::array::{Array, Int32Array}; -use arrow::pyarrow::ToPyArrow; +use arrow::array::{Array, ArrayRef, PrimitiveArray, RecordBatch}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::localij::{LocalIJArrays, ToLocalIJOp}; use h3arrow::array::CellIndexArray; use h3arrow::h3o::CellIndex; use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; -use pyo3::{pyfunction, Bound, PyAny, PyObject, PyResult, Python, ToPyObject}; +use pyo3::{pyfunction, Bound, PyAny, PyObject, PyResult, Python}; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyTable; use std::iter::repeat; +use std::sync::Arc; #[pyfunction] #[pyo3(signature = (cellarray, anchor, set_failing_to_invalid = false))] pub(crate) fn cells_to_localij( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, anchor: &Bound, set_failing_to_invalid: bool, ) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; + let cellindexarray = cellarray.into_cellindexarray()?; let anchorarray = get_anchor_array(anchor, cellindexarray.len())?; let localij_arrays = cellindexarray .to_local_ij_array(anchorarray, set_failing_to_invalid) .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - localij_arrays.i.into_data().to_pyarrow(py)?, - localij_arrays.j.into_data().to_pyarrow(py)?, - localij_arrays - .anchors - .primitive_array() - .into_data() - .to_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, ["i", "j", "anchor"]))?; - Ok(table.to_object(py)) - }) + let outarrays: Vec = vec![ + Arc::new(localij_arrays.i), + Arc::new(localij_arrays.j), + Arc::new(PrimitiveArray::from(localij_arrays.anchors)), + ]; + + let schema = Arc::new(Schema::new(vec![ + Field::new("i", outarrays[0].data_type().clone(), true), + Field::new("j", outarrays[1].data_type().clone(), true), + Field::new("anchor", outarrays[2].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + + PyTable::try_new(vec![rb], schema)?.to_arro3(py) } #[pyfunction] #[pyo3(signature = (anchor, i_array, j_array, set_failing_to_invalid = false))] pub(crate) fn localij_to_cells( + py: Python<'_>, anchor: &Bound, - i_array: &Bound, - j_array: &Bound, + i_array: PyConcatedArray, + j_array: PyConcatedArray, set_failing_to_invalid: bool, -) -> PyResult { - let i_array = pyarray_to_native::(i_array)?; - let j_array = pyarray_to_native::(j_array)?; +) -> PyArrowResult { + let i_array = i_array.into_int32array()?; + let j_array = j_array.into_int32array()?; let anchorarray = get_anchor_array(anchor, i_array.len())?; let localij_arrays = LocalIJArrays::try_new(anchorarray, i_array, j_array).into_pyresult()?; - let cellarray = if set_failing_to_invalid { - localij_arrays - .to_cells_failing_to_invalid() - .into_pyresult()? - } else { - localij_arrays.to_cells().into_pyresult()? - }; + let cellarray = py.allow_threads(|| { + if set_failing_to_invalid { + localij_arrays.to_cells_failing_to_invalid().into_pyresult() + } else { + localij_arrays.to_cells().into_pyresult() + } + })?; - Python::with_gil(|py| h3array_to_pyarray(cellarray, py)) + array_to_arro3(py, PrimitiveArray::from(cellarray), "cells", true) } fn get_anchor_array(anchor: &Bound, len: usize) -> PyResult { if let Ok(anchor) = anchor.extract::() { let anchor_cell = CellIndex::try_from(anchor).into_pyresult()?; Ok(CellIndexArray::from_iter(repeat(anchor_cell).take(len))) - } else if let Ok(anchorarray) = pyarray_to_cellindexarray(anchor) { + } else if let Ok(anchorarray) = anchor + .extract::() + .and_then(|ca| ca.into_cellindexarray()) + { Ok(anchorarray) } else { return Err(PyValueError::new_err(format!( diff --git a/h3ronpy/src/op/measure.rs b/h3ronpy/src/op/measure.rs index 3f5ccd3..4380976 100644 --- a/h3ronpy/src/op/measure.rs +++ b/h3ronpy/src/op/measure.rs @@ -1,25 +1,32 @@ -use crate::arrow_interop::pyarray_to_cellindexarray; -use arrow::array::Array; -use arrow::pyarrow::IntoPyArrow; +use crate::arrow_interop::{array_to_arro3, PyConcatedArray}; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; + +const AREA_NAME: &str = "area"; #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_m2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_m2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_m2(py: Python<'_>, cellarray: PyConcatedArray) -> PyArrowResult { + let out = cellarray.into_cellindexarray()?.area_m2(); + array_to_arro3(py, out, AREA_NAME, true) } #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_km2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_km2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_km2( + py: Python<'_>, + cellarray: PyConcatedArray, +) -> PyArrowResult { + let out = cellarray.into_cellindexarray()?.area_km2(); + array_to_arro3(py, out, AREA_NAME, true) } #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_rads2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_rads2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_rads2( + py: Python<'_>, + cellarray: PyConcatedArray, +) -> PyArrowResult { + let out = cellarray.into_cellindexarray()?.area_rads2(); + array_to_arro3(py, out, AREA_NAME, true) } diff --git a/h3ronpy/src/op/neighbor.rs b/h3ronpy/src/op/neighbor.rs index 8a556e5..817320d 100644 --- a/h3ronpy/src/op/neighbor.rs +++ b/h3ronpy/src/op/neighbor.rs @@ -1,90 +1,120 @@ -use arrow::array::{Array, GenericListArray, LargeListArray, PrimitiveArray, UInt32Array}; -use arrow::pyarrow::{IntoPyArrow, ToPyArrow}; +use arrow::array::{ + Array, ArrayRef, GenericListArray, LargeListArray, PrimitiveArray, RecordBatch, UInt32Array, +}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::{GridDiskDistances, GridOp, KAggregationMethod}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; -use pyo3::{PyObject, PyResult}; +use pyo3::PyObject; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyTable; use std::str::FromStr; +use std::sync::Arc; use crate::arrow_interop::*; use crate::error::IntoPyResult; use crate::DEFAULT_CELL_COLUMN_NAME; use pyo3::prelude::*; +const DISK_NAME: &str = "disk"; + #[pyfunction] #[pyo3(signature = (cellarray, k, flatten = false))] -pub(crate) fn grid_disk(cellarray: &Bound, k: u32, flatten: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; - let listarray = cellindexarray.grid_disk(k).into_pyresult()?; +pub(crate) fn grid_disk( + py: Python<'_>, + cellarray: PyConcatedArray, + k: u32, + flatten: bool, +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + let listarray = py + .allow_threads(|| cellindexarray.grid_disk(k)) + .into_pyresult()?; if flatten { - let cellindexarray = listarray.into_flattened().into_pyresult()?; - Python::with_gil(|py| h3array_to_pyarray(cellindexarray, py)) + let cellindexarray = py + .allow_threads(|| listarray.into_flattened()) + .into_pyresult()?; + array_to_arro3(py, PrimitiveArray::from(cellindexarray), DISK_NAME, true) } else { - Python::with_gil(|py| LargeListArray::from(listarray).into_data().to_pyarrow(py)) + array_to_arro3(py, LargeListArray::from(listarray), DISK_NAME, true) } } #[pyfunction] #[pyo3(signature = (cellarray, k, flatten = false))] pub(crate) fn grid_disk_distances( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, k: u32, flatten: bool, -) -> PyResult { - let griddiskdistances = pyarray_to_cellindexarray(cellarray)? - .grid_disk_distances(k) +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + let griddiskdistances = py + .allow_threads(|| cellindexarray.grid_disk_distances(k)) .into_pyresult()?; - return_griddiskdistances_table(griddiskdistances, flatten) + return_griddiskdistances_table(py, griddiskdistances, flatten) } #[pyfunction] #[pyo3(signature = (cellarray, k_min, k_max, flatten = false))] pub(crate) fn grid_ring_distances( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, k_min: u32, k_max: u32, flatten: bool, -) -> PyResult { +) -> PyArrowResult { if k_min >= k_max { - return Err(PyValueError::new_err("k_min must be less than k_max")); + return Err(PyValueError::new_err("k_min must be less than k_max").into()); } - let griddiskdistances = pyarray_to_cellindexarray(cellarray)? - .grid_ring_distances(k_min, k_max) + let cellindexarray = cellarray.into_cellindexarray()?; + let griddiskdistances = py + .allow_threads(|| cellindexarray.grid_ring_distances(k_min, k_max)) .into_pyresult()?; - return_griddiskdistances_table(griddiskdistances, flatten) + return_griddiskdistances_table(py, griddiskdistances, flatten) } fn return_griddiskdistances_table( + py: Python<'_>, griddiskdistances: GridDiskDistances, flatten: bool, -) -> PyResult { - let (cells, distances) = if flatten { +) -> PyArrowResult { + let (cells, distances): (ArrayRef, ArrayRef) = if flatten { ( - PrimitiveArray::from(griddiskdistances.cells.into_flattened().into_pyresult()?) - .into_data(), - griddiskdistances - .distances - .values() - .as_any() - .downcast_ref::() - .ok_or_else(|| PyRuntimeError::new_err("expected primitivearray")) - .map(|pa| pa.clone().into_data())?, + Arc::new(PrimitiveArray::from( + py.allow_threads(|| griddiskdistances.cells.into_flattened()) + .into_pyresult()?, + )), + Arc::new( + griddiskdistances + .distances + .values() + .as_any() + .downcast_ref::() + .ok_or_else(|| PyRuntimeError::new_err("expected primitivearray"))? + .clone(), + ), ) } else { ( - GenericListArray::::from(griddiskdistances.cells).into_data(), - griddiskdistances.distances.into_data(), + Arc::new(GenericListArray::::from(griddiskdistances.cells)), + Arc::new(griddiskdistances.distances), ) }; - with_pyarrow(|py, pyarrow| { - let arrays = [cells.into_pyarrow(py)?, distances.into_pyarrow(py)?]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, [DEFAULT_CELL_COLUMN_NAME, "k"]))?; - Ok(table.to_object(py)) - }) + let outarrays: Vec = vec![cells, distances]; + let schema = Arc::new(Schema::new(vec![ + Field::new( + DEFAULT_CELL_COLUMN_NAME, + outarrays[0].data_type().clone(), + true, + ), + Field::new("k", outarrays[1].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) } struct KAggregationMethodWrapper(KAggregationMethod); @@ -104,24 +134,31 @@ impl FromStr for KAggregationMethodWrapper { #[pyfunction] #[pyo3(signature = (cellarray, k, aggregation_method))] pub(crate) fn grid_disk_aggregate_k( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, k: u32, aggregation_method: &str, -) -> PyResult { +) -> PyArrowResult { let aggregation_method = KAggregationMethodWrapper::from_str(aggregation_method)?; + let cellindexarray = cellarray.into_cellindexarray()?; - let griddiskaggk = pyarray_to_cellindexarray(cellarray)? - .grid_disk_aggregate_k(k, aggregation_method.0) + let griddiskaggk = py + .allow_threads(|| cellindexarray.grid_disk_aggregate_k(k, aggregation_method.0)) .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - h3array_to_pyarray(griddiskaggk.cells, py)?, - griddiskaggk.distances.into_data().into_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, [DEFAULT_CELL_COLUMN_NAME, "k"]))?; - Ok(table.to_object(py)) - }) + let outarrays: Vec = vec![ + Arc::new(PrimitiveArray::from(griddiskaggk.cells)), + Arc::new(griddiskaggk.distances), + ]; + let schema = Arc::new(Schema::new(vec![ + Field::new( + DEFAULT_CELL_COLUMN_NAME, + outarrays[0].data_type().clone(), + true, + ), + Field::new("k", outarrays[1].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) } diff --git a/h3ronpy/src/op/resolution.rs b/h3ronpy/src/op/resolution.rs index 26543da..75b9760 100644 --- a/h3ronpy/src/op/resolution.rs +++ b/h3ronpy/src/op/resolution.rs @@ -1,70 +1,96 @@ -use arrow::array::{Array, LargeListArray, PrimitiveArray}; -use arrow::pyarrow::{IntoPyArrow, ToPyArrow}; +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, LargeListArray, PrimitiveArray, RecordBatch}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::ChangeResolutionOp; use h3arrow::export::h3o::Resolution; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyTable; use crate::arrow_interop::*; use crate::error::IntoPyResult; use crate::DEFAULT_CELL_COLUMN_NAME; +const RESOLUTION_NAME: &str = "resolution"; + #[pyfunction] -pub(crate) fn change_resolution(cellarray: &Bound, h3_resolution: u8) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn change_resolution( + py: Python<'_>, + cellarray: PyConcatedArray, + h3_resolution: u8, +) -> PyArrowResult { + let field = cellarray.field().clone(); + let cellindexarray = cellarray.into_cellindexarray()?; let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; - let out = cellindexarray - .change_resolution(h3_resolution) + let out = py + .allow_threads(|| cellindexarray.change_resolution(h3_resolution)) .into_pyresult()?; - Python::with_gil(|py| h3array_to_pyarray(out, py)) + array_to_arro3(py, PrimitiveArray::from(out), field.name(), true) } #[pyfunction] pub(crate) fn change_resolution_list( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, h3_resolution: u8, -) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; - let listarray = cellindexarray - .change_resolution_list(h3_resolution) - .into_pyresult()?; + let listarray = py.allow_threads(|| { + cellindexarray + .change_resolution_list(h3_resolution) + .into_pyresult() + })?; - Python::with_gil(|py| LargeListArray::from(listarray).into_data().to_pyarrow(py)) + array_to_arro3(py, LargeListArray::from(listarray), RESOLUTION_NAME, true) } #[pyfunction] pub(crate) fn change_resolution_paired( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, h3_resolution: u8, -) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; - let pair = cellindexarray - .change_resolution_paired(h3_resolution) - .into_pyresult()?; + let pair = py.allow_threads(|| { + cellindexarray + .change_resolution_paired(h3_resolution) + .into_pyresult() + })?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - h3array_to_pyarray(pair.before, py)?, - h3array_to_pyarray(pair.after, py)?, - ]; - let table = pyarrow.getattr("Table")?.call_method1( - "from_arrays", - ( - arrays, - [ - format!("{}_before", DEFAULT_CELL_COLUMN_NAME), - format!("{}_after", DEFAULT_CELL_COLUMN_NAME), - ], - ), - )?; - Ok(table.to_object(py)) - }) + let outarrays: Vec = vec![ + Arc::new(PrimitiveArray::from(pair.before)), + Arc::new(PrimitiveArray::from(pair.after)), + ]; + + let schema = Arc::new(Schema::new(vec![ + Field::new( + format!("{}_before", DEFAULT_CELL_COLUMN_NAME), + outarrays[0].data_type().clone(), + true, + ), + Field::new( + format!("{}_after", DEFAULT_CELL_COLUMN_NAME), + outarrays[1].data_type().clone(), + true, + ), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) } #[pyfunction] -pub(crate) fn cells_resolution(cellarray: &Bound) -> PyResult { - let resarray = pyarray_to_cellindexarray(cellarray)?.resolution(); - Python::with_gil(|py| PrimitiveArray::from(resarray).into_data().into_pyarrow(py)) +pub(crate) fn cells_resolution( + py: Python<'_>, + cellarray: PyConcatedArray, +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + let resarray = PrimitiveArray::from(py.allow_threads(|| cellindexarray.resolution())); + + array_to_arro3(py, resarray, RESOLUTION_NAME, true) } diff --git a/h3ronpy/src/op/string.rs b/h3ronpy/src/op/string.rs index 9bd1f09..7deee4d 100644 --- a/h3ronpy/src/op/string.rs +++ b/h3ronpy/src/op/string.rs @@ -1,105 +1,115 @@ -use arrow::array::{make_array, Array, ArrayData, LargeStringArray, StringArray}; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow}; +use arrow::array::{Array, LargeStringArray, PrimitiveArray, StringArray}; use h3arrow::algorithm::{ParseGenericStringArray, ToGenericStringArray}; use h3arrow::array::{CellIndexArray, DirectedEdgeIndexArray, VertexIndexArray}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; -#[pyfunction] -#[pyo3(signature = (stringarray, set_failing_to_invalid = false))] -pub(crate) fn cells_parse( - stringarray: &Bound, - set_failing_to_invalid: bool, -) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); - let cells = if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { - CellIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) +fn parse(py: Python<'_>, stringarray: PyArray, set_failing_to_invalid: bool) -> PyResult +where + O: ParseGenericStringArray + Send, +{ + let any_array = stringarray.array().as_any(); + let h3entities = if let Some(stringarray) = any_array.downcast_ref::() { + py.allow_threads(|| O::parse_genericstringarray(stringarray, set_failing_to_invalid)) .into_pyresult()? - } else if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { - CellIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) + } else if let Some(stringarray) = any_array.downcast_ref::() { + py.allow_threads(|| O::parse_genericstringarray(stringarray, set_failing_to_invalid)) .into_pyresult()? } else { return Err(PyValueError::new_err( - "unsupported array type to parse cells from", + "unsupported array type to parse h3 indexes from", )); }; - Python::with_gil(|py| h3array_to_pyarray(cells, py)) + Ok(h3entities) } #[pyfunction] #[pyo3(signature = (stringarray, set_failing_to_invalid = false))] -pub(crate) fn vertexes_parse( - stringarray: &Bound, +pub(crate) fn cells_parse( + py: Python<'_>, + stringarray: PyConcatedArray, set_failing_to_invalid: bool, -) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); - let vertexes = if let Some(utf8array) = boxed_array.as_any().downcast_ref::() { - VertexIndexArray::parse_genericstringarray(utf8array, set_failing_to_invalid) - .into_pyresult()? - } else if let Some(utf8array) = boxed_array.as_any().downcast_ref::() { - VertexIndexArray::parse_genericstringarray(utf8array, set_failing_to_invalid) - .into_pyresult()? - } else { - return Err(PyValueError::new_err( - "unsupported array type to parse vertexes from", - )); - }; +) -> PyArrowResult { + let name = stringarray.field().name().to_string(); + let cells: CellIndexArray = parse(py, stringarray.into(), set_failing_to_invalid)?; + array_to_arro3(py, PrimitiveArray::from(cells), name, true) +} - Python::with_gil(|py| h3array_to_pyarray(vertexes, py)) +#[pyfunction] +#[pyo3(signature = (stringarray, set_failing_to_invalid = false))] +pub(crate) fn vertexes_parse( + py: Python<'_>, + stringarray: PyConcatedArray, + set_failing_to_invalid: bool, +) -> PyArrowResult { + let name = stringarray.field().name().to_string(); + let vertexes: VertexIndexArray = parse(py, stringarray.into(), set_failing_to_invalid)?; + array_to_arro3(py, PrimitiveArray::from(vertexes), name, true) } #[pyfunction] #[pyo3(signature = (stringarray, set_failing_to_invalid = false))] pub(crate) fn directededges_parse( - stringarray: &Bound, + py: Python<'_>, + stringarray: PyConcatedArray, set_failing_to_invalid: bool, -) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); - let edges = if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { - DirectedEdgeIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) - .into_pyresult()? - } else if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { - DirectedEdgeIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) - .into_pyresult()? - } else { - return Err(PyValueError::new_err( - "unsupported array type to parse directededges from", - )); - }; +) -> PyArrowResult { + let name = stringarray.field().name().to_string(); + let edges: DirectedEdgeIndexArray = parse(py, stringarray.into(), set_failing_to_invalid)?; + array_to_arro3(py, PrimitiveArray::from(edges), name, true) +} + +fn to_string + Send + Sync, S: Into>( + py: Python<'_>, + name: S, + array: A, +) -> PyArrowResult { + let stringarray = py.allow_threads(|| array.to_genericstringarray().into_pyresult())?; - Python::with_gil(|py| h3array_to_pyarray(edges, py)) + array_to_arro3(py, stringarray, name, true) } #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_to_string(cellarray: &Bound) -> PyResult { - let stringarray: LargeStringArray = pyarray_to_cellindexarray(cellarray)? - .to_genericstringarray() - .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) +pub(crate) fn cells_to_string( + py: Python<'_>, + cellarray: PyConcatedArray, +) -> PyArrowResult { + to_string( + py, + cellarray.field().name().clone(), + cellarray.into_cellindexarray()?, + ) } #[pyfunction] #[pyo3(signature = (vertexarray))] -pub(crate) fn vertexes_to_string(vertexarray: &Bound) -> PyResult { - let stringarray: LargeStringArray = pyarray_to_vertexindexarray(vertexarray)? - .to_genericstringarray() - .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) +pub(crate) fn vertexes_to_string( + py: Python<'_>, + vertexarray: PyConcatedArray, +) -> PyArrowResult { + to_string( + py, + vertexarray.field().name().clone(), + vertexarray.into_vertexindexarray()?, + ) } #[pyfunction] #[pyo3(signature = (directededgearray))] -pub(crate) fn directededges_to_string(directededgearray: &Bound) -> PyResult { - let stringarray: LargeStringArray = pyarray_to_directededgeindexarray(directededgearray)? - .to_genericstringarray() - .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) +pub(crate) fn directededges_to_string( + py: Python<'_>, + directededgearray: PyConcatedArray, +) -> PyArrowResult { + to_string( + py, + directededgearray.field().name().clone(), + directededgearray.into_directededgeindexarray()?, + ) } diff --git a/h3ronpy/src/op/valid.rs b/h3ronpy/src/op/valid.rs index e1c7428..de7ffc8 100644 --- a/h3ronpy/src/op/valid.rs +++ b/h3ronpy/src/op/valid.rs @@ -1,42 +1,50 @@ -use arrow::array::{Array, BooleanArray}; +use arrow::array::{Array, BooleanArray, PrimitiveArray}; use arrow::buffer::NullBuffer; -use arrow::pyarrow::IntoPyArrow; use h3arrow::array::{FromIteratorWithValidity, H3Array, H3IndexArrayValue}; use h3arrow::h3o; use h3o::{CellIndex, DirectedEdgeIndex, VertexIndex}; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyArray; use crate::arrow_interop::*; -fn h3index_valid(arr: &Bound, booleanarray: bool) -> PyResult +fn h3index_valid(py: Python<'_>, arr: PyArray, booleanarray: bool) -> PyArrowResult where IX: H3IndexArrayValue, { + let name = arr.field().name().to_string(); let u64array = pyarray_to_uint64array(arr)?; let validated = H3Array::::from_iter_with_validity(u64array.iter()); - Python::with_gil(|py| { - if booleanarray { - let nullbuffer = validated - .primitive_array() - .nulls() - .cloned() - .unwrap_or_else(|| NullBuffer::new_valid(validated.len())); - BooleanArray::from(nullbuffer.into_inner()) - .into_data() - .into_pyarrow(py) - } else { - h3array_to_pyarray(validated, py) - } - }) + if booleanarray { + let nullbuffer = validated + .primitive_array() + .nulls() + .cloned() + .unwrap_or_else(|| NullBuffer::new_valid(validated.len())); + + array_to_arro3( + py, + BooleanArray::from(nullbuffer.into_inner()), + "is_valid", + true, + ) + } else { + array_to_arro3(py, PrimitiveArray::from(validated), name, true) + } } macro_rules! impl_h3index_valid { ($name:ident, $arr_type:ty) => { #[pyfunction] #[pyo3(signature = (array, booleanarray = false))] - pub(crate) fn $name(array: &Bound, booleanarray: bool) -> PyResult { - h3index_valid::<$arr_type>(array, booleanarray) + pub(crate) fn $name( + py: Python<'_>, + array: PyConcatedArray, + booleanarray: bool, + ) -> PyArrowResult { + h3index_valid::<$arr_type>(py, array.into(), booleanarray) } }; } diff --git a/h3ronpy/src/raster.rs b/h3ronpy/src/raster.rs index 2de5fa8..aa28ddd 100644 --- a/h3ronpy/src/raster.rs +++ b/h3ronpy/src/raster.rs @@ -1,13 +1,16 @@ +use arrow::datatypes::{Field, Schema}; use geo_types::Point; +use pyo3_arrow::PyTable; use std::hash::Hash; use std::iter::repeat; use std::str::FromStr; +use std::sync::Arc; use arrow::array::{ - Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, + Array, ArrayRef, ArrowPrimitiveType, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, PrimitiveArray, RecordBatch, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, }; -use arrow::pyarrow::IntoPyArrow; use geo::{AffineOps, AffineTransform}; use h3arrow::array::CellIndexArray; use h3arrow::export::h3o::{CellIndex, Resolution}; @@ -16,10 +19,11 @@ use numpy::PyReadonlyArray2; use ordered_float::OrderedFloat; use pyo3::exceptions::PyValueError; use pyo3::{prelude::*, wrap_pyfunction}; +use pyo3_arrow::error::PyArrowResult; -use crate::arrow_interop::h3array_to_pyarray; use crate::error::IntoPyResult; use crate::transform::Transform; +use crate::DEFAULT_CELL_COLUMN_NAME; pub struct AxisOrder { pub inner: rasterh3::AxisOrder, @@ -142,29 +146,31 @@ macro_rules! make_raster_to_h3_variant { ($name:ident, $dtype:ty, $array_dtype:ty) => { #[pyfunction] fn $name( + py: Python<'_>, np_array: PyReadonlyArray2<$dtype>, transform: &Transform, h3_resolution: u8, axis_order_str: &str, compact: bool, nodata_value: Option<$dtype>, - ) -> PyResult<(PyObject, PyObject)> { + ) -> PyArrowResult { let arr = np_array.as_array(); - let (values, cells) = raster_to_h3( - &arr, - transform, - &nodata_value, - h3_resolution, - axis_order_str, - compact, - )?; - - Python::with_gil(|py| { - let values = <$array_dtype>::from(values).into_data().into_pyarrow(py)?; - let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; - - Ok((values, cells)) - }) + let (values, cells) = py.allow_threads(|| { + raster_to_h3( + &arr, + transform, + &nodata_value, + h3_resolution, + axis_order_str, + compact, + ) + })?; + + return_table( + py, + CellIndexArray::from(cells), + <$array_dtype>::from(values), + ) } }; } @@ -173,37 +179,59 @@ macro_rules! make_raster_to_h3_float_variant { ($name:ident, $dtype:ty, $array_dtype:ty) => { #[pyfunction] fn $name( + py: Python<'_>, np_array: PyReadonlyArray2<$dtype>, transform: &Transform, h3_resolution: u8, axis_order_str: &str, compact: bool, nodata_value: Option<$dtype>, - ) -> PyResult<(PyObject, PyObject)> { + ) -> PyArrowResult { let arr = np_array.as_array(); // create a copy with the values wrapped in ordered floats to // support the internal hashing let of_arr = arr.map(|v| OrderedFloat::from(*v)); - let (values, cells) = raster_to_h3( - &of_arr.view(), - transform, - &nodata_value.map(OrderedFloat::from), - h3_resolution, - axis_order_str, - compact, - )?; - - Python::with_gil(|py| { - let values: Vec<$dtype> = values.into_iter().map(|v| v.into_inner()).collect(); - let values = <$array_dtype>::from(values).into_data().into_pyarrow(py)?; - let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; - - Ok((values, cells)) - }) + let (values, cells) = py.allow_threads(|| { + raster_to_h3( + &of_arr.view(), + transform, + &nodata_value.map(OrderedFloat::from), + h3_resolution, + axis_order_str, + compact, + ) + })?; + + let values: Vec<$dtype> = values.into_iter().map(|v| v.into_inner()).collect(); + return_table( + py, + CellIndexArray::from(cells), + <$array_dtype>::from(values), + ) } }; } +fn return_table( + py: Python<'_>, + cells: CellIndexArray, + values: PrimitiveArray, +) -> PyArrowResult { + let outarrays: Vec = vec![Arc::new(PrimitiveArray::from(cells)), Arc::new(values)]; + + let schema = Arc::new(Schema::new(vec![ + Field::new( + DEFAULT_CELL_COLUMN_NAME, + outarrays[0].data_type().clone(), + true, + ), + Field::new("value", outarrays[1].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) +} + // generate some specialized variants of raster_to_h3 to expose to python make_raster_to_h3_variant!(raster_to_h3_u8, u8, UInt8Array); make_raster_to_h3_variant!(raster_to_h3_i8, i8, Int8Array); diff --git a/h3ronpy/src/vector.rs b/h3ronpy/src/vector.rs index 92c080d..b8ed18e 100644 --- a/h3ronpy/src/vector.rs +++ b/h3ronpy/src/vector.rs @@ -1,14 +1,16 @@ +use std::sync::Arc; + use arrow::array::{ - make_array, Array, ArrayData, BinaryArray, Float64Array, GenericBinaryArray, GenericListArray, - LargeBinaryArray, OffsetSizeTrait, UInt8Array, + Array, ArrayRef, BinaryArray, Float64Array, GenericBinaryArray, GenericListArray, + LargeBinaryArray, OffsetSizeTrait, PrimitiveArray, RecordBatch, }; use arrow::buffer::NullBuffer; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow, ToPyArrow}; +use arrow::datatypes::{Field, Schema}; use geo::{BoundingRect, HasDimensions}; use h3arrow::algorithm::ToCoordinatesOp; use h3arrow::array::from_geo::{ToCellIndexArray, ToCellListArray, ToCellsOptions}; use h3arrow::array::to_geoarrow::{ToWKBLineStrings, ToWKBPoints, ToWKBPolygons}; -use h3arrow::array::{CellIndexArray, ResolutionArray}; +use h3arrow::array::CellIndexArray; use h3arrow::export::geoarrow::array::{WKBArray, WKBBuilder, WKBCapacity}; use h3arrow::export::h3o::geom::{ContainmentMode, ToGeo}; use h3arrow::export::h3o::Resolution; @@ -18,9 +20,13 @@ use itertools::multizip; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyTuple; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::{PyArray, PyTable}; -use crate::arrow_interop::*; use crate::error::IntoPyResult; +use crate::{arrow_interop::*, DEFAULT_CELL_COLUMN_NAME}; + +const WKB_NAME: &str = "wkb"; /// Containment mode used to decide if a cell is contained in a polygon or not. /// @@ -81,8 +87,8 @@ impl PyContainmentMode { #[pyfunction] #[pyo3(signature = (cellarray,))] -pub(crate) fn cells_bounds(cellarray: &Bound) -> PyResult> { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn cells_bounds(cellarray: PyConcatedArray) -> PyResult> { + let cellindexarray = cellarray.into_cellindexarray()?; if let Some(rect) = cellindexarray.bounding_rect() { Python::with_gil(|py| { Ok(Some( @@ -97,217 +103,256 @@ pub(crate) fn cells_bounds(cellarray: &Bound) -> PyResult) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; - let mut minx_vec = vec![0.0f64; cellindexarray.len()]; - let mut miny_vec = vec![0.0f64; cellindexarray.len()]; - let mut maxx_vec = vec![0.0f64; cellindexarray.len()]; - let mut maxy_vec = vec![0.0f64; cellindexarray.len()]; - let mut validity_vec = vec![false; cellindexarray.len()]; - - for (cell, minx, miny, maxx, maxy, validity) in multizip(( - cellindexarray.iter(), - minx_vec.iter_mut(), - miny_vec.iter_mut(), - maxx_vec.iter_mut(), - maxy_vec.iter_mut(), - validity_vec.iter_mut(), - )) { - if let Some(cell) = cell { - if let Some(rect) = cell - .to_geom(true) - .ok() - .and_then(|poly| poly.bounding_rect()) - { - *validity = true; - *minx = rect.min().x; - *miny = rect.min().y; - *maxx = rect.max().x; - *maxy = rect.max().y; - }; +pub(crate) fn cells_bounds_arrays( + py: Python<'_>, + cellarray: PyConcatedArray, +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + + let outarrays = py.allow_threads(|| { + let mut minx_vec = vec![0.0f64; cellindexarray.len()]; + let mut miny_vec = vec![0.0f64; cellindexarray.len()]; + let mut maxx_vec = vec![0.0f64; cellindexarray.len()]; + let mut maxy_vec = vec![0.0f64; cellindexarray.len()]; + let mut validity_vec = vec![false; cellindexarray.len()]; + + for (cell, minx, miny, maxx, maxy, validity) in multizip(( + cellindexarray.iter(), + minx_vec.iter_mut(), + miny_vec.iter_mut(), + maxx_vec.iter_mut(), + maxy_vec.iter_mut(), + validity_vec.iter_mut(), + )) { + if let Some(cell) = cell { + if let Some(rect) = cell + .to_geom(true) + .ok() + .and_then(|poly| poly.bounding_rect()) + { + *validity = true; + *minx = rect.min().x; + *miny = rect.min().y; + *maxx = rect.max().x; + *maxy = rect.max().y; + }; + } } - } - let validity = NullBuffer::from(validity_vec); - - with_pyarrow(|py, pyarrow| { - let arrays = [ - Float64Array::new(minx_vec.into(), Some(validity.clone())) - .into_data() - .into_pyarrow(py)?, - Float64Array::new(miny_vec.into(), Some(validity.clone())) - .into_data() - .into_pyarrow(py)?, - Float64Array::new(maxx_vec.into(), Some(validity.clone())) - .into_data() - .into_pyarrow(py)?, - Float64Array::new(maxy_vec.into(), Some(validity)) - .into_data() - .into_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, ["minx", "miny", "maxx", "maxy"]))?; - Ok(table.to_object(py)) - }) + let validity = NullBuffer::from(validity_vec); + + vec![ + Arc::new(Float64Array::new(minx_vec.into(), Some(validity.clone()))) as ArrayRef, + Arc::new(Float64Array::new(miny_vec.into(), Some(validity.clone()))), + Arc::new(Float64Array::new(maxx_vec.into(), Some(validity.clone()))), + Arc::new(Float64Array::new(maxy_vec.into(), Some(validity))), + ] + }); + + let schema = Arc::new(Schema::new(vec![ + Field::new("minx", outarrays[0].data_type().clone(), true), + Field::new("miny", outarrays[1].data_type().clone(), true), + Field::new("maxx", outarrays[2].data_type().clone(), true), + Field::new("maxy", outarrays[3].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) } #[pyfunction] #[pyo3(signature = (cellarray, radians = false))] -pub(crate) fn cells_to_coordinates(cellarray: &Bound, radians: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn cells_to_coordinates( + py: Python<'_>, + cellarray: PyConcatedArray, + radians: bool, +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + + let coordinate_arrays = py + .allow_threads(|| { + if radians { + cellindexarray.to_coordinates_radians() + } else { + cellindexarray.to_coordinates() + } + }) + .into_pyresult()?; - let coordinate_arrays = if radians { - cellindexarray.to_coordinates_radians() - } else { - cellindexarray.to_coordinates() - } - .into_pyresult()?; - - with_pyarrow(|py, pyarrow| { - let arrays = [ - coordinate_arrays.lat.into_data().into_pyarrow(py)?, - coordinate_arrays.lng.into_data().into_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, ["lat", "lng"]))?; - Ok(table.to_object(py)) - }) + let outarrays: Vec = vec![ + Arc::new(coordinate_arrays.lat) as ArrayRef, + Arc::new(coordinate_arrays.lng), + ]; + + let schema = Arc::new(Schema::new(vec![ + Field::new("lat", outarrays[0].data_type().clone(), true), + Field::new("lng", outarrays[1].data_type().clone(), true), + ])); + + let rb = RecordBatch::try_new(schema.clone(), outarrays).into_pyresult()?; + Ok(PyTable::try_new(vec![rb], schema)?.to_arro3(py)?) } #[pyfunction] #[pyo3(signature = (latarray, lngarray, resolution, radians = false))] pub(crate) fn coordinates_to_cells( - latarray: &Bound, - lngarray: &Bound, + py: Python<'_>, + latarray: PyConcatedArray, + lngarray: PyConcatedArray, resolution: &Bound, radians: bool, -) -> PyResult { - let latarray: Float64Array = pyarray_to_native(latarray)?; - let lngarray: Float64Array = pyarray_to_native(lngarray)?; +) -> PyArrowResult { + let latarray: Float64Array = latarray.into_float64array()?; + let lngarray: Float64Array = lngarray.into_float64array()?; if lngarray.len() != latarray.len() { - return Err(PyValueError::new_err( - "latarray and lngarray must be of the same length", - )); + return Err( + PyValueError::new_err("latarray and lngarray must be of the same length").into(), + ); } let cells = if let Ok(resolution) = resolution.extract::() { let resolution = Resolution::try_from(resolution).into_pyresult()?; - latarray - .iter() - .zip(lngarray.iter()) - .map(|(lat, lng)| { - if let (Some(lat), Some(lng)) = (lat, lng) { - if radians { - LatLng::from_radians(lat, lng).into_pyresult() + py.allow_threads(|| { + latarray + .iter() + .zip(lngarray.iter()) + .map(|(lat, lng)| { + if let (Some(lat), Some(lng)) = (lat, lng) { + if radians { + LatLng::from_radians(lat, lng).into_pyresult() + } else { + LatLng::new(lat, lng).into_pyresult() + } + .map(|ll| Some(ll.to_cell(resolution))) } else { - LatLng::new(lat, lng).into_pyresult() + Ok(None) } - .map(|ll| Some(ll.to_cell(resolution))) - } else { - Ok(None) - } - }) - .collect::>()? + }) + .collect::>() + })? } else { - let resarray = ResolutionArray::try_from(pyarray_to_native::(resolution)?) - .into_pyresult()?; + let resarray = resolution + .extract::()? + .into_resolutionarray()?; if resarray.len() != latarray.len() { return Err(PyValueError::new_err( "resarray must be of the same length as the coordinate arrays", - )); + ) + .into()); } - multizip((latarray.iter(), lngarray.iter(), resarray.iter())) - .map(|(lat, lng, res)| { - if let (Some(lat), Some(lng), Some(res)) = (lat, lng, res) { - if radians { - LatLng::from_radians(lat, lng).into_pyresult() + py.allow_threads(|| { + multizip((latarray.iter(), lngarray.iter(), resarray.iter())) + .map(|(lat, lng, res)| { + if let (Some(lat), Some(lng), Some(res)) = (lat, lng, res) { + if radians { + LatLng::from_radians(lat, lng).into_pyresult() + } else { + LatLng::new(lat, lng).into_pyresult() + } + .map(|ll| Some(ll.to_cell(res))) } else { - LatLng::new(lat, lng).into_pyresult() + Ok(None) } - .map(|ll| Some(ll.to_cell(res))) - } else { - Ok(None) - } - }) - .collect::>()? + }) + .collect::>() + })? }; - Python::with_gil(|py| h3array_to_pyarray(cells, py)) + array_to_arro3( + py, + PrimitiveArray::from(cells), + DEFAULT_CELL_COLUMN_NAME, + true, + ) } #[pyfunction] #[pyo3(signature = (cellarray, radians = false, link_cells = false))] pub(crate) fn cells_to_wkb_polygons( - cellarray: &Bound, + py: Python<'_>, + cellarray: PyConcatedArray, radians: bool, link_cells: bool, -) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; let use_degrees = !radians; let out: WKBArray = if link_cells { - let geoms = cellindexarray - .iter() - .flatten() - .to_geom(use_degrees) - .into_pyresult()? - .0 - .into_iter() - .map(|poly| Some(geo_types::Geometry::from(poly))) - .collect::>(); - let mut builder = WKBBuilder::with_capacity(WKBCapacity::from_geometries( - geoms.iter().map(|v| v.as_ref()), - )); - builder.extend_from_iter(geoms.iter().map(|v| v.as_ref())); - builder.finish() + let out: Result, PyErr> = py.allow_threads(|| { + let geoms = cellindexarray + .iter() + .flatten() + .to_geom(use_degrees) + .into_pyresult()? + .0 + .into_iter() + .map(|poly| Some(geo_types::Geometry::from(poly))) + .collect::>(); + let mut builder = WKBBuilder::with_capacity(WKBCapacity::from_geometries( + geoms.iter().map(|v| v.as_ref()), + )); + builder.extend_from_iter(geoms.iter().map(|v| v.as_ref())); + Ok(builder.finish()) + }); + out? } else { - cellindexarray - .to_wkb_polygons(use_degrees) - .expect("wkbarray") + py.allow_threads(|| { + cellindexarray + .to_wkb_polygons(use_degrees) + .expect("wkbarray") + }) }; - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + array_to_arro3(py, out.into_inner(), WKB_NAME, true) } #[pyfunction] #[pyo3(signature = (cellarray, radians = false))] -pub(crate) fn cells_to_wkb_points(cellarray: &Bound, radians: bool) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)? - .to_wkb_points::(!radians) +pub(crate) fn cells_to_wkb_points( + py: Python<'_>, + cellarray: PyConcatedArray, + radians: bool, +) -> PyArrowResult { + let cellindexarray = cellarray.into_cellindexarray()?; + let out = py + .allow_threads(|| cellindexarray.to_wkb_points::(!radians)) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + array_to_arro3(py, out.into_inner(), WKB_NAME, true) } #[pyfunction] #[pyo3(signature = (vertexarray, radians = false))] pub(crate) fn vertexes_to_wkb_points( - vertexarray: &Bound, + py: Python<'_>, + vertexarray: PyConcatedArray, radians: bool, -) -> PyResult { - let out = pyarray_to_vertexindexarray(vertexarray)? - .to_wkb_points::(!radians) +) -> PyArrowResult { + let vertexindexarray = vertexarray.into_vertexindexarray()?; + let out = py + .allow_threads(|| vertexindexarray.to_wkb_points::(!radians)) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + array_to_arro3(py, out.into_inner(), WKB_NAME, true) } #[pyfunction] #[pyo3(signature = (array, radians = false))] pub(crate) fn directededges_to_wkb_linestrings( - array: &Bound, + py: Python<'_>, + array: PyConcatedArray, radians: bool, -) -> PyResult { - let out = pyarray_to_directededgeindexarray(array)? - .to_wkb_linestrings::(!radians) +) -> PyArrowResult { + let directededgesindexarray = array.into_directededgeindexarray()?; + let out = py + .allow_threads(|| directededgesindexarray.to_wkb_linestrings::(!radians)) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + array_to_arro3(py, out.into_inner(), WKB_NAME, true) } fn get_to_cells_options( @@ -325,60 +370,77 @@ fn get_to_cells_options( #[pyfunction] #[pyo3(signature = (array, resolution, containment_mode = None, compact = false, flatten = false))] pub(crate) fn wkb_to_cells( - array: &Bound, + py: Python<'_>, + array: PyConcatedArray, resolution: u8, containment_mode: Option, compact: bool, flatten: bool, -) -> PyResult { +) -> PyArrowResult { let options = get_to_cells_options(resolution, containment_mode, compact)?; - let array_ref = make_array(ArrayData::from_pyarrow_bound(array)?); - if let Some(binarray) = array_ref.as_any().downcast_ref::() { - generic_wkb_to_cells(binarray.clone(), flatten, &options) - } else if let Some(binarray) = array_ref.as_any().downcast_ref::() { - generic_wkb_to_cells(binarray.clone(), flatten, &options) + let array: PyArray = array.into(); + if let Some(binarray) = array.array().as_any().downcast_ref::() { + generic_wkb_to_cells(py, binarray.clone(), flatten, &options) + } else if let Some(binarray) = array.array().as_any().downcast_ref::() { + generic_wkb_to_cells(py, binarray.clone(), flatten, &options) } else { - Err(PyValueError::new_err( - "unsupported array type for WKB input", - )) + Err(PyValueError::new_err("unsupported array type for WKB input").into()) } } fn generic_wkb_to_cells( + py: Python<'_>, binarray: GenericBinaryArray, flatten: bool, options: &ToCellsOptions, -) -> PyResult { +) -> PyArrowResult { let wkbarray = WKBArray::new(binarray, Default::default()); if flatten { - let cells = wkbarray.to_cellindexarray(options).into_pyresult()?; + let cells = py + .allow_threads(|| wkbarray.to_cellindexarray(options)) + .into_pyresult()?; - Python::with_gil(|py| h3array_to_pyarray(cells, py)) + array_to_arro3( + py, + PrimitiveArray::from(cells), + DEFAULT_CELL_COLUMN_NAME, + true, + ) } else { - let listarray: GenericListArray = - wkbarray.to_celllistarray(options).into_pyresult()?.into(); - Python::with_gil(|py| listarray.into_data().to_pyarrow(py)) + let listarray: GenericListArray = py + .allow_threads(|| wkbarray.to_celllistarray(options)) + .into_pyresult()? + .into(); + array_to_arro3(py, listarray, DEFAULT_CELL_COLUMN_NAME, true) } } #[pyfunction] #[pyo3(signature = (obj, resolution, containment_mode = None, compact = false))] pub(crate) fn geometry_to_cells( + py: Python<'_>, obj: py_geo_interface::Geometry, resolution: u8, containment_mode: Option, compact: bool, -) -> PyResult { - if obj.0.is_empty() { - return Python::with_gil(|py| h3array_to_pyarray(CellIndexArray::new_null(0), py)); - } - let options = get_to_cells_options(resolution, containment_mode, compact)?; - let cellindexarray = CellIndexArray::from( - h3arrow::array::from_geo::geometry_to_cells(&obj.0, &options).into_pyresult()?, - ); - Python::with_gil(|py| h3array_to_pyarray(cellindexarray, py)) +) -> PyArrowResult { + let cellindexarray = if obj.0.is_empty() { + CellIndexArray::new_null(0) + } else { + let options = get_to_cells_options(resolution, containment_mode, compact)?; + CellIndexArray::from( + py.allow_threads(|| h3arrow::array::from_geo::geometry_to_cells(&obj.0, &options)) + .into_pyresult()?, + ) + }; + array_to_arro3( + py, + PrimitiveArray::from(cellindexarray), + DEFAULT_CELL_COLUMN_NAME, + true, + ) } pub fn init_vector_submodule(m: &Bound) -> PyResult<()> { diff --git a/h3ronpy/tests/arrow/test_vector.py b/h3ronpy/tests/arrow/test_vector.py index cfd7b65..dda7781 100644 --- a/h3ronpy/tests/arrow/test_vector.py +++ b/h3ronpy/tests/arrow/test_vector.py @@ -2,15 +2,16 @@ import pyarrow as pa import shapely from shapely.geometry import Point -from shapely import wkb +from shapely import from_wkb import h3.api.numpy_int as h3 +from arro3.core import Array, DataType def test_geometry_to_cells(): geom = shapely.Polygon(((0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0))) cells = geometry_to_cells(geom, 5, containment_mode=ContainmentMode.IntersectsBoundary) - assert isinstance(cells, pa.Array) - assert cells.type == pa.uint64() + assert isinstance(cells, Array) + assert cells.type == DataType.uint64() assert len(cells) > 10 @@ -39,7 +40,6 @@ def test_coordinate_values_are_not_equal_issue_58(): # Step 4: Decode the WKB point to a Shapely geometry for wkb_point in wkb_points: - assert isinstance(wkb_point, pa.Scalar) # Ensure it's a pyarrow Scalar - shapely_point = wkb.loads(wkb_point.as_buffer().to_pybytes()) + shapely_point = shapely.from_wkb(wkb_point.as_py()) assert int(lat) == int(shapely_point.y) assert int(lon) == int(shapely_point.x) diff --git a/h3ronpy/tests/pandas/test_resolution.py b/h3ronpy/tests/pandas/test_resolution.py index 3c32eef..279a1e6 100644 --- a/h3ronpy/tests/pandas/test_resolution.py +++ b/h3ronpy/tests/pandas/test_resolution.py @@ -1,12 +1,13 @@ import numpy as np import math import h3.api.numpy_int as h3 +from arro3.core import Array from h3ronpy.pandas import change_resolution, change_resolution_paired, cells_resolution def test_change_resolution_up(): - h3indexes = np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64) + h3indexes = Array.from_numpy(np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64)) out_res = 9 changed = change_resolution(h3indexes, out_res) assert changed.shape[0] == (int(math.pow(7, 4)) + 7) @@ -15,12 +16,12 @@ def test_change_resolution_up(): def test_change_resolution_paired_up(): - h3indexes = np.array( + h3indexes = Array.from_numpy(np.array( [ h3.geo_to_h3(10.3, 45.1, 8), ], dtype=np.uint64, - ) + )) out_res = 9 changed_df = change_resolution_paired(h3indexes, out_res) assert len(changed_df) == 7 @@ -30,7 +31,7 @@ def test_change_resolution_paired_up(): def test_change_resolution_down(): - h3indexes = np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64) + h3indexes = Array.from_numpy(np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64)) out_res = 4 changed = change_resolution(h3indexes, out_res) assert changed.shape[0] == 2 @@ -39,7 +40,7 @@ def test_change_resolution_down(): def test_cells_resolution(): - h3indexes = np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64) + h3indexes = Array.from_numpy(np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64)) res = cells_resolution(h3indexes) assert len(res) == 2 assert res[0] == 5 diff --git a/h3ronpy/tests/pandas/test_vector.py b/h3ronpy/tests/pandas/test_vector.py index d6d66e7..5148612 100644 --- a/h3ronpy/tests/pandas/test_vector.py +++ b/h3ronpy/tests/pandas/test_vector.py @@ -17,11 +17,11 @@ def test_cells_to_points(): - gs = cells_to_points( + gs = cells_to_points(pd.Series( [ 0x8009FFFFFFFFFFF, ] - ) + )) assert isinstance(gs, gpd.GeoSeries) assert gs.geom_type[0] == "Point" diff --git a/h3ronpy/tests/polars/test_compact.py b/h3ronpy/tests/polars/test_compact.py index fbd9176..de56994 100644 --- a/h3ronpy/tests/polars/test_compact.py +++ b/h3ronpy/tests/polars/test_compact.py @@ -3,9 +3,10 @@ from h3ronpy.polars import compact, change_resolution, uncompact import numpy as np import h3.api.numpy_int as h3 +from arro3.core import Array -def compact_to_one(expected_cell, input_cells, **kw): +def compact_to_one(expected_cell, input_cells: Array, **kw): compacted = compact(input_cells, **kw) assert len(compacted) == 1 assert compacted[0] == expected_cell @@ -14,12 +15,12 @@ def compact_to_one(expected_cell, input_cells, **kw): def test_compact(): cell = h3.geo_to_h3(10.3, 45.1, 8) h3indexes = change_resolution( - np.array( + pl.Series(np.array( [ cell, ], dtype=np.uint64, - ), + )), 10, ) compact_to_one(cell, h3indexes) @@ -28,12 +29,12 @@ def test_compact(): def test_compact_mixed_fail(): cell = h3.geo_to_h3(10.3, 45.1, 8) with pytest.raises(ValueError, match="heterogen"): - compact_to_one(cell, [cell, h3.h3_to_parent(cell, 4)]) + compact_to_one(cell, Array.from_numpy(np.array([cell, h3.h3_to_parent(cell, 4)]))) def test_compact_mixed(): cell = h3.geo_to_h3(10.3, 45.1, 8) - compact_to_one(cell, [cell, h3.geo_to_h3(10.3, 45.1, 9)], mixed_resolutions=True) + compact_to_one(cell, Array.from_numpy(np.array([cell, h3.geo_to_h3(10.3, 45.1, 9)])), mixed_resolutions=True) def test_uncompact(): diff --git a/h3ronpy/tests/polars/test_measure.py b/h3ronpy/tests/polars/test_measure.py index 9e1b000..861870a 100644 --- a/h3ronpy/tests/polars/test_measure.py +++ b/h3ronpy/tests/polars/test_measure.py @@ -5,14 +5,14 @@ def test_cells_area_km2(): - cells = np.array( + cells = pl.Series(np.array( [ h3.geo_to_h3(10.3, 45.1, 8), h3.geo_to_h3(10.3, 45.1, 5), h3.geo_to_h3(10.3, 45.1, 3), ], dtype=np.uint64, - ) + )) areas = cells_area_km2(cells) assert isinstance(areas, pl.Series) assert len(areas) == 3 diff --git a/h3ronpy/tests/polars/test_utf8.py b/h3ronpy/tests/polars/test_utf8.py index 2ee90e2..96540d2 100644 --- a/h3ronpy/tests/polars/test_utf8.py +++ b/h3ronpy/tests/polars/test_utf8.py @@ -20,7 +20,7 @@ def test_cells_parse_largeutf8(): def test_parse_cell_fail(): - strings = np.array( + strings = pl.Series( [ "invalid", ] @@ -30,7 +30,7 @@ def test_parse_cell_fail(): def test_parse_cell_set_invalid(): - strings = np.array( + strings = pl.Series( [ "invalid", ] @@ -41,9 +41,9 @@ def test_parse_cell_set_invalid(): def test_cells_valid(): - input = np.array( + input = pl.Series( [45, h3.geo_to_h3(45.5, 10.2, 5)], - dtype=np.uint64, + dtype=pl.datatypes.UInt64(), ) cells = cells_valid(input, booleanarray=False) assert len(cells) == 2 @@ -61,7 +61,7 @@ def test_cells_valid(): def test_cells_to_string(): - cells = np.array( + cells = pl.Series( [ h3.geo_to_h3(45.5, 10.2, 5), ]