diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index defb91deb90..a6188b696d5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -428,6 +428,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CChunkedArray" arrow::ChunkedArray":
         CChunkedArray(const vector[shared_ptr[CArray]]& arrays)
+        CChunkedArray(const vector[shared_ptr[CArray]]& arrays,
+                      const shared_ptr[CDataType]& type)
         int64_t length()
         int64_t null_count()
         int num_chunks()
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 96ed55bc313..6e64d329099 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -191,6 +191,7 @@ def chunked_array(arrays, type=None):
         Array arr
         vector[shared_ptr[CArray]] c_arrays
         shared_ptr[CChunkedArray] sp_chunked_array
+        shared_ptr[CDataType] sp_data_type
 
     for x in arrays:
         if isinstance(x, Array):
@@ -202,7 +203,16 @@ def chunked_array(arrays, type=None):
 
         c_arrays.push_back(arr.sp_array)
 
-    sp_chunked_array.reset(new CChunkedArray(c_arrays))
+    # Compare against None explicitly: the truthiness of a type object is
+    # not a reliable "was a type given?" test.
+    if type is not None:
+        sp_data_type = pyarrow_unwrap_data_type(type)
+        sp_chunked_array.reset(new CChunkedArray(c_arrays, sp_data_type))
+    else:
+        if c_arrays.size() == 0:
+            raise ValueError("Cannot construct a chunked array with neither "
+                             "arrays nor type")
+        sp_chunked_array.reset(new CChunkedArray(c_arrays))
     return pyarrow_wrap_chunked_array(sp_chunked_array)
 
 
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 634a179279b..1df57bde37e 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -24,6 +24,16 @@ import pyarrow as pa
 
 
+def test_chunked_array_basics():
+    # With no chunks, an explicit type is required.
+    data = pa.chunked_array([], type=pa.string())
+    assert data.to_pylist() == []
+
+    # Neither chunks nor a type: construction is rejected.
+    with pytest.raises(ValueError):
+        pa.chunked_array([])
+
+
 def test_chunked_array_getitem():
     data = [
         pa.array([1, 2, 3]),