diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 9d5d3885be9..f7fd24db0b7 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -502,7 +502,6 @@ cdef class Array: def __iter__(self): for i in range(len(self)): yield self.getitem(i) - raise StopIteration def __repr__(self): from pyarrow.formatting import array_format @@ -526,8 +525,6 @@ cdef class Array: raise NotImplemented def __getitem__(self, key): - cdef Py_ssize_t n = len(self) - if PySlice_Check(key): return _normalize_slice(self, key) diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index a7e00b7ed5e..5418ddbd487 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -297,6 +297,7 @@ cdef class ChunkedArray: CChunkedArray* chunked_array cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array) + cdef getitem(self, int64_t i) cdef class Column: diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 8a58ff51857..06325a6445d 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -72,23 +72,29 @@ cdef class ChunkedArray: """ return self.chunked_array.null_count() - def __getitem__(self, key): - cdef: - int64_t item - int i + def __iter__(self): + for chunk in self.iterchunks(): + for item in chunk: + yield item + def __getitem__(self, key): if isinstance(key, slice): return _normalize_slice(self, key) elif isinstance(key, six.integer_types): - index = _normalize_index(key, self.chunked_array.length()) - for i in range(self.num_chunks): - if index < self.chunked_array.chunk(i).get().length(): - return self.chunk(i)[index] - else: - index -= self.chunked_array.chunk(i).get().length() + return self.getitem(key) else: raise TypeError("key must either be a slice or integer") + cdef getitem(self, int64_t i): + cdef int j + + index = _normalize_index(i, self.chunked_array.length()) + for j in range(self.num_chunks): + if index < self.chunked_array.chunk(j).get().length(): + return self.chunk(j)[index] + else: + index -= self.chunked_array.chunk(j).get().length() + def slice(self, offset=0, length=None): """ Compute zero-copy slice of this ChunkedArray diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 90679bf23d2..f30203c349a 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import collections import datetime import pytest import struct @@ -162,6 +163,15 @@ def test_array_slice(): assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop] +def test_array_iter(): + arr = pa.array(range(10)) + + for i, j in zip(range(10), arr): + assert i == j + + assert isinstance(arr, collections.Iterable) + + def test_struct_array_slice(): # ARROW-2311: slicing nested arrays needs special care ty = pa.struct([pa.field('a', pa.int8()), diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index a3f0711b3de..634a179279b 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from collections import OrderedDict +from collections import OrderedDict, Iterable import numpy as np from pandas.util.testing import assert_frame_equal import pandas as pd @@ -49,6 +49,21 @@ def test_chunked_array_getitem(): assert data_slice.to_pylist() == [] +def test_chunked_array_iter(): + data = [ + pa.array([0]), + pa.array([1, 2, 3]), + pa.array([4, 5, 6]), + pa.array([7, 8, 9]) + ] + arr = pa.chunked_array(data) + + for i, j in zip(range(10), arr): + assert i == j + + assert isinstance(arr, Iterable) + + def test_column_basics(): data = [ pa.array([-10, -5, 0, 5, 10])