Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cpp/src/arrow/types/list.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ class ListArray : public Array {

// Return a shared pointer in case the requestor desires to share ownership
// with this array.
const ArrayPtr& values() const {return values_;}
const std::shared_ptr<Array>& values() const {return values_;}

// Return the data type reported by the underlying values array
// (values_->type()). values_ is dereferenced without a null check, so this
// assumes the array was constructed with a values array -- TODO confirm.
const std::shared_ptr<DataType>& value_type() const {
return values_->type();
}

const int32_t* offsets() const { return offsets_;}

Expand Down
6 changes: 5 additions & 1 deletion python/arrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@

from arrow.error import ArrowException

from arrow.scalar import ArrayValue, NA, Scalar
from arrow.scalar import (ArrayValue, Scalar, NA, NAType,
BooleanValue,
Int8Value, Int16Value, Int32Value, Int64Value,
UInt8Value, UInt16Value, UInt32Value, UInt64Value,
FloatValue, DoubleValue, ListValue, StringValue)

from arrow.schema import (null, bool_,
int8, int16, int32, int64,
Expand Down
1 change: 0 additions & 1 deletion python/arrow/array.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ cdef class Array:
DataType type

cdef init(self, const shared_ptr[CArray]& sp_array)
cdef _getitem(self, int i)


cdef class BooleanArray(Array):
Expand Down
17 changes: 8 additions & 9 deletions python/arrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ cimport arrow.includes.pyarrow as pyarrow
from arrow.compat import frombytes, tobytes
from arrow.error cimport check_status

cimport arrow.scalar as scalar
from arrow.scalar import NA

def total_allocated_bytes():
Expand Down Expand Up @@ -73,13 +74,7 @@ cdef class Array:
while key < 0:
key += len(self)

if self.ap.IsNull(key):
return NA
else:
return self._getitem(key)

cdef _getitem(self, int i):
raise NotImplementedError
return scalar.box_arrow_scalar(self.type, self.sp_array, key)

def slice(self, start, end):
pass
Expand Down Expand Up @@ -168,12 +163,16 @@ cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
return arr


def from_pylist(object list_obj, type=None):
def from_pylist(object list_obj, DataType type=None):
"""
Convert Python list to Arrow array
"""
cdef:
shared_ptr[CArray] sp_array

check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
if type is None:
check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
else:
raise NotImplementedError

return box_arrow_array(sp_array)
6 changes: 6 additions & 0 deletions python/arrow/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def dict_values(x):
range = xrange
long = long

def u(s):
    """
    Decode ``s`` to a unicode object using the "unicode_escape" codec.

    NOTE(review): relies on the ``unicode`` builtin, so this definition is
    presumably the Python 2 branch of the module -- confirm against the
    enclosing conditional, which is not visible here.
    """
    decoded = unicode(s, "unicode_escape")
    return decoded

def tobytes(o):
if isinstance(o, unicode):
return o.encode('utf8')
Expand All @@ -73,6 +76,9 @@ def dict_values(x):
from decimal import Decimal
range = range

def u(s):
    """
    Return ``s`` unchanged.

    NOTE(review): presumably the Python 3 counterpart of the decoding ``u``
    helper, where string literals need no conversion -- confirm against the
    enclosing conditional, which is not visible here.
    """
    return s

def tobytes(o):
if isinstance(o, str):
return o.encode('utf8')
Expand Down
36 changes: 32 additions & 4 deletions python/arrow/includes/arrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,41 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool IsNull(int i)

cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
pass
uint8_t Value(int i)

cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
pass
int8_t Value(int i)

cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
uint16_t Value(int i)

cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
int16_t Value(int i)

cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
uint32_t Value(int i)

cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
int32_t Value(int i)

cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
uint64_t Value(int i)

cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
int64_t Value(int i)

cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
float Value(int i)

cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
double Value(int i)

cdef cppclass CListArray" arrow::ListArray"(CArray):
pass
const int32_t* offsets()
int32_t offset(int i)
int32_t value_length(int i)
const shared_ptr[CArray]& values()
const shared_ptr[CDataType]& value_type()

cdef cppclass CStringArray" arrow::StringArray"(CListArray):
pass
c_string GetString(int i)
25 changes: 22 additions & 3 deletions python/arrow/scalar.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

from arrow.includes.common cimport *
from arrow.includes.arrow cimport CArray, CListArray
from arrow.includes.arrow cimport *

from arrow.schema cimport DataType

Expand All @@ -31,17 +31,36 @@ cdef class NAType(Scalar):

cdef class ArrayValue(Scalar):
cdef:
shared_ptr[CArray] array
shared_ptr[CArray] sp_array
int index

cdef void init(self, DataType type,
const shared_ptr[CArray]& sp_array, int index)

cdef void _set_array(self, const shared_ptr[CArray]& sp_array)


cdef class Int8Value(ArrayValue):
pass


cdef class ListValue(ArrayValue):
cdef class Int64Value(ArrayValue):
pass


cdef class ListValue(ArrayValue):
cdef readonly:
DataType value_type

cdef:
CListArray* ap

cdef _getitem(self, int i)


cdef class StringValue(ArrayValue):
pass

cdef object box_arrow_scalar(DataType type,
const shared_ptr[CArray]& sp_array,
int index)
165 changes: 165 additions & 0 deletions python/arrow/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,179 @@
# specific language governing permissions and limitations
# under the License.

from arrow.schema cimport DataType, box_data_type

from arrow.compat import frombytes
import arrow.schema as schema

# Placeholder so that NAType.__cinit__ can tell whether the singleton has
# already been created; rebound to the real instance below.
NA = None


cdef class NAType(Scalar):
    """
    Scalar representing a null (missing) value.

    Exactly one instance is allowed; it is published as the module-level
    name ``NA``.
    """

    def __cinit__(self):
        # Refuse to build a second instance once the module-level NA has
        # been bound to the singleton.
        global NA
        if NA is not None:
            raise Exception('Cannot create multiple NAType instances')

        self.type = schema.null()

    def __repr__(self):
        return 'NA'

    def as_py(self):
        # The Python-level equivalent of NA is None.
        return None


# Create the one and only NAType instance.
NA = NAType()

cdef class ArrayValue(Scalar):
    """
    Base class for scalars that refer to one element of an Arrow array.

    An instance keeps a shared pointer to the array (``sp_array``) together
    with the element position (``index``). Subclasses expose the element
    through an ``as_py`` method.
    """

    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
                   int index):
        # Record which element is referenced, then delegate the array
        # assignment to _set_array so subclasses can hook into it.
        self.index = index
        self.type = type
        self._set_array(sp_array)

    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
        # Store the shared pointer to the referenced array.
        self.sp_array = sp_array

    def __repr__(self):
        # Subclasses without an as_py method fall back to the generic
        # Scalar representation.
        if not hasattr(self, 'as_py'):
            return Scalar.__repr__(self)
        return repr(self.as_py())


cdef class BooleanValue(ArrayValue):
    """
    Scalar wrapper for a boolean array element.

    No ``as_py`` is defined here, so ``repr`` uses ``Scalar.__repr__``.
    """


cdef class Int8Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow Int8Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as an Int8Array.
        return (<CInt8Array*> self.sp_array.get()).Value(self.index)


cdef class UInt8Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow UInt8Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as a UInt8Array.
        return (<CUInt8Array*> self.sp_array.get()).Value(self.index)


cdef class Int16Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow Int16Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as an Int16Array.
        return (<CInt16Array*> self.sp_array.get()).Value(self.index)


cdef class UInt16Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow UInt16Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as a UInt16Array.
        return (<CUInt16Array*> self.sp_array.get()).Value(self.index)


cdef class Int32Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow Int32Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as an Int32Array.
        return (<CInt32Array*> self.sp_array.get()).Value(self.index)


cdef class UInt32Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow UInt32Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as a UInt32Array.
        return (<CUInt32Array*> self.sp_array.get()).Value(self.index)


cdef class Int64Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow Int64Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as an Int64Array.
        return (<CInt64Array*> self.sp_array.get()).Value(self.index)


cdef class UInt64Value(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow UInt64Array.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python integer.
        """
        # Unchecked downcast: the stored array is viewed as a UInt64Array.
        return (<CUInt64Array*> self.sp_array.get()).Value(self.index)


cdef class FloatValue(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow FloatArray.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python float.
        """
        # Unchecked downcast: the stored array is viewed as a FloatArray.
        return (<CFloatArray*> self.sp_array.get()).Value(self.index)


cdef class DoubleValue(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow DoubleArray.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python float.
        """
        # Unchecked downcast: the stored array is viewed as a DoubleArray.
        return (<CDoubleArray*> self.sp_array.get()).Value(self.index)


cdef class StringValue(ArrayValue):
    """
    Scalar wrapper for one element of an Arrow StringArray.
    """

    def as_py(self):
        """
        Return the element at ``self.index`` as a Python string.

        The raw bytes obtained from ``GetString`` are converted with
        ``arrow.compat.frombytes``.
        """
        cdef CStringArray* strings
        # Unchecked downcast: the stored array is viewed as a StringArray.
        strings = <CStringArray*> self.sp_array.get()
        raw = strings.GetString(self.index)
        return frombytes(raw)


cdef class ListValue(ArrayValue):
    """
    Scalar wrapper for one list element of an Arrow ListArray.

    The list's items live in the ListArray's shared child values array;
    this object addresses them through the parent's offsets. Items are
    boxed lazily into scalar objects on access.
    """

    def __len__(self):
        # Number of child values belonging to this list slot.
        return self.ap.value_length(self.index)

    def __getitem__(self, i):
        # Bounds-check before delegating: _getitem indexes straight into
        # the shared child values array, so an unchecked index would read a
        # neighbouring list's values (or past the end of the array).
        # Raising IndexError also lets sequence-protocol iteration
        # (``for item in value``) terminate.
        cdef:
            int length = self.ap.value_length(self.index)
            int pos = i

        # Support negative indices, counted from the end of this list.
        if pos < 0:
            pos += length
        if pos < 0 or pos >= length:
            raise IndexError('list index out of range')

        return self._getitem(pos)

    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
        # Besides storing the array, cache the typed ListArray pointer and
        # the boxed element type used when boxing child values.
        self.sp_array = sp_array
        self.ap = <CListArray*> sp_array.get()
        self.value_type = box_data_type(self.ap.value_type())

    cdef _getitem(self, int i):
        # Translate the list-relative position into a position in the
        # child values array. No bounds checking is done here.
        cdef int j = self.ap.offset(self.index) + i
        return box_arrow_scalar(self.value_type, self.ap.values(), j)

    def as_py(self):
        """
        Return the list as a Python list of converted Python values.

        Each child is boxed and converted with its own ``as_py``; null
        children are boxed as ``NA``, whose ``as_py`` returns None.
        """
        cdef:
            int j
            int length = self.ap.value_length(self.index)
            list result = []

        for j in range(length):
            result.append(self._getitem(j).as_py())

        return result


# Maps an Arrow logical type id to the scalar wrapper class that
# box_arrow_scalar instantiates for elements of that type.
#
# Unsigned types must map to the UInt*Value classes: the signed wrappers
# cast the array to the signed array type, which misreads large unsigned
# values (e.g. a uint8 200 would come back as -56).
cdef dict _scalar_classes = {
    LogicalType_UINT8: UInt8Value,
    LogicalType_UINT16: UInt16Value,
    LogicalType_UINT32: UInt32Value,
    LogicalType_UINT64: UInt64Value,
    LogicalType_INT8: Int8Value,
    LogicalType_INT16: Int16Value,
    LogicalType_INT32: Int32Value,
    LogicalType_INT64: Int64Value,
    LogicalType_FLOAT: FloatValue,
    LogicalType_DOUBLE: DoubleValue,
    LogicalType_LIST: ListValue,
    LogicalType_STRING: StringValue
}

cdef object box_arrow_scalar(DataType type,
                             const shared_ptr[CArray]& sp_array,
                             int index):
    """
    Wrap element ``index`` of ``sp_array`` in a Python scalar object.

    Returns the ``NA`` singleton when the element is null. Otherwise
    returns a new instance of the class registered in ``_scalar_classes``
    for the logical type id of ``type``, initialised with the type, array
    and index. A type id with no registered class raises KeyError from the
    dictionary lookup.
    """
    cdef ArrayValue boxed

    # Null elements are all represented by the shared NA singleton.
    if sp_array.get().IsNull(index):
        return NA

    boxed = _scalar_classes[type.type.type]()
    boxed.init(type, sp_array, index)
    return boxed
2 changes: 2 additions & 0 deletions python/arrow/schema.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,5 @@ cdef class Schema:
cdef:
shared_ptr[CSchema] sp_schema
CSchema* schema

cdef DataType box_data_type(const shared_ptr[CDataType]& type)
Loading