Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,7 @@ struct ValidateVisitor {
}

Status Visit(const StructArray& array) {
const auto& struct_type = checked_cast<const StructType&>(*array.type());
if (array.num_fields() > 0) {
// Validate fields
int64_t array_length = array.field(0)->length();
Expand All @@ -1245,10 +1246,17 @@ struct ValidateVisitor {
it->type()->ToString(), " at position [", idx, "]");
}

auto it_type = struct_type.child(i)->type();
if (!it->type()->Equals(it_type)) {
return Status::Invalid("Child array at position [", idx,
"] does not match type field: ", it->type()->ToString(),
" vs ", it_type->ToString());
}

const Status child_valid = it->Validate();
if (!child_valid.ok()) {
return Status::Invalid("Child array invalid: ", child_valid.ToString(),
" at position [", idx, "}");
" at position [", idx, "]");
}
++idx;
}
Expand Down
20 changes: 15 additions & 5 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,9 @@ cdef class UnionArray(Array):
check_status(CUnionArray.MakeDense(
deref(types.ap), deref(value_offsets.ap), c, c_field_names,
c_type_codes, &out))
return pyarrow_wrap_array(out)
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the `Array.from_*` constructors already ran this validation at the end and some did not (like `StructArray.from_arrays`, which I was fixing here), so I have now updated all of the `from_*` constructors to validate their result.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to add some constructor microbenchmarks just so we understand the cost involved, but in principle this is okay


@staticmethod
def from_sparse(Array types, list children, list field_names=None,
Expand Down Expand Up @@ -1328,7 +1330,9 @@ cdef class UnionArray(Array):
c_field_names,
c_type_codes,
&out))
return pyarrow_wrap_array(out)
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result


cdef class StringArray(Array):
Expand Down Expand Up @@ -1503,7 +1507,9 @@ cdef class DictionaryArray(Array):
c_result.reset(new CDictionaryArray(c_type, _indices.sp_array,
_dictionary.sp_array))

return pyarrow_wrap_array(c_result)
cdef Array result = pyarrow_wrap_array(c_result)
result.validate()
return result


cdef class StructArray(Array):
Expand Down Expand Up @@ -1628,7 +1634,9 @@ cdef class StructArray(Array):
else:
c_result = CStructArray.MakeFromFields(
c_arrays, c_fields, shared_ptr[CBuffer](), -1, 0)
return pyarrow_wrap_array(GetResultValue(c_result))
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
result.validate()
return result


cdef class ExtensionArray(Array):
Expand Down Expand Up @@ -1667,7 +1675,9 @@ cdef class ExtensionArray(Array):
"for extension type {1}".format(storage.type, typ))

ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
return pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
result.validate()
return result


cdef dict _array_classes = {
Expand Down
9 changes: 7 additions & 2 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def test_struct_from_buffers():


def test_struct_from_arrays():
a = pa.array([4, 5, 6])
a = pa.array([4, 5, 6], type=pa.int64())
b = pa.array(["bar", None, ""])
c = pa.array([[1, 2], None, [3, None]])
expected_list = [
Expand All @@ -447,7 +447,7 @@ def test_struct_from_arrays():
# From fields
fa = pa.field("a", a.type, nullable=False)
fb = pa.field("b", b.type)
fc = pa.field("c", b.type)
fc = pa.field("c", c.type)
arr = pa.StructArray.from_arrays([a, b, c], fields=[fa, fb, fc])
assert arr.type == pa.struct([fa, fb, fc])
assert not arr.type[0].nullable
Expand All @@ -460,6 +460,11 @@ def test_struct_from_arrays():
assert arr.type == pa.struct([])
assert arr.to_pylist() == []

# Inconsistent fields
fa2 = pa.field("a", pa.int32())
with pytest.raises(ValueError, match="int64 vs int32"):
pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])


def test_dictionary_from_numpy():
indices = np.repeat([0, 1, 2], 2)
Expand Down