Validate struct child types and validate arrays built by pyarrow factories.

C++: ValidateVisitor::Visit(const StructArray&) now returns Status::Invalid
when a child array's type differs from the type of the struct field at the
same position, and the "Child array invalid" message closes its position
with "]" instead of "}".

Cython: the UnionArray (dense and sparse), DictionaryArray, StructArray and
ExtensionArray construction paths wrap the new array, call validate() on it,
and only then return it.

Tests: test_struct_from_arrays pins `a` to int64, builds field "c" from
c.type rather than b.type, and adds a mismatched-field case that expects
ValueError matching "int64 vs int32".

NOTE(review): indentation of context lines was reconstructed from a
whitespace-collapsed copy of this patch; confirm against the target tree
before applying.

diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 9ad29790377..66ab766ed7c 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -1234,6 +1234,7 @@ struct ValidateVisitor {
   }
 
   Status Visit(const StructArray& array) {
+    const auto& struct_type = checked_cast<const StructType&>(*array.type());
     if (array.num_fields() > 0) {
       // Validate fields
       int64_t array_length = array.field(0)->length();
@@ -1245,10 +1246,17 @@ struct ValidateVisitor {
                                  it->type()->ToString(), " at position [", idx, "]");
         }
 
+        auto it_type = struct_type.child(i)->type();
+        if (!it->type()->Equals(it_type)) {
+          return Status::Invalid("Child array at position [", idx,
+                                 "] does not match type field: ", it->type()->ToString(),
+                                 " vs ", it_type->ToString());
+        }
+
         const Status child_valid = it->Validate();
         if (!child_valid.ok()) {
           return Status::Invalid("Child array invalid: ", child_valid.ToString(),
-                                 " at position [", idx, "}");
+                                 " at position [", idx, "]");
         }
         ++idx;
       }
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index e47327ff2d4..cec3c2b4e73 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1290,7 +1290,9 @@ cdef class UnionArray(Array):
             check_status(CUnionArray.MakeDense(
                 deref(types.ap), deref(value_offsets.ap), c, c_field_names,
                 c_type_codes, &out))
-        return pyarrow_wrap_array(out)
+        cdef Array result = pyarrow_wrap_array(out)
+        result.validate()
+        return result
 
     @staticmethod
     def from_sparse(Array types, list children, list field_names=None,
@@ -1328,7 +1330,9 @@ cdef class UnionArray(Array):
                                                 c_field_names,
                                                 c_type_codes,
                                                 &out))
-        return pyarrow_wrap_array(out)
+        cdef Array result = pyarrow_wrap_array(out)
+        result.validate()
+        return result
 
 
 cdef class StringArray(Array):
@@ -1503,7 +1507,9 @@ cdef class DictionaryArray(Array):
             c_result.reset(new CDictionaryArray(c_type, _indices.sp_array,
                                                 _dictionary.sp_array))
 
-        return pyarrow_wrap_array(c_result)
+        cdef Array result = pyarrow_wrap_array(c_result)
+        result.validate()
+        return result
 
 
 cdef class StructArray(Array):
@@ -1628,7 +1634,9 @@ cdef class StructArray(Array):
         else:
             c_result = CStructArray.MakeFromFields(
                 c_arrays, c_fields, shared_ptr[CBuffer](), -1, 0)
-        return pyarrow_wrap_array(GetResultValue(c_result))
+        cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
+        result.validate()
+        return result
 
 
 cdef class ExtensionArray(Array):
@@ -1667,7 +1675,9 @@ cdef class ExtensionArray(Array):
                             "for extension type {1}".format(storage.type, typ))
 
         ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
-        return pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
+        cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
+        result.validate()
+        return result
 
 
 cdef dict _array_classes = {
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 9aa8a7c8b48..3cc96de5529 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -422,7 +422,7 @@ def test_struct_from_buffers():
 
 
 def test_struct_from_arrays():
-    a = pa.array([4, 5, 6])
+    a = pa.array([4, 5, 6], type=pa.int64())
     b = pa.array(["bar", None, ""])
     c = pa.array([[1, 2], None, [3, None]])
     expected_list = [
@@ -447,7 +447,7 @@ def test_struct_from_arrays():
     # From fields
     fa = pa.field("a", a.type, nullable=False)
     fb = pa.field("b", b.type)
-    fc = pa.field("c", b.type)
+    fc = pa.field("c", c.type)
     arr = pa.StructArray.from_arrays([a, b, c], fields=[fa, fb, fc])
     assert arr.type == pa.struct([fa, fb, fc])
     assert not arr.type[0].nullable
@@ -460,6 +460,11 @@ def test_struct_from_arrays():
     assert arr.type == pa.struct([])
     assert arr.to_pylist() == []
 
+    # Inconsistent fields
+    fa2 = pa.field("a", pa.int32())
+    with pytest.raises(ValueError, match="int64 vs int32"):
+        pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])
+
 
 def test_dictionary_from_numpy():
     indices = np.repeat([0, 1, 2], 2)