diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 3950ceea90e..9034da7b7c1 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -51,7 +51,7 @@ cdef int check_flight_status(const CStatus& status) nogil except -1: detail = FlightStatusDetail.UnwrapStatus(status) if detail: with gil: - message = frombytes(status.message()) + message = frombytes(status.message(), safe=True) detail_msg = detail.get().extra_info() if detail.get().code() == CFlightStatusInternal: raise FlightInternalError(message, detail_msg) @@ -72,7 +72,7 @@ cdef int check_flight_status(const CStatus& status) nogil except -1: size_detail = FlightWriteSizeStatusDetail.UnwrapStatus(status) if size_detail: with gil: - message = frombytes(status.message()) + message = frombytes(status.message(), safe=True) raise FlightWriteSizeExceededError( message, size_detail.get().limit(), size_detail.get().actual()) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index b13fdb8b0f9..d89fd69afd0 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -177,7 +177,7 @@ cdef class ParquetLogicalType(_Weakrefable): self.type = type def __str__(self): - return frombytes(self.type.get().ToString()) + return frombytes(self.type.get().ToString(), safe=True) def to_json(self): return frombytes(self.type.get().ToJSON()) @@ -715,8 +715,9 @@ cdef class ParquetSchema(_Weakrefable): self.schema = container._metadata.schema() def __repr__(self): - return """{0} -{1}""".format(object.__repr__(self), frombytes(self.schema.ToString())) + return "{0}\n{1}".format( + object.__repr__(self), + frombytes(self.schema.ToString(), safe=True)) def __reduce__(self): return ParquetSchema, (self.parent,) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 4803899a5e2..e4bfc36c5ec 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -790,7 +790,7 @@ cdef class Array(_PandasConvertible): cdef c_string result with nogil: result = self.ap.Diff(deref(other.ap)) - return frombytes(result) + return frombytes(result, safe=True) def cast(self, object target_type, safe=True): """ @@ -992,7 +992,7 @@ cdef class Array(_PandasConvertible): ) ) - return frombytes(result) + return frombytes(result, safe=True) def format(self, **kwargs): import warnings diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index ebf5ba0dda1..0259bafc886 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -79,7 +79,7 @@ cdef class ChunkedArray(_PandasConvertible): ) ) - return frombytes(result) + return frombytes(result, safe=True) def format(self, **kwargs): import warnings diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index a642573eae1..032793a3041 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -707,3 +707,14 @@ def test_schema_merge(): with pytest.raises(pa.ArrowInvalid): pa.unify_schemas([b, d]) + + +def test_undecodable_metadata(): + # ARROW-10214: undecodable metadata shouldn't fail repr() + data1 = b'abcdef\xff\x00' + data2 = b'ghijkl\xff\x00' + schema = pa.schema( + [pa.field('ints', pa.int16(), metadata={'key': data1})], + metadata={'key': data2}) + assert 'abcdef' in str(schema) + assert 'ghijkl' in str(schema) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index ecfdaadff06..76a35aa797e 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -154,7 +154,7 @@ cdef class DataType(_Weakrefable): return self.type.layout().buffers.size() def __str__(self): - return frombytes(self.type.ToString()) + return frombytes(self.type.ToString(), safe=True) def __hash__(self): return hash(str(self)) @@ -888,7 +888,7 @@ cdef class KeyValueMetadata(_Metadata, Mapping): return str(self) def __str__(self): - return frombytes(self.metadata.ToString()) + return frombytes(self.metadata.ToString(), safe=True) def __eq__(self, other): try: @@ -1012,7 +1012,8 @@ cdef class Field(_Weakrefable): return field, (self.name, self.type, self.nullable, self.metadata) def __str__(self): - return 'pyarrow.Field<{0}>'.format(frombytes(self.field.ToString())) + return 'pyarrow.Field<{0}>'.format( + frombytes(self.field.ToString(), safe=True)) def __repr__(self): return self.__str__() @@ -1574,7 +1575,7 @@ cdef class Schema(_Weakrefable): ) ) - return frombytes(result) + return frombytes(result, safe=True) def _export_to_c(self, uintptr_t out_ptr): """