From e446f2a28ed461b0f4d8b272ac3395ed773432d4 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Mon, 30 Jan 2017 17:36:23 -0500
Subject: [PATCH] Adjust to cleaned up pyarrow FileReader API, support
 multiple record batches in a stream

---
 python/pyspark/serializers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 15cbaa9b4ffb..a8c9e6d162e6 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -192,9 +192,9 @@ def dumps(self, obj):
         raise NotImplementedError
 
     def loads(self, obj):
-        from pyarrow.ipc import ArrowFileReader
-        reader = ArrowFileReader(obj)
-        return reader.get_record_batch(0)
+        from pyarrow import FileReader, BufferReader
+        reader = FileReader(BufferReader(obj))
+        return reader.read_all()
 
     def __repr__(self):
         return "ArrowSerializer"
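
Note (context, not part of the patch): a minimal round-trip sketch of the behavior the new loads() relies on. read_all() returns a single Table covering every record batch in the stream, whereas the old get_record_batch(0) returned only the first batch. The sketch uses current pyarrow IPC names (pa.ipc.new_file, pa.ipc.open_file, pa.BufferReader), which are an assumption here and not the 2017-era FileReader/BufferReader imports the patch itself targets.

    # Round-trip sketch with current pyarrow IPC names (assumed; the patch
    # itself targets an older pyarrow API surface).
    import pyarrow as pa

    # Write two record batches into an in-memory Arrow file.
    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["x"])
    sink = pa.BufferOutputStream()
    writer = pa.ipc.new_file(sink, batch.schema)
    writer.write_batch(batch)
    writer.write_batch(batch)
    writer.close()
    buf = sink.getvalue()

    # Read it back the way the patched loads() does: read_all() yields one
    # Table spanning all batches, not just batch 0.
    reader = pa.ipc.open_file(pa.BufferReader(buf))
    assert reader.num_record_batches == 2
    table = reader.read_all()
    assert table.num_rows == 6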