diff --git a/python/doc/source/development.rst b/python/doc/source/development.rst index b5aba6c53ef..55b3efdad17 100644 --- a/python/doc/source/development.rst +++ b/python/doc/source/development.rst @@ -84,7 +84,7 @@ from conda-forge: conda create -y -q -n pyarrow-dev \ python=3.6 numpy six setuptools cython pandas pytest \ cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \ - brotli jemalloc -c conda-forge + brotli jemalloc lz4-c zstd -c conda-forge source activate pyarrow-dev diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index bbe52033526..6081a64fd90 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -629,7 +629,10 @@ cdef class ParquetWriter: cdef CTable* ctable = table.table if row_group_size is None or row_group_size == -1: - row_group_size = ctable.num_rows() + if ctable.num_rows() > 0: + row_group_size = ctable.num_rows() + else: + row_group_size = 1 elif row_group_size == 0: raise ValueError('Row group size cannot be 0') diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 40e44b352ac..116a0450b1b 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -265,6 +265,18 @@ def test_read_pandas_column_subset(tmpdir): tm.assert_frame_equal(df[['strings', 'uint8']], df_read) +@parquet +def test_pandas_parquet_empty_roundtrip(tmpdir): + df = _test_dataframe(0) + arrow_table = pa.Table.from_pandas(df) + imos = pa.BufferOutputStream() + _write_table(arrow_table, imos, version="2.0") + buf = imos.get_result() + reader = pa.BufferReader(buf) + df_read = _read_table(reader).to_pandas() + tm.assert_frame_equal(df, df_read) + + @parquet def test_pandas_parquet_pyfile_roundtrip(tmpdir): filename = tmpdir.join('pandas_pyfile_roundtrip.parquet').strpath