diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 6aeed0a1892..a2c6352bdda 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -3345,3 +3345,42 @@ def test_filter_before_validate_schema(tempdir): # read single file using filter table = pq.read_table(tempdir, filters=[[('A', '==', 0)]]) assert table.column('B').equals(pa.chunked_array([[1, 2, 3]])) + + +@pytest.mark.pandas +@pytest.mark.fastparquet +@pytest.mark.filterwarnings("ignore:RangeIndex:DeprecationWarning") +def test_fastparquet_cross_compatibility(tempdir): + fp = pytest.importorskip('fastparquet') + + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(4.0, 7.0, dtype="float64"), + "d": [True, False, True], + "e": pd.date_range("20130101", periods=3), + "f": pd.Categorical(["a", "b", "a"]), + # fastparquet writes list as BYTE_ARRAY JSON, so no roundtrip + # "g": [[1, 2], None, [1, 2, 3]], + } + ) + table = pa.table(df) + + # Arrow -> fastparquet + file_arrow = str(tempdir / "cross_compat_arrow.parquet") + pq.write_table(table, file_arrow, compression=None) + + fp_file = fp.ParquetFile(file_arrow) + df_fp = fp_file.to_pandas() + tm.assert_frame_equal(df, df_fp) + + # Fastparquet -> arrow + file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet") + fp.write(file_fastparquet, df) + + table_fp = pq.read_pandas(file_fastparquet) + # for fastparquet written file, categoricals comes back as strings + # (no arrow schema in parquet metadata) + df['f'] = df['f'].astype(object) + tm.assert_frame_equal(table_fp.to_pandas(), df)