From 620b3b8588bcd46e66f3e98115594c5cc64176d8 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Fri, 16 Aug 2019 13:52:29 -0500
Subject: [PATCH] Add unit test for ARROW-5480

---
Review notes (ignored by git am):
- test_pandas_categorical_na_type_row_groups keeps its @pytest.mark.pandas
  marker; the earlier revision of this patch removed it although the test
  body uses pd.DataFrame.
- test_categorical_roundtrip is marked @pytest.mark.pandas because it goes
  through pd.DataFrame / DataFrame.to_parquet / pd.read_parquet.
- The postimage blob id on the "index" line predates these review edits.

 python/pyarrow/tests/test_parquet.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 12288bbda68c6..756cd7f532921 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -3033,6 +3033,21 @@ def test_pandas_categorical_na_type_row_groups():
     assert result[1].equals(table[1])
 
 
+@pytest.mark.pandas
+def test_categorical_roundtrip():
+    # ARROW-5480, this was enabled by ARROW-3246
+    from io import BytesIO
+    df = pd.DataFrame({'x': pd.Categorical(['a', 'a', 'b', 'b'])})
+
+    buf = BytesIO()
+    df.to_parquet(buf)
+
+    # ARROW-5480 reported 'x' reading back as object; it must be category
+    result = pd.read_parquet(BytesIO(buf.getvalue()))
+    assert result['x'].dtype == 'category'
+    tm.assert_frame_equal(result, df)
+
+
 @pytest.mark.pandas
 def test_multi_dataset_metadata(tempdir):
     filenames = ["ARROW-1983-dataset.0", "ARROW-1983-dataset.1"]