diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 12288bbda68c6..2a9850858ec05 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -3033,6 +3033,25 @@ def test_pandas_categorical_na_type_row_groups(): assert result[1].equals(table[1]) +@pytest.mark.pandas +def test_pandas_categorical_roundtrip(): + # ARROW-5480, this was enabled by ARROW-3246 + + # Have one of the categories unobserved and include a null (-1) + codes = np.array([2, 0, 0, 2, 0, -1, 2], dtype='int32') + categories = ['foo', 'bar', 'baz'] + df = pd.DataFrame({'x': pd.Categorical.from_codes( + codes, categories=categories)}) + + buf = pa.BufferOutputStream() + pq.write_table(pa.table(df), buf) + + result = pq.read_table(buf.getvalue()).to_pandas() + assert result.x.dtype == 'category' + assert (result.x.cat.categories == categories).all() + tm.assert_frame_equal(result, df) + + @pytest.mark.pandas def test_multi_dataset_metadata(tempdir): filenames = ["ARROW-1983-dataset.0", "ARROW-1983-dataset.1"]