diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 5d54551d7a8..be8e3634852 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1159,7 +1159,7 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( elif compression is not None: for column, codec in compression.iteritems(): check_compression_name(codec) - props.compression(column, compression_from_name(codec)) + props.compression(tobytes(column), compression_from_name(codec)) if isinstance(compression_level, int): props.compression_level(compression_level) diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 37e104303af..ae3dd0b3094 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -838,6 +838,11 @@ def test_compression_level(use_legacy_dataset): compression_level=5, use_legacy_dataset=use_legacy_dataset) + # Check that the user can provide a compression per column + _check_roundtrip(table, expected=table, + compression={'a': "gzip", 'b': "snappy"}, + use_legacy_dataset=use_legacy_dataset) + # Check that the user can provide a compression level per column _check_roundtrip(table, expected=table, compression="gzip", compression_level={'a': 2, 'b': 3},