Skip to content
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Other enhancements
- Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
- Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
- Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
-
- Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to ``lzma.LZMAFile`` (:issue:`52979`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.notable_bug_fixes:
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,10 @@
(otherwise no compression).
Set to ``None`` for no compression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other
key-value pairs are forwarded to
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdCompressor`` or
``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for faster compression and to create
a reproducible gzip archive:
Expand All @@ -477,10 +477,10 @@
If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in.
Set to ``None`` for no decompression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other
key-value pairs are forwarded to
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdDecompressor`` or
``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for Zstandard decompression using a
custom compression dictionary:
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,8 +825,10 @@ def get_handle(
elif compression == "xz":
# error: Argument 1 to "LZMAFile" has incompatible type "Union[str,
# BaseBuffer]"; expected "Optional[Union[Union[str, bytes, PathLike[str],
# PathLike[bytes]], IO[bytes]]]"
handle = get_lzma_file()(handle, ioargs.mode) # type: ignore[arg-type]
# PathLike[bytes]], IO[bytes]], None]"
handle = get_lzma_file()(
handle, ioargs.mode, **compression_args # type: ignore[arg-type]
)

# Zstd Compression
elif compression == "zstd":
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,24 @@ def test_gzip_compression_level(obj, method):
compressed_size_fast = os.path.getsize(path)
assert compressed_size_default < compressed_size_fast

@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
def test_xz_compression_level_read(obj, method):
    """
    Check that an xz ``preset`` passed in the compression dict takes effect.

    The object is written twice to the same path: once with the default xz
    settings and once with ``preset=1``. The test asserts that the default
    output is smaller than the ``preset=1`` output, and, for ``to_csv``, that
    the resulting file can be read back with :func:`read_csv`.

    Parameters
    ----------
    obj : object
        Object exposing the writer named by ``method``.
    method : str
        Name of the writer method to call on ``obj``.
    """
    # NOTE(review): ``obj`` is not parametrized on this test; it has to come
    # from a fixture or parametrization not visible here -- confirm.
    with tm.ensure_clean() as path:
        getattr(obj, method)(path, compression="xz")
        compressed_size_default = os.path.getsize(path)
        getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
        compressed_size_fast = os.path.getsize(path)
        assert compressed_size_default < compressed_size_fast
        if method == "to_csv":
            # Read back the file itself; the byte count is not a valid source.
            pd.read_csv(path, compression="xz")

@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
def test_xz_compression_invalid_args(obj, method):
    """
    Check that an unsupported xz keyword argument is rejected.

    ``compresslevel`` is not a keyword accepted by ``lzma.LZMAFile``, so
    forwarding it through the compression dict is expected to raise
    ``TypeError`` rather than being silently ignored.

    Parameters
    ----------
    obj : object
        Object exposing the writer named by ``method``.
    method : str
        Name of the writer method to call on ``obj``.
    """
    # NOTE(review): ``obj`` is not parametrized on this test; it has to come
    # from a fixture or parametrization not visible here -- confirm.
    invalid_compression = {"method": "xz", "compresslevel": 9}
    with tm.ensure_clean() as path:
        # pytest.raises fails the test when nothing is raised, unlike the
        # previous try/except whose assertion could never be false.
        with pytest.raises(TypeError, match="compresslevel"):
            getattr(obj, method)(path, compression=invalid_compression)

@pytest.mark.parametrize(
"obj",
Expand All @@ -273,6 +291,7 @@ def test_bzip_compression_level(obj, method):
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})



@pytest.mark.parametrize(
"suffix,archive",
[
Expand Down