Skip to content
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ Other enhancements
- Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
- :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
- Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
- Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to ``lzma.LZMAFile`` (:issue:`52979`)
- Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
- Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_210.notable_bug_fixes:
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,10 @@
(otherwise no compression).
Set to ``None`` for no compression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other
key-value pairs are forwarded to
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdCompressor`` or
``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for faster compression and to create
a reproducible gzip archive:
Expand All @@ -477,10 +477,10 @@
If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in.
Set to ``None`` for no decompression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other
key-value pairs are forwarded to
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdDecompressor`` or
``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for Zstandard decompression using a
custom compression dictionary:
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,8 +825,10 @@ def get_handle(
elif compression == "xz":
# error: Argument 1 to "LZMAFile" has incompatible type "Union[str,
# BaseBuffer]"; expected "Optional[Union[Union[str, bytes, PathLike[str],
# PathLike[bytes]], IO[bytes]]]"
handle = get_lzma_file()(handle, ioargs.mode) # type: ignore[arg-type]
# PathLike[bytes]], IO[bytes]], None]"
handle = get_lzma_file()(
handle, ioargs.mode, **compression_args # type: ignore[arg-type]
)

# Zstd Compression
elif compression == "zstd":
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,28 @@ def test_gzip_compression_level(obj, method):
assert compressed_size_default < compressed_size_fast


@pytest.mark.parametrize(
    "obj",
    [
        pd.DataFrame(
            100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
            columns=["X", "Y", "Z"],
        ),
        pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"),
    ],
)
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
def test_xz_compression_level_read(obj, method):
    """Check that extra ``xz`` compression arguments reach the writer.

    The object is written twice to the same temporary path: once with
    ``compression="xz"`` and once with a dict that additionally sets
    ``preset=1``.  The first file must be strictly smaller than the second,
    which shows that the ``preset`` key changed the output.  For ``to_csv``
    the preset-1 file is also read back with ``read_csv`` to confirm that
    it can still be opened as ``xz``.
    """
    # Bound writer method (obj.to_pickle / obj.to_json / obj.to_csv).
    write = getattr(obj, method)

    with tm.ensure_clean() as path:
        # Baseline: xz with no extra arguments.
        write(path, compression="xz")
        size_with_defaults = os.path.getsize(path)

        # Same data, same path, but with an explicit preset of 1.
        write(path, compression={"method": "xz", "preset": 1})
        size_with_preset_1 = os.path.getsize(path)

        assert size_with_defaults < size_with_preset_1

        # Only the CSV writer has its output read back here.
        if method == "to_csv":
            pd.read_csv(path, compression="xz")


@pytest.mark.parametrize(
"obj",
[
Expand Down