
allow zlib to be an alias for gzip for h5netcdf backend #9983

Open

hmaarrfk wants to merge 1 commit into main
Conversation

hmaarrfk (Contributor)

I'm not too sure where to put this, but it seemed like you already had compatibility shims for this kind of stuff:

xref: h5netcdf/h5netcdf#252

  • Closes #xxxx
  • Tests added
  • User visible changes (including notable bug fixes) are documented in whats-new.rst
  • New functions/methods are listed in api.rst
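
For context, a minimal sketch of the kind of alias translation being proposed. The helper name normalize_h5py_compression is hypothetical, not the PR's actual diff; in xarray the encoding handling lives around prepare_variable in xarray/backends/h5netcdf_.py:

    def normalize_h5py_compression(encoding: dict) -> dict:
        """Map the libnetcdf-style codec name "zlib" to h5py's "gzip".

        Hypothetical helper illustrating the shim, under the assumption
        that the translation happens before the encoding reaches h5py.
        """
        encoding = dict(encoding)  # don't mutate the caller's dict
        if encoding.get("compression") == "zlib":
            # h5py registers the DEFLATE filter only under the name
            # "gzip"; passing "zlib" through unchanged raises ValueError.
            encoding["compression"] = "gzip"
        return encoding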

@hmaarrfk (Contributor, Author)

Failing test:

============================================== test session starts ==============================================
platform linux -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0
PySide6 6.8.1 -- Qt runtime 6.8.1 -- Qt compiled 6.8.1
rootdir: /home/mark/git/xarray
configfile: pyproject.toml
plugins: anyio-4.8.0, timeout-2.3.1, xdist-3.6.1, time-machine-2.16.0, forked-1.6.0, hydra-core-1.3.2, qt-4.4.0, env-1.1.5
collected 1 item

xarray/tests/test_backends.py F                                                                           [100%]

=================================================== FAILURES ====================================================
________________________________ TestH5NetCDFData.test_compression_encoding_h5py ________________________________

self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7e38062dd150>

    def test_compression_encoding_h5py(self) -> None:
        ENCODINGS: tuple[tuple[dict[str, Any], dict[str, Any]], ...] = (
            # h5py style compression with gzip codec will be converted to
            # NetCDF4-Python style on round-trip
            (
                {"compression": "gzip", "compression_opts": 9},
                {"zlib": True, "complevel": 9},
            ),
            # For compatibility with libnetcdf4 we allow users to specify
            # zlib for gzip
            (
                {"compression": "zlib", "compression_opts": 9},
                {"zlib": True, "complevel": 9},
            ),
            # What can't be expressed in NetCDF4-Python style is
            # round-tripped unaltered
            (
                {"compression": "lzf", "compression_opts": None},
                {"compression": "lzf", "compression_opts": None},
            ),
            # If both styles are used together, h5py format takes precedence
            (
                {
                    "compression": "lzf",
                    "compression_opts": None,
                    "zlib": True,
                    "complevel": 9,
                },
                {"compression": "lzf", "compression_opts": None},
            ),
        )

        for compr_in, compr_out in ENCODINGS:
            data = create_test_data()
            compr_common = {
                "chunksizes": (5, 5),
                "fletcher32": True,
                "shuffle": True,
                "original_shape": data.var2.shape,
            }
            data["var2"].encoding.update(compr_in)
            data["var2"].encoding.update(compr_common)
            compr_out.update(compr_common)
            data["scalar"] = ("scalar_dim", np.array([2.0]))
            data["scalar"] = data["scalar"][0]
>           with self.roundtrip(data) as actual:

/home/mark/git/xarray/xarray/tests/test_backends.py:4064:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/mark/miniforge3/envs/dev/lib/python3.10/contextlib.py:135: in __enter__
    return next(self.gen)
/home/mark/git/xarray/xarray/tests/test_backends.py:373: in roundtrip
    self.save(data, path, **save_kwargs)
/home/mark/git/xarray/xarray/tests/test_backends.py:394: in save
    return dataset.to_netcdf(
/home/mark/git/xarray/xarray/core/dataset.py:2380: in to_netcdf
    return to_netcdf(  # type: ignore[return-value]  # mypy cannot resolve the overloads:(
/home/mark/git/xarray/xarray/backends/api.py:1908: in to_netcdf
    dump_to_store(
/home/mark/git/xarray/xarray/backends/api.py:1955: in dump_to_store
    store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
/home/mark/git/xarray/xarray/backends/common.py:458: in store
    self.set_variables(
/home/mark/git/xarray/xarray/backends/common.py:496: in set_variables
    target, source = self.prepare_variable(
/home/mark/git/xarray/xarray/backends/h5netcdf_.py:356: in prepare_variable
    nc4_var = self.ds.create_variable(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5netcdf/core.py:1234: in create_variable
    return group._create_child_variable(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5netcdf/core.py:1112: in _create_child_variable
    self._h5group.create_dataset(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/group.py:183: in create_dataset
    dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/dataset.py:105: in make_new_dset
    dcpl = filters.fill_dcpl(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

plist = <h5py.h5p.PropDCID object at 0x7e3806225710>, shape = (8, 9), dtype = dtype('float64'), chunks = (5, 5)
compression = 'zlib', compression_opts = 9, shuffle = True, fletcher32 = True, maxshape = None
scaleoffset = None, external = None, allow_unknown_filter = False

    def fill_dcpl(plist, shape, dtype, chunks, compression, compression_opts,
                  shuffle, fletcher32, maxshape, scaleoffset, external,
                  allow_unknown_filter=False, *, fill_time=None):
        """ Generate a dataset creation property list.

        Undocumented and subject to change without warning.
        """

        if shape is None or shape == ():
            shapetype = 'Empty' if shape is None else 'Scalar'
            if any((chunks, compression, compression_opts, shuffle, fletcher32,
                    scaleoffset is not None)):
                raise TypeError(
                    f"{shapetype} datasets don't support chunk/filter options"
                )
            if maxshape and maxshape != ():
                raise TypeError(f"{shapetype} datasets cannot be extended")
            return h5p.create(h5p.DATASET_CREATE)

        def rq_tuple(tpl, name):
            """ Check if chunks/maxshape match dataset rank """
            if tpl in (None, True):
                return
            try:
                tpl = tuple(tpl)
            except TypeError:
                raise TypeError('"%s" argument must be None or a sequence object' % name)
            if len(tpl) != len(shape):
                raise ValueError('"%s" must have same rank as dataset shape' % name)

        rq_tuple(chunks, 'chunks')
        rq_tuple(maxshape, 'maxshape')

        if compression is not None:
            if isinstance(compression, FilterRefBase):
                compression_opts = compression.filter_options
                compression = compression.filter_id

            if compression not in encode and not isinstance(compression, int):
>               raise ValueError('Compression filter "%s" is unavailable' % compression)
E               ValueError: Compression filter "zlib" is unavailable

/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/filters.py:190: ValueError
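
The ValueError comes straight from h5py, which only registers the DEFLATE filter under the name "gzip" (alongside "lzf", "szip", and integer filter IDs), so the spelling "zlib" has to be translated before the encoding reaches h5py. A minimal reproduction without xarray (the file name demo.h5 is arbitrary):

    import h5py

    with h5py.File("demo.h5", "w") as f:
        # "zlib" is not in h5py's filter table, so this raises
        # ValueError: Compression filter "zlib" is unavailable
        f.create_dataset(
            "x", shape=(8, 9), dtype="f8",
            compression="zlib", compression_opts=9,
        )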
