
allow zlib to be an alias for gzip for h5netcdf backend #9983

Open

hmaarrfk wants to merge 1 commit into main
Conversation

hmaarrfk (Contributor)

I'm not too sure where to put this, but it seemed like you already had compatibility shims for this kind of stuff:

xref: h5netcdf/h5netcdf#252

  • Closes #xxxx
  • Tests added
  • User visible changes (including notable bug fixes) are documented in whats-new.rst
  • New functions/methods are listed in api.rst
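
For context, a minimal sketch of the kind of alias translation being proposed. The helper name normalize_h5py_compression is hypothetical, not the PR's actual diff; in xarray the encoding handling lives around prepare_variable in xarray/backends/h5netcdf_.py:

    def normalize_h5py_compression(encoding: dict) -> dict:
        """Map the libnetcdf-style codec name "zlib" to h5py's "gzip".

        Hypothetical helper illustrating the shim, under the assumption
        that the translation happens before the encoding reaches h5py.
        """
        encoding = dict(encoding)  # don't mutate the caller's dict
        if encoding.get("compression") == "zlib":
            # h5py registers the DEFLATE filter only under the name
            # "gzip"; passing "zlib" through unchanged raises ValueError.
            encoding["compression"] = "gzip"
        return encoding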

@hmaarrfk (Contributor, Author)

Failing test:

============================================== test session starts ==============================================
platform linux -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0
PySide6 6.8.1 -- Qt runtime 6.8.1 -- Qt compiled 6.8.1
rootdir: /home/mark/git/xarray
configfile: pyproject.toml
plugins: anyio-4.8.0, timeout-2.3.1, xdist-3.6.1, time-machine-2.16.0, forked-1.6.0, hydra-core-1.3.2, qt-4.4.0, env-1.1.5
collected 1 item

xarray/tests/test_backends.py F                                                                           [100%]

=================================================== FAILURES ====================================================
________________________________ TestH5NetCDFData.test_compression_encoding_h5py ________________________________

self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7e38062dd150>

    def test_compression_encoding_h5py(self) -> None:
        ENCODINGS: tuple[tuple[dict[str, Any], dict[str, Any]], ...] = (
            # h5py style compression with gzip codec will be converted to
            # NetCDF4-Python style on round-trip
            (
                {"compression": "gzip", "compression_opts": 9},
                {"zlib": True, "complevel": 9},
            ),
            # For compatibility with libnetcdf4 we allow users to specify
            # zlib for gzip
            (
                {"compression": "zlib", "compression_opts": 9},
                {"zlib": True, "complevel": 9},
            ),
            # What can't be expressed in NetCDF4-Python style is
            # round-tripped unaltered
            (
                {"compression": "lzf", "compression_opts": None},
                {"compression": "lzf", "compression_opts": None},
            ),
            # If both styles are used together, h5py format takes precedence
            (
                {
                    "compression": "lzf",
                    "compression_opts": None,
                    "zlib": True,
                    "complevel": 9,
                },
                {"compression": "lzf", "compression_opts": None},
            ),
        )

        for compr_in, compr_out in ENCODINGS:
            data = create_test_data()
            compr_common = {
                "chunksizes": (5, 5),
                "fletcher32": True,
                "shuffle": True,
                "original_shape": data.var2.shape,
            }
            data["var2"].encoding.update(compr_in)
            data["var2"].encoding.update(compr_common)
            compr_out.update(compr_common)
            data["scalar"] = ("scalar_dim", np.array([2.0]))
            data["scalar"] = data["scalar"][0]
>           with self.roundtrip(data) as actual:

/home/mark/git/xarray/xarray/tests/test_backends.py:4064:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/mark/miniforge3/envs/dev/lib/python3.10/contextlib.py:135: in __enter__
    return next(self.gen)
/home/mark/git/xarray/xarray/tests/test_backends.py:373: in roundtrip
    self.save(data, path, **save_kwargs)
/home/mark/git/xarray/xarray/tests/test_backends.py:394: in save
    return dataset.to_netcdf(
/home/mark/git/xarray/xarray/core/dataset.py:2380: in to_netcdf
    return to_netcdf(  # type: ignore[return-value]  # mypy cannot resolve the overloads:(
/home/mark/git/xarray/xarray/backends/api.py:1908: in to_netcdf
    dump_to_store(
/home/mark/git/xarray/xarray/backends/api.py:1955: in dump_to_store
    store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
/home/mark/git/xarray/xarray/backends/common.py:458: in store
    self.set_variables(
/home/mark/git/xarray/xarray/backends/common.py:496: in set_variables
    target, source = self.prepare_variable(
/home/mark/git/xarray/xarray/backends/h5netcdf_.py:356: in prepare_variable
    nc4_var = self.ds.create_variable(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5netcdf/core.py:1234: in create_variable
    return group._create_child_variable(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5netcdf/core.py:1112: in _create_child_variable
    self._h5group.create_dataset(
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/group.py:183: in create_dataset
    dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/dataset.py:105: in make_new_dset
    dcpl = filters.fill_dcpl(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

plist = <h5py.h5p.PropDCID object at 0x7e3806225710>, shape = (8, 9), dtype = dtype('float64'), chunks = (5, 5)
compression = 'zlib', compression_opts = 9, shuffle = True, fletcher32 = True, maxshape = None
scaleoffset = None, external = None, allow_unknown_filter = False

    def fill_dcpl(plist, shape, dtype, chunks, compression, compression_opts,
                  shuffle, fletcher32, maxshape, scaleoffset, external,
                  allow_unknown_filter=False, *, fill_time=None):
        """ Generate a dataset creation property list.

        Undocumented and subject to change without warning.
        """

        if shape is None or shape == ():
            shapetype = 'Empty' if shape is None else 'Scalar'
            if any((chunks, compression, compression_opts, shuffle, fletcher32,
                    scaleoffset is not None)):
                raise TypeError(
                    f"{shapetype} datasets don't support chunk/filter options"
                )
            if maxshape and maxshape != ():
                raise TypeError(f"{shapetype} datasets cannot be extended")
            return h5p.create(h5p.DATASET_CREATE)

        def rq_tuple(tpl, name):
            """ Check if chunks/maxshape match dataset rank """
            if tpl in (None, True):
                return
            try:
                tpl = tuple(tpl)
            except TypeError:
                raise TypeError('"%s" argument must be None or a sequence object' % name)
            if len(tpl) != len(shape):
                raise ValueError('"%s" must have same rank as dataset shape' % name)

        rq_tuple(chunks, 'chunks')
        rq_tuple(maxshape, 'maxshape')

        if compression is not None:
            if isinstance(compression, FilterRefBase):
                compression_opts = compression.filter_options
                compression = compression.filter_id

            if compression not in encode and not isinstance(compression, int):
>               raise ValueError('Compression filter "%s" is unavailable' % compression)
E               ValueError: Compression filter "zlib" is unavailable

/home/mark/miniforge3/envs/dev/lib/python3.10/site-packages/h5py/_hl/filters.py:190: ValueError
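
The ValueError comes straight from h5py, which only registers the DEFLATE filter under the name "gzip" (alongside "lzf", "szip", and integer filter IDs), so the spelling "zlib" has to be translated before the encoding reaches h5py. A minimal reproduction without xarray (the file name demo.h5 is arbitrary):

    import h5py

    with h5py.File("demo.h5", "w") as f:
        # "zlib" is not in h5py's filter table, so this raises
        # ValueError: Compression filter "zlib" is unavailable
        f.create_dataset(
            "x", shape=(8, 9), dtype="f8",
            compression="zlib", compression_opts=9,
        )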
