Skip to content

Commit

Permalink
Remove VLenUTF8 from filters to avoid double encoding error (pydata/xarray#3476)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed May 5, 2022
1 parent 6cf0ccb commit f6e71e9
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions sgkit/io/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Dict, MutableMapping, Optional, Union

import fsspec
import numcodecs
import xarray as xr
from xarray import Dataset

Expand Down Expand Up @@ -38,6 +39,15 @@ def save_dataset(
for v in ds:
# Workaround for https://github.com/pydata/xarray/issues/4380
ds[v].encoding.pop("chunks", None)

# Remove VLenUTF8 from filters to avoid double encoding error https://github.com/pydata/xarray/issues/3476
filters = ds[v].encoding.get("filters", None)
var_len_str_codec = numcodecs.VLenUTF8()
if filters is not None and var_len_str_codec in filters:
filters = list(filters)
filters.remove(var_len_str_codec)
ds[v].encoding["filters"] = filters

ds.to_zarr(store, **kwargs)


Expand Down

0 comments on commit f6e71e9

Please sign in to comment.