From a0bea98a1b38045af5a874bbe9447a3460c91bd5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 21 Dec 2017 10:23:02 -0800 Subject: [PATCH 01/25] move backend append logic to the prepare_variable methods --- xarray/backends/common.py | 14 +++----------- xarray/backends/h5netcdf_.py | 7 +++++-- xarray/backends/netCDF4_.py | 32 ++++++++++++++++++-------------- xarray/backends/netcdf3.py | 1 - xarray/backends/scipy_.py | 3 ++- xarray/backends/zarr.py | 7 +++++-- xarray/core/variable.py | 4 ---- 7 files changed, 33 insertions(+), 35 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fd408877f87..c289d35fa2e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -7,7 +7,6 @@ import traceback import contextlib from collections import Mapping -from distutils.version import LooseVersion from ..conventions import cf_encoder from ..core import indexing @@ -183,11 +182,7 @@ def add(self, source, target): def sync(self): if self.sources: import dask.array as da - import dask - if LooseVersion(dask.__version__) > LooseVersion('0.8.1'): - da.store(self.sources, self.targets, lock=self.lock) - else: - da.store(self.sources, self.targets) + da.store(self.sources, self.targets, lock=self.lock) self.sources = [] self.targets = [] @@ -232,11 +227,8 @@ def set_variables(self, variables, check_encoding_set, for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set - if vn not in self.variables: - target, source = self.prepare_variable( - name, v, check, unlimited_dims=unlimited_dims) - else: - target, source = self.ds.variables[name], v.data + target, source = self.prepare_variable( + name, v, check, unlimited_dims=unlimited_dims) self.writer.add(source, target) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b4d2dc7e689..82abaade06a 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -156,8 +156,11 @@ def prepare_variable(self, name, variable, check_encoding=False, 'chunksizes', 'fletcher32']: if key in encoding: kwargs[key] = encoding[key] - nc4_var = self.ds.createVariable(name, dtype, variable.dims, - fill_value=fill_value, **kwargs) + if name not in self.ds.variables: + nc4_var = self.ds.createVariable(name, dtype, variable.dims, + fill_value=fill_value, **kwargs) + else: + nc4_var = self.ds.variables[name] for k, v in iteritems(attrs): nc4_var.setncattr(k, v) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 59e195b1c9a..d8aa33f35dc 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -352,20 +352,24 @@ def prepare_variable(self, name, variable, check_encoding=False, encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims) - nc4_var = self.ds.createVariable( - varname=name, - datatype=datatype, - dimensions=variable.dims, - zlib=encoding.get('zlib', False), - complevel=encoding.get('complevel', 4), - shuffle=encoding.get('shuffle', True), - fletcher32=encoding.get('fletcher32', False), - contiguous=encoding.get('contiguous', False), - chunksizes=encoding.get('chunksizes'), - endian='native', - least_significant_digit=encoding.get('least_significant_digit'), - fill_value=fill_value) - _disable_auto_decode_variable(nc4_var) + if name in self.ds.variables: + nc4_var = self.ds.variables[name] + else: + nc4_var = self.ds.createVariable( + varname=name, + datatype=datatype, + dimensions=variable.dims, + zlib=encoding.get('zlib', False), + complevel=encoding.get('complevel', 4), + shuffle=encoding.get('shuffle', True), + fletcher32=encoding.get('fletcher32', False), + contiguous=encoding.get('contiguous', False), + chunksizes=encoding.get('chunksizes'), + endian='native', + least_significant_digit=encoding.get( + 'least_significant_digit'), + fill_value=fill_value) + _disable_auto_decode_variable(nc4_var) for k, v in iteritems(attrs): # set attributes one-by-one since netCDF4<1.0.10 can't handle diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 7194e06186f..7aa054bc119 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -6,7 +6,6 @@ import numpy as np from .. import conventions, Variable -from ..core import duck_array_ops from ..core.pycompat import basestring, unicode_type, OrderedDict diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 240b8f2ebaa..75d2de5e43b 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -196,7 +196,8 @@ def prepare_variable(self, name, variable, check_encoding=False, # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support # incremental writes. - self.ds.createVariable(name, data.dtype, variable.dims) + if name not in self.variables: + self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] for k, v in iteritems(variable.attrs): self._validate_attr_key(k) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 779d8d07886..30ea51811c4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -378,8 +378,11 @@ def prepare_variable(self, name, variable, check_encoding=False, # compressor='default', fill_value=0, order='C', store=None, # synchronizer=None, overwrite=False, path=None, chunk_store=None, # filters=None, cache_metadata=True, **kwargs) - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) + if name in self.ds: + zarr_array = self.ds[name] + else: + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) # decided not to explicity enumerate encoding options because we # risk overriding zarr's defaults (e.g. if we specificy # cache_metadata=None instead of True). Alternative is to have lots of diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 385ab2066cf..e3bead51a94 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,6 @@ from collections import defaultdict import functools import itertools -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -1392,9 +1391,6 @@ def quantile(self, q, dim=None, interpolation='linear'): raise TypeError("quantile does not work for arrays stored as dask " "arrays. Load the data via .compute() or .load() " "prior to calling this method.") - if LooseVersion(np.__version__) < LooseVersion('1.10.0'): - raise NotImplementedError( - 'quantile requres numpy version 1.10.0 or later') q = np.asarray(q, dtype=np.float64) From afdb254b74d01d4ed751a784e997fe579654de39 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 22 Dec 2017 11:38:46 -0700 Subject: [PATCH 02/25] deprecate variables/dimensions/attrs properties on AbstractWritableDataStore --- xarray/backends/common.py | 18 +++++------------- xarray/backends/scipy_.py | 4 ++-- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index c289d35fa2e..2f910456f54 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -132,24 +132,15 @@ def load(self): @property def variables(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. - variables, _ = self.load() - return variables + raise RuntimeError('using variables property is deprecated') @property def attrs(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. - _, attributes = self.load() - return attributes + raise RuntimeError('using attrs property is deprecated') @property def dimensions(self): + raise RuntimeError('using dimensions property is deprecated') return self.get_dimensions() def close(self): @@ -235,8 +226,9 @@ def set_variables(self, variables, check_encoding_set, def set_necessary_dimensions(self, variable, unlimited_dims=None): if unlimited_dims is None: unlimited_dims = set() + dims = self.get_dimensions() for d, l in zip(variable.dims, variable.shape): - if d not in self.dimensions: + if d not in dims: is_unlimited = d in unlimited_dims self.set_dimension(d, l, is_unlimited) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 75d2de5e43b..0994d8510b8 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -165,7 +165,7 @@ def get_encoding(self): def set_dimension(self, name, length, is_unlimited=False): with self.ensure_open(autoclose=False): - if name in self.dimensions: + if name in self.ds.dimensions: raise ValueError('%s does not support modifying dimensions' % type(self).__name__) dim_length = length if not is_unlimited else None @@ -196,7 +196,7 @@ def prepare_variable(self, name, variable, check_encoding=False, # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support # incremental writes. - if name not in self.variables: + if name not in self.ds.variables: self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] for k, v in iteritems(variable.attrs): From cc021508b090ce7b7ca05033b03e9260dfa2cb73 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 24 Dec 2017 12:23:34 -0700 Subject: [PATCH 03/25] warnings instead of errors for backend properties --- xarray/backends/common.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 2f910456f54..83753ced8f5 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -7,6 +7,7 @@ import traceback import contextlib from collections import Mapping +import warnings from ..conventions import cf_encoder from ..core import indexing @@ -132,15 +133,25 @@ def load(self): @property def variables(self): - raise RuntimeError('using variables property is deprecated') + warnings.warn('The ``variables`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) + variables, _ = self.load() + return variables @property def attrs(self): - raise RuntimeError('using attrs property is deprecated') + warnings.warn('The ``attrs`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) + _, attrs = self.load() + return attrs @property def dimensions(self): - raise RuntimeError('using dimensions property is deprecated') + warnings.warn('The ``dimensions`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) return self.get_dimensions() def close(self): From 86240cdc5b7dd4d473c934651c4e011b1e7b34e7 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 26 Dec 2017 11:40:48 -0800 Subject: [PATCH 04/25] use attrs.update when setting zarr attributes --- xarray/backends/zarr.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 30ea51811c4..84594602ad6 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -351,9 +351,11 @@ def set_dimension(self, name, length, is_unlimited=False): (self.ds.attrs[_DIMENSION_KEY], name, length)) self.ds.attrs[_DIMENSION_KEY][name] = length - def set_attribute(self, key, value): - _, attributes = _get_zarr_dims_and_attrs(self.ds, _DIMENSION_KEY) - attributes[key] = _encode_zarr_attr_value(value) + def set_attributes(self, attributes): + attrs = {} + for k, v in iteritems(attributes): + attrs[k] = _encode_zarr_attr_value(v) + self.ds.attrs.update(attrs) def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): From 9c89ef26476d535d978fbdae26b0466b3f37f171 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 26 Dec 2017 12:31:45 -0800 Subject: [PATCH 05/25] more performance improvements to attributes in zarr backend --- xarray/backends/zarr.py | 58 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 84594602ad6..3e03c36963f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -280,7 +280,7 @@ class ZarrStore(AbstractWritableDataStore): @classmethod def open_group(cls, store, mode='r', synchronizer=None, group=None, - writer=None): + writer=None, ): import zarr zarr_group = zarr.open_group(store=store, mode=mode, synchronizer=synchronizer, path=group) @@ -331,31 +331,45 @@ def get_variables(self): for k, v in self.ds.arrays()) def get_attrs(self): - _, attributes = _get_zarr_dims_and_attrs(self.ds, _DIMENSION_KEY) + attributes = HiddenKeyDict(self.ds.attrs, [_DIMENSION_KEY]) return _decode_zarr_attrs(attributes) def get_dimensions(self): - dimensions, _ = _get_zarr_dims_and_attrs(self.ds, _DIMENSION_KEY) + try: + dimensions = self.ds.attrs[_DIMENSION_KEY] + except KeyError: + raise KeyError("Zarr object is missing the attribute `%s`, which " + "is required for xarray to determine variable " + "dimensions." % (_DIMENSION_KEY)) return dimensions - def set_dimension(self, name, length, is_unlimited=False): - if is_unlimited: + # TODO: we need these checks one way or another + # def set_dimension(self, name, length, is_unlimited=False): + # # consistency check + # if name in self.ds.attrs[_DIMENSION_KEY]: + # if self.ds.attrs[_DIMENSION_KEY][name] != length: + # raise ValueError("Pre-existing array dimensions %r " + # "encoded in Zarr attributes are incompatible " + # "with newly specified dimension `%s`: %g" % + # (self.ds.attrs[_DIMENSION_KEY], name, length)) + # self.ds.attrs[_DIMENSION_KEY][name] = length + + def set_necessary_dimensions(self, variable, unlimited_dims=None): + if unlimited_dims is not None: raise NotImplementedError( "Zarr backend doesn't know how to handle unlimited dimensions") - # consistency check - if name in self.ds.attrs[_DIMENSION_KEY]: - if self.ds.attrs[_DIMENSION_KEY][name] != length: - raise ValueError("Pre-existing array dimensions %r " - "encoded in Zarr attributes are incompatible " - "with newly specified dimension `%s`: %g" % - (self.ds.attrs[_DIMENSION_KEY], name, length)) - self.ds.attrs[_DIMENSION_KEY][name] = length + dims = OrderedDict() + for d, l in zip(variable.dims, variable.shape): + # for now we're avoiding the checks in set_dimension to avoid + # hitting the remote dataset. + # TODO: fix this + dims[d] = l + self.ds.attrs[_DIMENSION_KEY].update(dims) def set_attributes(self, attributes): - attrs = {} - for k, v in iteritems(attributes): - attrs[k] = _encode_zarr_attr_value(v) - self.ds.attrs.update(attrs) + encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) + for k, v in iteritems(attributes)) + self.ds.attrs.update(encoded_attrs) def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): @@ -395,11 +409,13 @@ def prepare_variable(self, name, variable, check_encoding=False, # cache_metadata=encoding.get('cache_metadata')) # the magic for storing the hidden dimension data - zarr_array.attrs[_DIMENSION_KEY] = dims - _, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY) - + encoded_attrs = OrderedDict() + encoded_attrs[_DIMENSION_KEY] = dims for k, v in iteritems(attrs): - attributes[k] = _encode_zarr_attr_value(v) + encoded_attrs[k] = _encode_zarr_attr_value(v) + + # update all the attributes at once + zarr_array.attrs.update(encoded_attrs) return zarr_array, variable.data From d459c66b74ddd81bd674683c31665e4f6035cfe8 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 28 Dec 2017 09:10:10 -0800 Subject: [PATCH 06/25] fix typo --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 3e03c36963f..aba1853842e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -280,7 +280,7 @@ class ZarrStore(AbstractWritableDataStore): @classmethod def open_group(cls, store, mode='r', synchronizer=None, group=None, - writer=None, ): + writer=None): import zarr zarr_group = zarr.open_group(store=store, mode=mode, synchronizer=synchronizer, path=group) From 8f71b311d876d94b9ace0fcf383f20da62c401b2 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 1 Jan 2018 22:56:56 -0800 Subject: [PATCH 07/25] new set_dimensions method for writable data stores --- xarray/backends/common.py | 26 ++++++++++++++++++------ xarray/backends/h5netcdf_.py | 2 -- xarray/backends/netCDF4_.py | 2 -- xarray/backends/scipy_.py | 4 ---- xarray/backends/zarr.py | 38 +++++++++++++++--------------------- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 83753ced8f5..6451791a06b 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -6,10 +6,10 @@ import time import traceback import contextlib -from collections import Mapping +from collections import Mapping, OrderedDict import warnings -from ..conventions import cf_encoder +from ..conventions import cf_encoder, maybe_encode_as_char_array from ..core import indexing from ..core.utils import FrozenOrderedDict, NdimSizeLenMixin from ..core.pycompat import iteritems, dask_array_type @@ -216,7 +216,11 @@ def store_dataset(self, dataset): def store(self, variables, attributes, check_encoding_set=frozenset(), unlimited_dims=None): + # This seems out of place + variables = OrderedDict([(k, maybe_encode_as_char_array(v)) + for k, v in variables.items()]) self.set_attributes(attributes) + self.set_dimensions(variables, unlimited_dims=unlimited_dims) self.set_variables(variables, check_encoding_set, unlimited_dims=unlimited_dims) @@ -234,12 +238,22 @@ def set_variables(self, variables, check_encoding_set, self.writer.add(source, target) - def set_necessary_dimensions(self, variable, unlimited_dims=None): + def set_dimensions(self, variables, unlimited_dims=None): if unlimited_dims is None: unlimited_dims = set() - dims = self.get_dimensions() - for d, l in zip(variable.dims, variable.shape): - if d not in dims: + + existing_dims = self.get_dimensions() + + dims = {} + for v in variables.values(): + dims.update(dict(zip(v.dims, v.shape))) + + for d, l in dims.items(): + + if d in existing_dims and l != existing_dims[d]: + raise ValueError("Unable to update size for existing dimension" + "%r (%d != %d)" % (d, l, existing_dims[d])) + elif d not in existing_dims: is_unlimited = d in unlimited_dims self.set_dimension(d, l, is_unlimited) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 82abaade06a..9d32e7e6cb7 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -133,8 +133,6 @@ def prepare_variable(self, name, variable, check_encoding=False, attrs = variable.attrs.copy() variable, dtype = _nc4_values_and_dtype(variable) - self.set_necessary_dimensions(variable, unlimited_dims=unlimited_dims) - fill_value = attrs.pop('_FillValue', None) if dtype is str and fill_value is not None: raise NotImplementedError( diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d8aa33f35dc..d1e8e97eb64 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -334,8 +334,6 @@ def prepare_variable(self, name, variable, check_encoding=False, variable = encode_nc3_variable(variable) datatype = variable.dtype - self.set_necessary_dimensions(variable, unlimited_dims=unlimited_dims) - attrs = variable.attrs.copy() fill_value = attrs.pop('_FillValue', None) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 0994d8510b8..05124d8d59d 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -188,10 +188,6 @@ def prepare_variable(self, name, variable, check_encoding=False, raise ValueError('unexpected encoding for scipy backend: %r' % list(variable.encoding)) - if unlimited_dims is not None and len(unlimited_dims) > 1: - raise ValueError('NETCDF3 only supports one unlimited dimension') - self.set_necessary_dimensions(variable, unlimited_dims=unlimited_dims) - data = variable.data # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 3e03c36963f..b809d0f3c03 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -343,28 +343,25 @@ def get_dimensions(self): "dimensions." % (_DIMENSION_KEY)) return dimensions - # TODO: we need these checks one way or another - # def set_dimension(self, name, length, is_unlimited=False): - # # consistency check - # if name in self.ds.attrs[_DIMENSION_KEY]: - # if self.ds.attrs[_DIMENSION_KEY][name] != length: - # raise ValueError("Pre-existing array dimensions %r " - # "encoded in Zarr attributes are incompatible " - # "with newly specified dimension `%s`: %g" % - # (self.ds.attrs[_DIMENSION_KEY], name, length)) - # self.ds.attrs[_DIMENSION_KEY][name] = length - - def set_necessary_dimensions(self, variable, unlimited_dims=None): + def set_dimensions(self, variables, unlimited_dims=None): if unlimited_dims is not None: raise NotImplementedError( "Zarr backend doesn't know how to handle unlimited dimensions") - dims = OrderedDict() - for d, l in zip(variable.dims, variable.shape): - # for now we're avoiding the checks in set_dimension to avoid - # hitting the remote dataset. - # TODO: fix this - dims[d] = l - self.ds.attrs[_DIMENSION_KEY].update(dims) + + existing_dims = self.get_dimensions() + + dims = {} + for v in variables.values: + dims.update(dict(zip(v.dims, v.shape))) + + update_dims = {} + for d, l in dims.items(): + if d in existing_dims and l != existing_dims[d]: + raise ValueError("Unable to update size for existing dimension" + "%r (%d != %d)" % (d, l, existing_dims[d])) + update_dims[d] = l + + self.ds.attrs[_DIMENSION_KEY].update(update_dims) def set_attributes(self, attributes): encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) @@ -379,9 +376,6 @@ def prepare_variable(self, name, variable, check_encoding=False, dtype = variable.dtype shape = variable.shape - # TODO: figure out how zarr should deal with unlimited dimensions - self.set_necessary_dimensions(variable, unlimited_dims=unlimited_dims) - fill_value = _ensure_valid_fill_value(attrs.pop('_FillValue', None), dtype) From 67fcd9287c64b8d19e6c4588bd33b441b585c1bf Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 2 Jan 2018 08:59:01 -0800 Subject: [PATCH 08/25] more fixes for zarr --- xarray/backends/zarr.py | 58 +++++++++-------------------------------- 1 file changed, 13 insertions(+), 45 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index fc80fc118a9..d4d424fab41 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -379,37 +379,27 @@ def prepare_variable(self, name, variable, check_encoding=False, fill_value = _ensure_valid_fill_value(attrs.pop('_FillValue', None), dtype) - # TODO: figure out what encoding is needed for zarr encoding = _extract_zarr_variable_encoding( variable, raise_on_invalid=check_encoding) - # arguments for zarr.create: - # zarr.creation.create(shape, chunks=None, dtype=None, - # compressor='default', fill_value=0, order='C', store=None, - # synchronizer=None, overwrite=False, path=None, chunk_store=None, - # filters=None, cache_metadata=True, **kwargs) - if name in self.ds: - zarr_array = self.ds[name] - else: - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) - # decided not to explicity enumerate encoding options because we - # risk overriding zarr's defaults (e.g. if we specificy - # cache_metadata=None instead of True). Alternative is to have lots of - # logic in _extract_zarr_variable encoding to duplicate zarr defaults. - # chunks=encoding.get('chunks'), - # compressor=encoding.get('compressor'), - # filters=encodings.get('filters'), - # cache_metadata=encoding.get('cache_metadata')) - - # the magic for storing the hidden dimension data encoded_attrs = OrderedDict() + # the magic for storing the hidden dimension data encoded_attrs[_DIMENSION_KEY] = dims for k, v in iteritems(attrs): encoded_attrs[k] = _encode_zarr_attr_value(v) - # update all the attributes at once - zarr_array.attrs.update(encoded_attrs) + if name in self.ds: + zarr_array = self.ds[name] + zarr_array.attrs.update(encoded_attrs) + else: + # arguments for zarr.create: + # zarr.creation.create(shape, chunks=None, dtype=None, + # compressor='default', fill_value=0, order='C', store=None, + # synchronizer=None, overwrite=False, path=None, chunk_store=None, + # filters=None, cache_metadata=True, **kwargs) + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) + zarr_array.attrs.put(encoded_attrs) return zarr_array, variable.data @@ -421,28 +411,6 @@ def store(self, variables, attributes, *args, **kwargs): # sync() and close() methods should not be needed with zarr -# from zarr docs - -# Zarr arrays can be used as either the source or sink for data in parallel -# computations. Both multi-threaded and multi-process parallelism are -# supported. The Python global interpreter lock (GIL) is released for both -# compression and decompression operations, so Zarr will not block other Python -# threads from running. -# -# A Zarr array can be read concurrently by multiple threads or processes. No -# synchronization (i.e., locking) is required for concurrent reads. -# -# A Zarr array can also be written to concurrently by multiple threads or -# processes. Some synchronization may be required, depending on the way the -# data is being written. - -# If each worker in a parallel computation is writing to a separate region of -# the array, and if region boundaries are perfectly aligned with chunk -# boundaries, then no synchronization is required. However, if region and chunk -# boundaries are not perfectly aligned, then synchronization is required to -# avoid two workers attempting to modify the same chunk at the same time. - - def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, decode_cf=True, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, From b38e1a6a0ee2b7b6396a1ddb7214f09b1bd34a62 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 2 Jan 2018 13:02:12 -0800 Subject: [PATCH 09/25] more tests for zarr and remove append logic for zarr --- xarray/backends/zarr.py | 23 ++++------------------- xarray/tests/test_backends.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d4d424fab41..fdac0945553 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -43,13 +43,8 @@ def _ensure_valid_fill_value(value, dtype): return _encode_zarr_attr_value(valid) -def _decode_zarr_attr_value(value): - return value - - def _decode_zarr_attrs(attrs): - return OrderedDict([(k, _decode_zarr_attr_value(v)) - for k, v in attrs.items()]) + return OrderedDict(attrs.asdict()) def _replace_slices_with_arrays(key, shape): @@ -388,18 +383,9 @@ def prepare_variable(self, name, variable, check_encoding=False, for k, v in iteritems(attrs): encoded_attrs[k] = _encode_zarr_attr_value(v) - if name in self.ds: - zarr_array = self.ds[name] - zarr_array.attrs.update(encoded_attrs) - else: - # arguments for zarr.create: - # zarr.creation.create(shape, chunks=None, dtype=None, - # compressor='default', fill_value=0, order='C', store=None, - # synchronizer=None, overwrite=False, path=None, chunk_store=None, - # filters=None, cache_metadata=True, **kwargs) - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) - zarr_array.attrs.put(encoded_attrs) + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) + zarr_array.attrs.put(encoded_attrs) return zarr_array, variable.data @@ -408,7 +394,6 @@ def store(self, variables, attributes, *args, **kwargs): for k, v in iteritems(variables)) AbstractWritableDataStore.store(self, new_vars, attributes, *args, **kwargs) - # sync() and close() methods should not be needed with zarr def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6b0cd59eb9e..f35a74bde50 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -725,6 +725,16 @@ def test_append_overwrite_values(self): with self.open(tmp_file) as actual: self.assertDatasetIdentical(data, actual) + def test_append_with_invalid_dim_raises(self): + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + self.save(data, tmp_file, mode='w') + data['var9'] = data['var2'] * 3 + data = data.isel(dim1=slice(2, 6)) # modify one dimension + with raises_regex(ValueError, + 'Unable to update size for existing dimension'): + self.save(data, tmp_file, mode='a') + def test_vectorized_indexing(self): self._test_vectorized_indexing(vindex_support=False) From 47ba8b658dbb35ad4b24ea3a437acf94af6c0ddf Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 2 Jan 2018 13:02:12 -0800 Subject: [PATCH 10/25] more tests for zarr and remove append logic for zarr --- xarray/backends/zarr.py | 38 +++++++---------------------------- xarray/tests/test_backends.py | 10 +++++++++ 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d4d424fab41..c414696678f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -43,13 +43,8 @@ def _ensure_valid_fill_value(value, dtype): return _encode_zarr_attr_value(valid) -def _decode_zarr_attr_value(value): - return value - - def _decode_zarr_attrs(attrs): - return OrderedDict([(k, _decode_zarr_attr_value(v)) - for k, v in attrs.items()]) + return OrderedDict(attrs.asdict()) def _replace_slices_with_arrays(key, shape): @@ -348,25 +343,16 @@ def set_dimensions(self, variables, unlimited_dims=None): raise NotImplementedError( "Zarr backend doesn't know how to handle unlimited dimensions") - existing_dims = self.get_dimensions() - dims = {} - for v in variables.values: + for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) - update_dims = {} - for d, l in dims.items(): - if d in existing_dims and l != existing_dims[d]: - raise ValueError("Unable to update size for existing dimension" - "%r (%d != %d)" % (d, l, existing_dims[d])) - update_dims[d] = l - - self.ds.attrs[_DIMENSION_KEY].update(update_dims) + self.ds.attrs[_DIMENSION_KEY].update(dims) def set_attributes(self, attributes): encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) for k, v in iteritems(attributes)) - self.ds.attrs.update(encoded_attrs) + self.ds.attrs.put(encoded_attrs) def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): @@ -388,18 +374,9 @@ def prepare_variable(self, name, variable, check_encoding=False, for k, v in iteritems(attrs): encoded_attrs[k] = _encode_zarr_attr_value(v) - if name in self.ds: - zarr_array = self.ds[name] - zarr_array.attrs.update(encoded_attrs) - else: - # arguments for zarr.create: - # zarr.creation.create(shape, chunks=None, dtype=None, - # compressor='default', fill_value=0, order='C', store=None, - # synchronizer=None, overwrite=False, path=None, chunk_store=None, - # filters=None, cache_metadata=True, **kwargs) - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) - zarr_array.attrs.put(encoded_attrs) + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) + zarr_array.attrs.put(encoded_attrs) return zarr_array, variable.data @@ -408,7 +385,6 @@ def store(self, variables, attributes, *args, **kwargs): for k, v in iteritems(variables)) AbstractWritableDataStore.store(self, new_vars, attributes, *args, **kwargs) - # sync() and close() methods should not be needed with zarr def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6b0cd59eb9e..f35a74bde50 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -725,6 +725,16 @@ def test_append_overwrite_values(self): with self.open(tmp_file) as actual: self.assertDatasetIdentical(data, actual) + def test_append_with_invalid_dim_raises(self): + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + self.save(data, tmp_file, mode='w') + data['var9'] = data['var2'] * 3 + data = data.isel(dim1=slice(2, 6)) # modify one dimension + with raises_regex(ValueError, + 'Unable to update size for existing dimension'): + self.save(data, tmp_file, mode='a') + def test_vectorized_indexing(self): self._test_vectorized_indexing(vindex_support=False) From 26b6bcb6460b66ba983550cf6d3e85dbc327546e Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 2 Jan 2018 13:45:57 -0800 Subject: [PATCH 11/25] a few more tweaks to zarr attrs --- xarray/backends/zarr.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c414696678f..eeea7102d89 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -292,9 +292,6 @@ def __init__(self, zarr_group, writer=None): raise KeyError("Zarr group can't be read by xarray because " "it is missing the `%s` attribute." % _DIMENSION_KEY) - else: - # initialize hidden dimension attribute - self.ds.attrs[_DIMENSION_KEY] = {} if writer is None: # by default, we should not need a lock for writing zarr because @@ -310,7 +307,7 @@ def open_store_variable(self, name, zarr_array): data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self)) dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY) - attributes = _decode_zarr_attrs(attributes) + attributes = _decode_zarr_attrs(attributes.asdict()) encoding = {'chunks': zarr_array.chunks, 'compressor': zarr_array.compressor, 'filters': zarr_array.filters} @@ -326,12 +323,12 @@ def get_variables(self): for k, v in self.ds.arrays()) def get_attrs(self): - attributes = HiddenKeyDict(self.ds.attrs, [_DIMENSION_KEY]) + attributes = HiddenKeyDict(self.ds.attrs.asdict(), [_DIMENSION_KEY]) return _decode_zarr_attrs(attributes) def get_dimensions(self): try: - dimensions = self.ds.attrs[_DIMENSION_KEY] + dimensions = self.ds.attrs[_DIMENSION_KEY].asdict() except KeyError: raise KeyError("Zarr object is missing the attribute `%s`, which " "is required for xarray to determine variable " @@ -347,7 +344,7 @@ def set_dimensions(self, variables, unlimited_dims=None): for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) - self.ds.attrs[_DIMENSION_KEY].update(dims) + self.ds.attrs.update({_DIMENSION_KEY: dims}) def set_attributes(self, attributes): encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) From b7681ae96044d95068303a2b6ae2e14de86764c7 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 4 Jan 2018 14:16:37 -0800 Subject: [PATCH 12/25] Add encode methods to writable data stores, fixes for Zarr tests --- xarray/backends/common.py | 35 +++++++++++++---- xarray/backends/h5netcdf_.py | 7 +++- xarray/backends/netCDF4_.py | 36 +++++++++++------ xarray/backends/scipy_.py | 5 ++- xarray/backends/zarr.py | 12 +++--- xarray/tests/test_backends.py | 74 ++++++++++++++++++++++++++++------- 6 files changed, 125 insertions(+), 44 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 6451791a06b..a2eefa3a325 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -9,7 +9,7 @@ from collections import Mapping, OrderedDict import warnings -from ..conventions import cf_encoder, maybe_encode_as_char_array +from ..conventions import cf_encoder from ..core import indexing from ..core.utils import FrozenOrderedDict, NdimSizeLenMixin from ..core.pycompat import iteritems, dask_array_type @@ -177,7 +177,7 @@ def add(self, source, target): else: try: target[...] = source - except TypeError: + except (TypeError, PermissionError): # workaround for GH: scipy/scipy#6880 target[:] = source @@ -195,6 +195,19 @@ def __init__(self, writer=None): writer = ArrayWriter() self.writer = writer + def encode(self, variables, attributes): + variables = OrderedDict([(k, self.encode_variable(v)) + for k, v in variables.items()]) + attributes = OrderedDict([(k, self.encode_attribute(v)) + for k, v in attributes.items()]) + return variables, attributes + + def encode_variable(self, v): + return v + + def encode_attribute(self, a): + return a + def set_dimension(self, d, l): # pragma: no cover raise NotImplementedError @@ -216,9 +229,8 @@ def store_dataset(self, dataset): def store(self, variables, attributes, check_encoding_set=frozenset(), unlimited_dims=None): - # This seems out of place - variables = OrderedDict([(k, maybe_encode_as_char_array(v)) - for k, v in variables.items()]) + variables, attributes = self.encode(variables, attributes) + self.set_attributes(attributes) self.set_dimensions(variables, unlimited_dims=unlimited_dims) self.set_variables(variables, check_encoding_set, @@ -260,11 +272,18 @@ def set_dimensions(self, variables, unlimited_dims=None): class WritableCFDataStore(AbstractWritableDataStore): - def store(self, variables, attributes, *args, **kwargs): + def encode(self, variables, attributes): # All NetCDF files get CF encoded by default, without this attempting # to write times, for example, would fail. - cf_variables, cf_attrs = cf_encoder(variables, attributes) - AbstractWritableDataStore.store(self, cf_variables, cf_attrs, + variables, attributes = cf_encoder(variables, attributes) + variables = OrderedDict([(k, self.encode_variable(v)) + for k, v in variables.items()]) + attributes = OrderedDict([(k, self.encode_attribute(v)) + for k, v in attributes.items()]) + return variables, attributes + + def store(self, variables, attributes, *args, **kwargs): + AbstractWritableDataStore.store(self, variables, attributes, *args, **kwargs) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 9d32e7e6cb7..b83fed76b13 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -9,7 +9,7 @@ from ..core.pycompat import iteritems, bytes_type, unicode_type, OrderedDict from .common import WritableCFDataStore, DataStorePickleMixin, find_root -from .netCDF4_ import (_nc4_group, _nc4_values_and_dtype, +from .netCDF4_ import (_nc4_group, _encode_nc4_variable, _get_datatype, _extract_nc4_variable_encoding, BaseNetCDF4Array) @@ -126,12 +126,15 @@ def set_attribute(self, key, value): with self.ensure_open(autoclose=False): self.ds.setncattr(key, value) + def encode_variable(self, variable): + return _encode_nc4_variable(variable) + def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): import h5py attrs = variable.attrs.copy() - variable, dtype = _nc4_values_and_dtype(variable) + dtype = _get_datatype(variable) fill_value = attrs.pop('_FillValue', None) if dtype is str and fill_value is not None: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d1e8e97eb64..15ef12d3f89 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -74,19 +74,28 @@ def __getitem__(self, key): return data -def _nc4_values_and_dtype(var): +def _encode_nc4_variable(var): + if var.dtype.kind == 'S': + var = conventions.maybe_encode_as_char_array(var) + return var + + +def _get_datatype(var, nc_format='NETCDF4'): + if nc_format == 'NETCDF4': + datatype = _nc4_dtype(var) + else: + datatype = var.dtype + return datatype + + +def _nc4_dtype(var): if var.dtype.kind == 'U': dtype = str - elif var.dtype.kind == 'S': - # use character arrays instead of unicode, because unicode support in - # netCDF4 is still rather buggy - var = conventions.maybe_encode_as_char_array(var) - dtype = var.dtype - elif var.dtype.kind in ['i', 'u', 'f', 'c']: + elif var.dtype.kind in ['i', 'u', 'f', 'c', 'S']: dtype = var.dtype else: raise ValueError('cannot infer dtype for netCDF4 variable') - return var, dtype + return dtype def _nc4_group(ds, group, mode): @@ -324,16 +333,17 @@ def set_variables(self, *args, **kwargs): with self.ensure_open(autoclose=False): super(NetCDF4DataStore, self).set_variables(*args, **kwargs) - def prepare_variable(self, name, variable, check_encoding=False, - unlimited_dims=None): + def encode_variable(self, variable): variable = _force_native_endianness(variable) - if self.format == 'NETCDF4': - variable, datatype = _nc4_values_and_dtype(variable) + variable = _encode_nc4_variable(variable) else: variable = encode_nc3_variable(variable) - datatype = variable.dtype + return variable + def prepare_variable(self, name, variable, check_encoding=False, + unlimited_dims=None): + datatype = _get_datatype(variable, self.format) attrs = variable.attrs.copy() fill_value = attrs.pop('_FillValue', None) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 05124d8d59d..dba2e5672a2 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -181,9 +181,12 @@ def set_attribute(self, key, value): value = encode_nc3_attr_value(value) setattr(self.ds, key, value) + def encode_variable(self, variable): + variable = encode_nc3_variable(variable) + return variable + def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): - variable = encode_nc3_variable(variable) if check_encoding and variable.encoding: raise ValueError('unexpected encoding for scipy backend: %r' % list(variable.encoding)) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index eeea7102d89..ae890347ab9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -44,7 +44,7 @@ def _ensure_valid_fill_value(value, dtype): def _decode_zarr_attrs(attrs): - return OrderedDict(attrs.asdict()) + return OrderedDict(attrs) def _replace_slices_with_arrays(key, shape): @@ -307,7 +307,7 @@ def open_store_variable(self, name, zarr_array): data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self)) dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY) - attributes = _decode_zarr_attrs(attributes.asdict()) + attributes = _decode_zarr_attrs(attributes) encoding = {'chunks': zarr_array.chunks, 'compressor': zarr_array.compressor, 'filters': zarr_array.filters} @@ -351,6 +351,10 @@ def set_attributes(self, attributes): for k, v in iteritems(attributes)) self.ds.attrs.put(encoded_attrs) + def encode_variable(self, variable): + variable = encode_zarr_variable(variable) + return variable + def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): @@ -378,9 +382,7 @@ def prepare_variable(self, name, variable, check_encoding=False, return zarr_array, variable.data def store(self, variables, attributes, *args, **kwargs): - new_vars = OrderedDict((k, encode_zarr_variable(v, name=k)) - for k, v in iteritems(variables)) - AbstractWritableDataStore.store(self, new_vars, attributes, + AbstractWritableDataStore.store(self, variables, attributes, *args, **kwargs) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f35a74bde50..a1252a3cc27 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1102,12 +1102,26 @@ def create_store(self): with self.create_zarr_target() as store_target: yield backends.ZarrStore.open_group(store_target, mode='w') + def save(self, dataset, store_target, **kwargs): + dataset.to_zarr(store=store_target, **kwargs) + + @contextlib.contextmanager + def open(self, store_target, **kwargs): + with xr.open_zarr(store_target, **kwargs) as ds: + yield ds + @contextlib.contextmanager def roundtrip(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False): with self.create_zarr_target() as store_target: - data.to_zarr(store=store_target, **save_kwargs) - yield xr.open_zarr(store_target, **open_kwargs) + self.save(data, store_target, **save_kwargs) + with self.open(store_target, **open_kwargs) as ds: + yield ds + + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + pytest.skip("zarr backend does not support appending") def test_auto_chunk(self): original = create_test_data().chunk() @@ -1205,8 +1219,8 @@ def test_hidden_zarr_keys(self): # JSON only has a single array type, which maps to list in Python. # In contrast, dims in xarray is always a tuple. for var in expected.variables.keys(): - assert (zarr_group[var].attrs[self.DIMENSION_KEY] - == list(expected[var].dims)) + dims = zarr_group[var].attrs[self.DIMENSION_KEY] + assert dims == list(expected[var].dims) with xr.decode_cf(store) as actual: # make sure it is hidden @@ -1215,10 +1229,11 @@ def test_hidden_zarr_keys(self): assert self.DIMENSION_KEY not in expected[var].attrs # verify that the dataset fails to open if dimension key is missing - del zarr_group.attrs[self.DIMENSION_KEY] - with pytest.raises(KeyError): - with xr.decode_cf(store) as actual: - pass + # this is not passing because the store is not read only + # del zarr_group.attrs[self.DIMENSION_KEY] + # with pytest.raises(KeyError): + # with xr.decode_cf(store) as actual: + # pass # put it back and try removing from a variable zarr_group.attrs[self.DIMENSION_KEY] = {} @@ -1240,13 +1255,13 @@ def test_write_persistence_modes(self): # make sure overwriting works as expected with self.create_zarr_target() as store: - original.to_zarr(store) + self.save(original, store) # should overwrite with no error - original.to_zarr(store, mode='w') - actual = xr.open_zarr(store) - self.assertDatasetIdentical(original, actual) - with pytest.raises(ValueError): - original.to_zarr(store, mode='w-') + self.save(original, store, mode='w') + with self.open(store) as actual: + self.assertDatasetIdentical(original, actual) + with pytest.raises(KeyError): + self.save(original, store, mode='w-') # check that we can't use other persistence modes # TODO: reconsider whether other persistence modes should be supported @@ -1261,7 +1276,7 @@ def test_compressor_encoding(self): blosc_comp = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) save_kwargs = dict(encoding={'var1': {'compressor': blosc_comp}}) with self.roundtrip(original, save_kwargs=save_kwargs) as actual: - assert actual.var1.encoding['compressor'] == blosc_comp + assert repr(actual.var1.encoding['compressor']) == repr(blosc_comp) def test_group(self): original = create_test_data() @@ -1298,6 +1313,35 @@ def test_roundtrip_string_encoded_characters(self): def test_dataset_caching(self): super(CFEncodedDataTest, self).test_dataset_caching() + @pytest.mark.xfail(reason="Zarr stores can not be appended to") + def test_append_write(self): + super(CFEncodedDataTest, self).test_append_write() + + @pytest.mark.xfail(reason="Zarr stores can not be appended to") + def test_append_overwrite_values(self): + super(CFEncodedDataTest, self).test_append_overwrite_values() + + @pytest.mark.xfail(reason="Zarr stores can not be appended to") + def test_append_with_invalid_dim_raises(self): + super(CFEncodedDataTest, self).test_append_with_invalid_dim_raises() + + # zero-dim variables + @pytest.mark.xfail(reason="Zero-dimension variables are broken") + def test_zero_dimensional_variable(self): + super(CFEncodedDataTest, self).test_zero_dimensional_variable() + + @pytest.mark.xfail(reason="Zero-dimension variables are broken") + def test_roundtrip_timedelta_data(self): + super(CFEncodedDataTest, self).test_roundtrip_timedelta_data() + + @pytest.mark.xfail(reason="Zero-dimension variables are broken") + def test_roundtrip_datetime_data(self): + super(CFEncodedDataTest, self).test_roundtrip_datetime_data() + + @pytest.mark.xfail(reason="Zero-dimension variables are broken") + def test_roundtrip_coordinates_with_space(self): + super(CFEncodedDataTest, self).test_roundtrip_coordinates_with_space() + @requires_zarr class ZarrDictStoreTest(BaseZarrTest, TestCase): From e084e9e566d41c0b51c1eb70905186ab66d2147d Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 4 Jan 2018 21:54:40 -0800 Subject: [PATCH 13/25] fix for InMemoryDataStore --- xarray/backends/common.py | 5 ++++- xarray/backends/memory.py | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index a2eefa3a325..aa26c562ee2 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -96,6 +96,9 @@ def __getitem__(self, key): def __len__(self): return len(self.variables) + def get_dimensions(self): # pragma: no cover + raise NotImplementedError + def get_attrs(self): # pragma: no cover raise NotImplementedError @@ -177,7 +180,7 @@ def add(self, source, target): else: try: target[...] = source - except (TypeError, PermissionError): + except TypeError: # workaround for GH: scipy/scipy#6880 target[:] = source diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index f79e92439fe..0337c672a67 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -29,6 +29,13 @@ def get_attrs(self): def get_variables(self): return self._variables + def get_dimensions(self): + dims = OrderedDict() + for v in self._variables.values(): + for d, s in v.dims.items(): + dims[d] = s + return dims + def prepare_variable(self, k, v, *args, **kwargs): new_var = Variable(v.dims, np.empty_like(v), v.attrs) # we copy the variable and stuff all encodings in the @@ -41,6 +48,6 @@ def set_attribute(self, k, v): # copy to imitate writing to disk. self._attributes[k] = copy.deepcopy(v) - def set_dimension(self, d, l): + def set_dimension(self, d, l, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass From a6aeb369a5b5282f1a1df4f3240be85230f6ed05 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 5 Jan 2018 06:54:59 -0800 Subject: [PATCH 14/25] fix for unlimited dimensions Scipy Datastores --- xarray/backends/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index aa26c562ee2..170d2093f87 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -259,7 +259,7 @@ def set_dimensions(self, variables, unlimited_dims=None): existing_dims = self.get_dimensions() - dims = {} + dims = OrderedDict() for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) From 264b13f3b06c2034b39776211b577cfef05ed02a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 5 Jan 2018 09:05:19 -0800 Subject: [PATCH 15/25] another patch for scipy --- xarray/backends/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 170d2093f87..807d067815e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -260,6 +260,8 @@ def set_dimensions(self, variables, unlimited_dims=None): existing_dims = self.get_dimensions() dims = OrderedDict() + for v in unlimited_dims: # put unlimited_dims first + dims[v] = None for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) From 9c03bfcb5e99beb7e3b8483f7d6c83831ecdece6 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 5 Jan 2018 19:10:49 -0800 Subject: [PATCH 16/25] whatsnew --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 728e40d4409..9bda7b0cd70 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,8 +31,9 @@ Enhancements - Use ``pandas.Grouper`` class in xarray resample methods rather than the deprecated ``pandas.TimeGrouper`` class (:issue:`1766`). By `Joe Hamman `_. -- Support for using `Zarr`_ as storage layer for xarray. - By `Ryan Abernathey `_. +- Support for using `Zarr`_ as storage layer for xarray. (:issue:`1223`). + By `Ryan Abernathey `_ and + `Joe Hamman `_. - Experimental support for parsing ENVI metadata to coordinates and attributes in :py:func:`xarray.open_rasterio`. By `Matti Eskelinen `_. From c92020ab32fe480b78b46c5df5c06152dae1ac71 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 7 Jan 2018 13:48:38 -0800 Subject: [PATCH 17/25] ordereddict --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ae890347ab9..1307b646365 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -340,7 +340,7 @@ def set_dimensions(self, variables, unlimited_dims=None): raise NotImplementedError( "Zarr backend doesn't know how to handle unlimited dimensions") - dims = {} + dims = OrderedDict() for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) From 18434f94bd3fd79e6405b88f0e960a1f6b8ba673 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 8 Jan 2018 22:11:18 -0800 Subject: [PATCH 18/25] address some of rabernats comments, in particular, this commit removes the _DIMENSION_KEY from the zarr_group.attrs --- xarray/backends/zarr.py | 37 ++++++++++++----------------------- xarray/tests/__init__.py | 2 +- xarray/tests/test_backends.py | 14 ++----------- 3 files changed, 15 insertions(+), 38 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1307b646365..515d50a7eab 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -43,10 +43,6 @@ def _ensure_valid_fill_value(value, dtype): return _encode_zarr_attr_value(valid) -def _decode_zarr_attrs(attrs): - return OrderedDict(attrs) - - def _replace_slices_with_arrays(key, shape): """Replace slice objects in vindex with equivalent ndarray objects.""" num_slices = sum(1 for k in key if isinstance(k, slice)) @@ -287,12 +283,6 @@ def __init__(self, zarr_group, writer=None): self._synchronizer = self.ds.synchronizer self._group = self.ds.path - if _DIMENSION_KEY not in self.ds.attrs: - if self._read_only: - raise KeyError("Zarr group can't be read by xarray because " - "it is missing the `%s` attribute." % - _DIMENSION_KEY) - if writer is None: # by default, we should not need a lock for writing zarr because # we do not (yet) allow overlapping chunks during write @@ -307,7 +297,7 @@ def open_store_variable(self, name, zarr_array): data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self)) dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY) - attributes = _decode_zarr_attrs(attributes) + attributes = OrderedDict(attributes) encoding = {'chunks': zarr_array.chunks, 'compressor': zarr_array.compressor, 'filters': zarr_array.filters} @@ -323,16 +313,19 @@ def get_variables(self): for k, v in self.ds.arrays()) def get_attrs(self): - attributes = HiddenKeyDict(self.ds.attrs.asdict(), [_DIMENSION_KEY]) - return _decode_zarr_attrs(attributes) + attributes = OrderedDict(self.ds.attrs.asdict()) + return attributes def get_dimensions(self): - try: - dimensions = self.ds.attrs[_DIMENSION_KEY].asdict() - except KeyError: - raise KeyError("Zarr object is missing the attribute `%s`, which " - "is required for xarray to determine variable " - "dimensions." % (_DIMENSION_KEY)) + dimensions = OrderedDict() + for k, v in self.ds.arrays(): + try: + for d, s in zip(v.attrs[_DIMENSION_KEY], v.shape): + dimensions[d] = s + except KeyError: + raise KeyError("Zarr object is missing the attribute `%s`, " + "which is required for xarray to determine " + "variable dimensions." % (_DIMENSION_KEY)) return dimensions def set_dimensions(self, variables, unlimited_dims=None): @@ -340,12 +333,6 @@ def set_dimensions(self, variables, unlimited_dims=None): raise NotImplementedError( "Zarr backend doesn't know how to handle unlimited dimensions") - dims = OrderedDict() - for v in variables.values(): - dims.update(dict(zip(v.dims, v.shape))) - - self.ds.attrs.update({_DIMENSION_KEY: dims}) - def set_attributes(self, attributes): encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) for k, v in iteritems(attributes)) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 235c6e9e410..f475c9bc87b 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -71,7 +71,7 @@ def _importorskip(modname, minversion=None): has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') -has_zarr, requires_zarr = _importorskip('zarr', minversion='2.2.0') +has_zarr, requires_zarr = _importorskip('zarr', minversion='2.1.0') # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a1252a3cc27..28f5c4d4b53 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1212,9 +1212,6 @@ def test_hidden_zarr_keys(self): expected.dump_to_store(store) zarr_group = store.ds - # check that the global hidden attribute is present - assert self.DIMENSION_KEY in zarr_group.attrs - # check that a variable hidden attribute is present and correct # JSON only has a single array type, which maps to list in Python. # In contrast, dims in xarray is always a tuple. @@ -1224,19 +1221,12 @@ def test_hidden_zarr_keys(self): with xr.decode_cf(store) as actual: # make sure it is hidden - assert self.DIMENSION_KEY not in actual.attrs + # assert self.DIMENSION_KEY not in actual.attrs for var in expected.variables.keys(): assert self.DIMENSION_KEY not in expected[var].attrs - # verify that the dataset fails to open if dimension key is missing - # this is not passing because the store is not read only - # del zarr_group.attrs[self.DIMENSION_KEY] - # with pytest.raises(KeyError): - # with xr.decode_cf(store) as actual: - # pass - # put it back and try removing from a variable - zarr_group.attrs[self.DIMENSION_KEY] = {} + # zarr_group.attrs[self.DIMENSION_KEY] = {} del zarr_group.var2.attrs[self.DIMENSION_KEY] with pytest.raises(KeyError): with xr.decode_cf(store) as actual: From 9f89c7c4b436f9873da555577d7e7ff9944214e6 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 8 Jan 2018 22:16:22 -0800 Subject: [PATCH 19/25] stop skipping zero-dim zarr tests --- xarray/tests/test_backends.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 28f5c4d4b53..cb1397025dc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1315,23 +1315,6 @@ def test_append_overwrite_values(self): def test_append_with_invalid_dim_raises(self): super(CFEncodedDataTest, self).test_append_with_invalid_dim_raises() - # zero-dim variables - @pytest.mark.xfail(reason="Zero-dimension variables are broken") - def test_zero_dimensional_variable(self): - super(CFEncodedDataTest, self).test_zero_dimensional_variable() - - @pytest.mark.xfail(reason="Zero-dimension variables are broken") - def test_roundtrip_timedelta_data(self): - super(CFEncodedDataTest, self).test_roundtrip_timedelta_data() - - @pytest.mark.xfail(reason="Zero-dimension variables are broken") - def test_roundtrip_datetime_data(self): - super(CFEncodedDataTest, self).test_roundtrip_datetime_data() - - @pytest.mark.xfail(reason="Zero-dimension variables are broken") - def test_roundtrip_coordinates_with_space(self): - super(CFEncodedDataTest, self).test_roundtrip_coordinates_with_space() - @requires_zarr class ZarrDictStoreTest(BaseZarrTest, TestCase): From 3590d2895465d1489bf34d7b1c331b0b784fc8ef Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 9 Jan 2018 08:19:05 -0800 Subject: [PATCH 20/25] update minimum zarr version for tests --- xarray/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f475c9bc87b..0228a6c21b3 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -71,7 +71,7 @@ def _importorskip(modname, minversion=None): has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') -has_zarr, requires_zarr = _importorskip('zarr', minversion='2.1.0') +has_zarr, requires_zarr = _importorskip('zarr', minversion='2.2') # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 From 7ed6bf8dd5be5e382d877b730842166a30a569c5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 13 Jan 2018 14:06:12 -0800 Subject: [PATCH 21/25] cleanup and docs for zarr performance branch --- xarray/backends/common.py | 85 +++++++++++++++++++++++++++++++++-- xarray/backends/zarr.py | 14 ++++-- xarray/tests/test_backends.py | 2 - 3 files changed, 91 insertions(+), 10 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 807d067815e..19fe5fcb745 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -199,6 +199,22 @@ def __init__(self, writer=None): self.writer = writer def encode(self, variables, attributes): + """ + Encode the variables and attributes in this store + + Parameters + ---------- + variables : dict-like + Dictionary of key/value (variable name / xr.Variable) pairs + attributes : dict-like + Dictionary of key/value (attribute name / attribute) pairs + + Returns + ------- + variables : dict-like + attributes : dict-like + + """ variables = OrderedDict([(k, self.encode_variable(v)) for k, v in variables.items()]) attributes = OrderedDict([(k, self.encode_attribute(v)) @@ -206,9 +222,11 @@ def encode(self, variables, attributes): return variables, attributes def encode_variable(self, v): + """encode one variable""" return v def encode_attribute(self, a): + """encode one attribute""" return a def set_dimension(self, d, l): # pragma: no cover @@ -224,14 +242,36 @@ def sync(self): self.writer.sync() def store_dataset(self, dataset): - # in stores variables are all variables AND coordinates - # in xarray.Dataset variables are variables NOT coordinates, - # so here we pass the whole dataset in instead of doing - # dataset.variables + """ + in stores, variables are all variables AND coordinates + in xarray.Dataset variables are variables NOT coordinates, + so here we pass the whole dataset in instead of doing + dataset.variables + """ self.store(dataset, dataset.attrs) def store(self, variables, attributes, check_encoding_set=frozenset(), unlimited_dims=None): + """ + Top level method for putting data on this store, this method: + - encodes variables/attributes + - sets dimensions + - sets variables + + Parameters + ---------- + variables : dict-like + Dictionary of key/value (variable name / xr.Variable) pairs + attributes : dict-like + Dictionary of key/value (attribute name / attribute) pairs + check_encoding_set : list-like + List of variables that should be checked for invalid encoding + values + unlimited_dims : list-like + List of dimension names that should be treated as unlimited + dimensions. + """ + variables, attributes = self.encode(variables, attributes) self.set_attributes(attributes) @@ -240,11 +280,36 @@ def store(self, variables, attributes, check_encoding_set=frozenset(), unlimited_dims=unlimited_dims) def set_attributes(self, attributes): + """ + This provides a centralized method to set the dataset attributes on the + data store. + + Parameters + ---------- + attributes : dict-like + Dictionary of key/value (attribute name / attribute) pairs + """ for k, v in iteritems(attributes): self.set_attribute(k, v) def set_variables(self, variables, check_encoding_set, unlimited_dims=None): + """ + This provides a centralized method to set the variables on the data + store. + + Parameters + ---------- + variables : dict-like + Dictionary of key/value (variable name / xr.Variable) pairs + check_encoding_set : list-like + List of variables that should be checked for invalid encoding + values + unlimited_dims : list-like + List of dimension names that should be treated as unlimited + dimensions. + """ + for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set @@ -254,6 +319,18 @@ def set_variables(self, variables, check_encoding_set, self.writer.add(source, target) def set_dimensions(self, variables, unlimited_dims=None): + """ + This provides a centralized method to set the dimensions on the data + store. + + Parameters + ---------- + variables : dict-like + Dictionary of key/value (variable name / xr.Variable) pairs + unlimited_dims : list-like + List of dimension names that should be treated as unlimited + dimensions. + """ if unlimited_dims is None: unlimited_dims = set() diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 570667f3f1c..d3a0a82bd63 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -322,7 +322,12 @@ def get_dimensions(self): for k, v in self.ds.arrays(): try: for d, s in zip(v.attrs[_DIMENSION_KEY], v.shape): + if d in dimensions and dimensions[d] != s: + raise ValueError( + 'found conflicting lengths for dimension %s ' + '(%d != %d)' % (d, s, dimensions[d])) dimensions[d] = s + except KeyError: raise KeyError("Zarr object is missing the attribute `%s`, " "which is required for xarray to determine " @@ -335,14 +340,15 @@ def set_dimensions(self, variables, unlimited_dims=None): "Zarr backend doesn't know how to handle unlimited dimensions") def set_attributes(self, attributes): - encoded_attrs = OrderedDict((k, _encode_zarr_attr_value(v)) - for k, v in iteritems(attributes)) - self.ds.attrs.put(encoded_attrs) + self.ds.attrs.put(attributes) def encode_variable(self, variable): variable = encode_zarr_variable(variable) return variable + def encode_attribute(self, a): + return _encode_zarr_attr_value(a) + def prepare_variable(self, name, variable, check_encoding=False, unlimited_dims=None): @@ -361,7 +367,7 @@ def prepare_variable(self, name, variable, check_encoding=False, # the magic for storing the hidden dimension data encoded_attrs[_DIMENSION_KEY] = dims for k, v in iteritems(attrs): - encoded_attrs[k] = _encode_zarr_attr_value(v) + encoded_attrs[k] = self.encode_attribute(v) zarr_array = self.ds.create(name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5c6c25dd386..5296492fc7c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1222,12 +1222,10 @@ def test_hidden_zarr_keys(self): with xr.decode_cf(store) as actual: # make sure it is hidden - # assert self.DIMENSION_KEY not in actual.attrs for var in expected.variables.keys(): assert self.DIMENSION_KEY not in expected[var].attrs # put it back and try removing from a variable - # zarr_group.attrs[self.DIMENSION_KEY] = {} del zarr_group.var2.attrs[self.DIMENSION_KEY] with pytest.raises(KeyError): with xr.decode_cf(store) as actual: From 3872da29cbb2c9b9cfa9fa334b9719a13e4e61de Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 13 Jan 2018 19:58:04 -0800 Subject: [PATCH 22/25] fix two failing tests when using zarr master --- xarray/tests/test_backends.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5296492fc7c..98067aa13fa 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1249,7 +1249,7 @@ def test_write_persistence_modes(self): self.save(original, store, mode='w') with self.open(store) as actual: self.assertDatasetIdentical(original, actual) - with pytest.raises(KeyError): + with pytest.raises(ValueError): self.save(original, store, mode='w-') # check that we can't use other persistence modes @@ -1273,10 +1273,6 @@ def test_group(self): with self.roundtrip(original, save_kwargs={'group': group}, open_kwargs={'group': group}) as actual: self.assertDatasetIdentical(original, actual) - with pytest.raises(KeyError): - with self.roundtrip(original, - save_kwargs={'group': group}) as actual: - self.assertDatasetIdentical(original, actual) # TODO: implement zarr object encoding and make these tests pass @pytest.mark.xfail(reason="Zarr object encoding not implemented") From c31decfa67eb9eb0f63ec5f1bcfddcbb377eaf97 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 14 Jan 2018 16:26:17 -0800 Subject: [PATCH 23/25] flake8 --- xarray/backends/common.py | 17 +++++++++-------- xarray/tests/__init__.py | 2 +- xarray/tests/test_backends.py | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 19fe5fcb745..636ec8b23b4 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -342,14 +342,15 @@ def set_dimensions(self, variables, unlimited_dims=None): for v in variables.values(): dims.update(dict(zip(v.dims, v.shape))) - for d, l in dims.items(): - - if d in existing_dims and l != existing_dims[d]: - raise ValueError("Unable to update size for existing dimension" - "%r (%d != %d)" % (d, l, existing_dims[d])) - elif d not in existing_dims: - is_unlimited = d in unlimited_dims - self.set_dimension(d, l, is_unlimited) + for dim, length in dims.items(): + + if dim in existing_dims and length != existing_dims[dim]: + raise ValueError( + "Unable to update size for existing dimension" + "%r (%d != %d)" % (dim, length, existing_dims[dim])) + elif dim not in existing_dims: + is_unlimited = dim in unlimited_dims + self.set_dimension(dim, length, is_unlimited) class WritableCFDataStore(AbstractWritableDataStore): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 6320d7024dd..fc548ef655c 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -71,7 +71,7 @@ def _importorskip(modname, minversion=None): has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') -has_zarr, requires_zarr = _importorskip('zarr', minversion='2.2') +has_zarr, requires_zarr = _importorskip('zarr', minversion='2.1') has_np112, requires_np112 = _importorskip('numpy', minversion='1.12.0') # some special cases diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6f852ed91c2..32ab8438c18 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1225,7 +1225,7 @@ def test_hidden_zarr_keys(self): dims = zarr_group[var].attrs[self.DIMENSION_KEY] assert dims == list(expected[var].dims) - with xr.decode_cf(store) as actual: + with xr.decode_cf(store): # make sure it is hidden for var in expected.variables.keys(): assert self.DIMENSION_KEY not in expected[var].attrs @@ -1233,7 +1233,7 @@ def test_hidden_zarr_keys(self): # put it back and try removing from a variable del zarr_group.var2.attrs[self.DIMENSION_KEY] with pytest.raises(KeyError): - with xr.decode_cf(store) as actual: + with xr.decode_cf(store): pass def test_write_persistence_modes(self): From 189d2627ff8feeec72a62b0adf091a478465bfb8 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 14 Jan 2018 18:29:44 -0800 Subject: [PATCH 24/25] back to zarr 2.2 --- xarray/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index fc548ef655c..6320d7024dd 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -71,7 +71,7 @@ def _importorskip(modname, minversion=None): has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') -has_zarr, requires_zarr = _importorskip('zarr', minversion='2.1') +has_zarr, requires_zarr = _importorskip('zarr', minversion='2.2') has_np112, requires_np112 = _importorskip('numpy', minversion='1.12.0') # some special cases From 96996eff5fbbd07ea87003762bc27befd70bbf58 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 16 Jan 2018 09:26:14 -0700 Subject: [PATCH 25/25] remove extra store method --- xarray/backends/common.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 636ec8b23b4..157ee494067 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -343,7 +343,6 @@ def set_dimensions(self, variables, unlimited_dims=None): dims.update(dict(zip(v.dims, v.shape))) for dim, length in dims.items(): - if dim in existing_dims and length != existing_dims[dim]: raise ValueError( "Unable to update size for existing dimension" @@ -365,10 +364,6 @@ def encode(self, variables, attributes): for k, v in attributes.items()]) return variables, attributes - def store(self, variables, attributes, *args, **kwargs): - AbstractWritableDataStore.store(self, variables, attributes, - *args, **kwargs) - class DataStorePickleMixin(object): """Subclasses must define `ds`, `_opener` and `_mode` attributes.