diff --git a/ci/Minimum b/ci/Minimum index 9da32f05e72..fc1b1c54533 100644 --- a/ci/Minimum +++ b/ci/Minimum @@ -2,7 +2,7 @@ matplotlib==2.1.0 numpy==1.16.0 scipy==1.0.0 pint==0.10.1 -xarray==0.13.0 +xarray==0.14.1 traitlets==4.3.0 pooch==0.1 pandas==0.22.0 diff --git a/docs/installguide.rst b/docs/installguide.rst index 25209f4ab61..601d3bc5e37 100644 --- a/docs/installguide.rst +++ b/docs/installguide.rst @@ -13,7 +13,7 @@ years. For Python itself, that means supporting the last two minor releases. * scipy >= 1.0.0 * pint >= 0.10.1 * pandas >= 0.22.0 -* xarray >= 0.13.0 +* xarray >= 0.14.1 * traitlets >= 4.3.0 * pooch >= 0.1 diff --git a/setup.cfg b/setup.cfg index 9e577cb9bff..ee759f068b6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,7 +43,7 @@ install_requires = numpy>=1.16.0 scipy>=1.0 pint>=0.10.1 - xarray>=0.13.0 + xarray>=0.14.1 pooch>=0.1 traitlets>=4.3.0 pandas>=0.22.0 diff --git a/src/metpy/xarray.py b/src/metpy/xarray.py index f190f10d100..3d1f18b1c4e 100644 --- a/src/metpy/xarray.py +++ b/src/metpy/xarray.py @@ -140,18 +140,37 @@ def magnitude(self): @property def unit_array(self): - """Return the data values of this DataArray as a `pint.Quantity`.""" + """Return the data values of this DataArray as a `pint.Quantity`. + + Notes + ----- + If not already existing as a `pint.Quantity` or Dask array, the data of this DataArray + will be loaded into memory by this operation. + """ if isinstance(self._data_array.data, units.Quantity): return self._data_array.data else: - return units.Quantity(self._data_array.values, self.units) + return units.Quantity(self._data_array.data, self.units) def convert_units(self, units): - """Return new DataArray with values converted to different units.""" + """Return new DataArray with values converted to different units. + + Notes + ----- + Any cached/lazy-loaded data (except that in a Dask array) will be loaded into memory + by this operation. Do not utilize on moderate- to large-sized remote datasets before + subsetting! 
+ """ return self.quantify().copy(data=self.unit_array.to(units)) def convert_coordinate_units(self, coord, units): - """Return new DataArray with coordinate converted to different units.""" + """Return new DataArray with coordinate converted to different units. + + Notes + ----- + Any cached/lazy-loaded coordinate data (except that in a Dask array) will be loaded + into memory by this operation. + """ new_coord_var = self._data_array[coord].copy( data=self._data_array[coord].metpy.unit_array.m_as(units) ) @@ -159,7 +178,14 @@ def convert_coordinate_units(self, coord, units): return self._data_array.assign_coords(coords={coord: new_coord_var}) def quantify(self): - """Return a DataArray with the data converted to a `pint.Quantity`.""" + """Return a DataArray with the data converted to a `pint.Quantity`. + + Notes + ----- + Any cached/lazy-loaded data (except that in a Dask array) will be loaded into memory + by this operation. Do not utilize on moderate- to large-sized remote datasets before + subsetting! + """ if ( not isinstance(self._data_array.data, units.Quantity) and np.issubdtype(self._data_array.data.dtype, np.number) @@ -203,14 +229,17 @@ def cartopy_globe(self): def _fixup_coordinate_map(self, coord_map): """Ensure sure we have coordinate variables in map, not coordinate names.""" + new_coord_map = {} for axis in coord_map: if coord_map[axis] is not None and not isinstance(coord_map[axis], xr.DataArray): - coord_map[axis] = self._data_array[coord_map[axis]] + new_coord_map[axis] = self._data_array[coord_map[axis]] + else: + new_coord_map[axis] = coord_map[axis] - return coord_map + return new_coord_map def assign_coordinates(self, coordinates): - """Assign the given coordinates to the given MetPy axis types. + """Return new DataArray with given coordinates assigned to the given MetPy axis types. Parameters ---------- @@ -221,18 +250,32 @@ def assign_coordinates(self, coordinates): which will trigger reparsing of all coordinates on next access. 
""" + coord_updates = {} if coordinates: # Assign the _metpy_axis attributes according to supplied mapping coordinates = self._fixup_coordinate_map(coordinates) for axis in coordinates: if coordinates[axis] is not None: - _assign_axis(coordinates[axis].attrs, axis) + coord_updates[coordinates[axis].name] = ( + coordinates[axis].assign_attrs( + _assign_axis(coordinates[axis].attrs.copy(), axis) + ) + ) else: # Clear _metpy_axis attribute on all coordinates - for coord_var in self._data_array.coords.values(): - coord_var.attrs.pop('_metpy_axis', None) + for coord_name, coord_var in self._data_array.coords.items(): + coord_updates[coord_name] = coord_var.copy(deep=False) - return self._data_array # allow method chaining + # Some coordinates remained linked in old form under other coordinates. We + # need to remove the _metpy_axis attr from these as well. + sub_coords = coord_updates[coord_name].coords + for sub_coord in sub_coords: + coord_updates[coord_name].coords[sub_coord].attrs.pop('_metpy_axis', None) + + # Now we can remove the _metpy_axis attr from the coordinate itself + coord_updates[coord_name].attrs.pop('_metpy_axis', None) + + return self._data_array.assign_coords(coord_updates) def _generate_coordinate_map(self): """Generate a coordinate map via CF conventions and other methods.""" @@ -291,6 +334,11 @@ def _metpy_axis_search(self, metpy_axis): return coord_var # Opportunistically parse all coordinates, and assign if not already assigned + # Note: since this is generally called by way of the coordinate properties, to cache + # the coordinate parsing results in coord_map on the coordinates means modifying the + # DataArray in-place (an exception to the usual behavior of MetPy's accessor). This is + # considered safe because it only affects the "_metpy_axis" attribute on the + # coordinates, and nothing else. 
coord_map = self._generate_coordinate_map() for axis, coord_var in coord_map.items(): if (coord_var is not None @@ -625,7 +673,7 @@ def parse_cf(self, varname=None, coordinates=None): # Assign coordinates if the coordinates argument is given if coordinates is not None: - var.metpy.assign_coordinates(coordinates) + var = var.metpy.assign_coordinates(coordinates) # Attempt to build the crs coordinate crs = None @@ -658,7 +706,7 @@ def _has_coord(coord_type): var = self._rebuild_coords(var, crs) if crs is not None: var = var.assign_coords(coords={'crs': crs}) - return var.metpy.quantify() + return var def _rebuild_coords(self, var, crs): """Clean up the units on the coordinate variables.""" @@ -814,7 +862,7 @@ def assign_y_x(self, force=False, tolerance=None): return self._dataset.assign_coords(**{y.name: y, x.name: x}) def update_attribute(self, attribute, mapping): - """Update attribute of all Dataset variables. + """Return new Dataset with specified attribute updated on all Dataset variables. 
Parameters ---------- @@ -829,24 +877,41 @@ def update_attribute(self, attribute, mapping): Returns ------- `xarray.Dataset` - Dataset with attribute updated (modified in place, and returned to allow method - chaining) + New Dataset with attribute updated """ # Make mapping uniform - if callable(mapping): - mapping_func = mapping - else: - def mapping_func(varname, **kwargs): - return mapping.get(varname, None) + if not callable(mapping): + old_mapping = mapping + + def mapping(varname, **kwargs): + return old_mapping.get(varname, None) - # Apply across all variables - for varname in list(self._dataset.data_vars) + list(self._dataset.coords): - value = mapping_func(varname, **self._dataset[varname].attrs) - if value is not None: - self._dataset[varname].attrs[attribute] = value + # Define mapping function for Dataset.map + def mapping_func(da): + new_value = mapping(da.name, **da.attrs) + if new_value is None: + return da + else: + return da.assign_attrs(**{attribute: new_value}) + + # Apply across all variables and coordinates + return ( + self._dataset + .map(mapping_func, keep_attrs=True) + .assign_coords({ + coord_name: mapping_func(coord_var) + for coord_name, coord_var in self._dataset.coords.items() + }) + ) - return self._dataset + def quantify(self): + """Return new dataset with all numeric variables quantified and cached data loaded.""" + return self._dataset.map(lambda da: da.metpy.quantify(), keep_attrs=True) + + def dequantify(self): + """Return new dataset with variables cast to magnitude and units on attribute.""" + return self._dataset.map(lambda da: da.metpy.dequantify(), keep_attrs=True) def _assign_axis(attributes, axis): diff --git a/tests/test_xarray.py b/tests/test_xarray.py index 60cde770898..e2b79c0460f 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -151,13 +151,37 @@ def test_quantify(test_ds_generic): np.testing.assert_array_almost_equal(result.data, units.Quantity(original)) -def test_dequantify(test_var): +def 
test_dequantify(): """Test dequantify method for converting data away from Quantity.""" - original = test_var.data - result = test_var.metpy.dequantify() + original = xr.DataArray(units.Quantity([280, 290, 300], 'K')) + result = original.metpy.dequantify() assert isinstance(result.data, np.ndarray) assert result.attrs['units'] == 'kelvin' - np.testing.assert_array_almost_equal(result.data, original.magnitude) + np.testing.assert_array_almost_equal(result.data, original.data.magnitude) + + +def test_dataset_quantify(test_ds_generic): + """Test quantify method for converting data to Quantity on Datasets.""" + result = test_ds_generic.metpy.quantify() + assert isinstance(result['test'].data, units.Quantity) + assert result['test'].data.units == units.dimensionless + assert 'units' not in result['test'].attrs + np.testing.assert_array_almost_equal( + result['test'].data, + units.Quantity(test_ds_generic['test'].data) + ) + + +def test_dataset_dequantify(): + """Test dequantify method for converting data away from Quantity on Datasets.""" + original = xr.Dataset({ + 'test': ('x', units.Quantity([280, 290, 300], 'K')), + 'x': np.arange(3) + }) + result = original.metpy.dequantify() + assert isinstance(result['test'].data, np.ndarray) + assert result['test'].attrs['units'] == 'kelvin' + np.testing.assert_array_almost_equal(result['test'].data, original['test'].data.magnitude) def test_radian_projection_coords(): @@ -278,7 +302,7 @@ def test_assign_coordinates_not_overwrite(test_ds_generic): """Test that assign_coordinates does not overwrite past axis attributes.""" data = test_ds_generic.copy() data['c'].attrs['axis'] = 'X' - data['test'].metpy.assign_coordinates({'y': data['c']}) + data['test'] = data['test'].metpy.assign_coordinates({'y': data['c']}) assert data['c'].identical(data['test'].metpy.y) assert data['c'].attrs['axis'] == 'X' @@ -598,9 +622,12 @@ def test_data_array_sel_dict_with_units(test_var): def test_data_array_sel_kwargs_with_units(test_var): """Test 
.sel on the metpy accessor with kwargs and axis type.""" truth = test_var.loc[:, 500.][..., 122] - selection = test_var.metpy.sel(vertical=5e4 * units.Pa, x=-16.569 * units.km, - tolerance=1., method='nearest') - selection.metpy.assign_coordinates(None) # truth was not parsed for coordinates + selection = ( + test_var.metpy + .sel(vertical=5e4 * units.Pa, x=-16.569 * units.km, tolerance=1., method='nearest') + .metpy + .assign_coordinates(None) + ) assert truth.identical(selection) @@ -959,13 +986,19 @@ def test_update_attribute_dictionary(test_ds_generic): 'test': 'Filler data', 'c': 'The third coordinate' } - test_ds_generic.metpy.update_attribute('description', descriptions) - assert 'description' not in test_ds_generic['a'].attrs - assert 'description' not in test_ds_generic['b'].attrs - assert test_ds_generic['c'].attrs['description'] == 'The third coordinate' - assert 'description' not in test_ds_generic['d'].attrs - assert 'description' not in test_ds_generic['e'].attrs - assert test_ds_generic['test'].attrs['description'] == 'Filler data' + result = test_ds_generic.metpy.update_attribute('description', descriptions) + + # Test attribute updates + assert 'description' not in result['a'].attrs + assert 'description' not in result['b'].attrs + assert result['c'].attrs['description'] == 'The third coordinate' + assert 'description' not in result['d'].attrs + assert 'description' not in result['e'].attrs + assert result['test'].attrs['description'] == 'Filler data' + + # Test for no side effects + assert 'description' not in test_ds_generic['c'].attrs + assert 'description' not in test_ds_generic['test'].attrs def test_update_attribute_callable(test_ds_generic): @@ -973,10 +1006,17 @@ def test_update_attribute_callable(test_ds_generic): def even_ascii(varname, **kwargs): if ord(varname[0]) % 2 == 0: return 'yes' - test_ds_generic.metpy.update_attribute('even', even_ascii) - assert 'even' not in test_ds_generic['a'].attrs - assert 
test_ds_generic['b'].attrs['even'] == 'yes' - assert 'even' not in test_ds_generic['c'].attrs - assert test_ds_generic['d'].attrs['even'] == 'yes' - assert 'even' not in test_ds_generic['e'].attrs - assert test_ds_generic['test'].attrs['even'] == 'yes' + result = test_ds_generic.metpy.update_attribute('even', even_ascii) + + # Test attribute updates + assert 'even' not in result['a'].attrs + assert result['b'].attrs['even'] == 'yes' + assert 'even' not in result['c'].attrs + assert result['d'].attrs['even'] == 'yes' + assert 'even' not in result['e'].attrs + assert result['test'].attrs['even'] == 'yes' + + # Test for no side effects + assert 'even' not in test_ds_generic['b'].attrs + assert 'even' not in test_ds_generic['d'].attrs + assert 'even' not in test_ds_generic['test'].attrs