diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 3a087bef8f..44eb9d0dd4 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -1687,40 +1687,67 @@ See also :func:`esmvalcore.preprocessor.extract_named_regions`. ``extract_shape`` -------------------------- +----------------- -Extract a shape or a representative point for this shape from -the data. +Extract a shape or a representative point for this shape from the data. Parameters: * ``shapefile``: path to the shapefile containing the geometry of the - region to be extracted. If the file contains multiple shapes behaviour - depends on the decomposed parameter. This path can be relative to - ``auxiliary_data_dir`` defined in the :ref:`user configuration file`. + region to be extracted. + If the file contains multiple shapes behaviour depends on the + ``decomposed`` parameter. + This path can be relative to ``auxiliary_data_dir`` defined in the + :ref:`user configuration file` or relative to + ``esmvalcore/preprocessor/shapefiles`` (in that priority order). + Alternatively, a string (see "Shapefile name" below) can be given to load + one of the following shapefiles that are shipped with ESMValCore: + + =============== ===================== ========================================== + Shapefile name Description Reference + =============== ===================== ========================================== + ar6 IPCC WG1 reference https://doi.org/10.5281/zenodo.5176260 + regions (v4) used in + Assessment Report 6 + =============== ===================== ========================================== + * ``method``: the method to select the region, selecting either all points - contained by the shape or a single representative point. Choose either - 'contains' or 'representative'. If not a single grid point is contained - in the shape, a representative point will be selected. + contained by the shape or a single representative point. + Choose either `'contains'` or `'representative'`. + If not a single grid point is contained in the shape, a representative + point will be selected. * ``crop``: by default extract_region_ will be used to crop the data to a - minimal rectangular region containing the shape. Set to ``false`` to only - mask data outside the shape. Data on irregular grids will not be cropped. - * ``decomposed``: by default ``false``, in this case the union of all the - regions in the shape file is masked out. If ``true``, the regions in the - shapefiles are masked out separately, generating an auxiliary dimension - for the cube for this. - * ``ids``: by default, ``[]``, in this case all the shapes in the file will - be used. If a list of IDs is provided, only the shapes matching them will - be used. The IDs are assigned from the ``name`` or ``id`` attributes (in - that order of priority) if present in the file or from the reading order - if otherwise not present. So, for example, if a file has both ```name`` - and ``id`` attributes, the ids will be assigned from ``name``. If the file - only has the ``id`` attribute, it will be taken from it and if no ``name`` - nor ``id`` attributes are present, an integer id starting from 1 will be - assigned automatically when reading the shapes. We discourage to rely on - this last behaviour as we can not assure that the reading order will be the - same in different platforms, so we encourage you to modify the file to add - a proper id attribute. If the file has an id attribute with a name that is - not supported, please open an issue so we can add support for it. + minimal rectangular region containing the shape. + Set to ``false`` to only mask data outside the shape. + Data on irregular grids will not be cropped. + * ``decomposed``: by default ``false``; in this case the union of all the + regions in the shapefile is masked out. + If set to ``true``, the regions in the shapefiles are masked out separately + and the output cube will have an additional dimension ``shape_id`` + describing the requested regions. + * ``ids``: Shapes to be read from the shapefile. + Can be given as: + + * :obj:`list`: IDs are assigned from the attributes ``name``, ``NAME``, + ``Name``, ``id``, or ``ID`` (in that priority order; the first one + available is used). + If none of these attributes are available in the shapefile, + assume that the given `ids` correspond to the reading order of the + individual shapes. + So, for example, if a file has both ``name`` and ``id`` attributes, the + ids will be assigned from ``name``. + If the file only has the ``id`` attribute, it will be taken from it and + if no ``name`` nor ``id`` attributes are present, an integer ID starting + from 0 will be assigned automatically when reading the shapes. + We discourage to rely on this last behaviour as we can not assure that + the reading order will be the same on different platforms, so we + encourage you to specify a custom attribute using a :obj:`dict` (see + below) instead. + Note: An empty list is interpreted as ``ids=None`` (see below). + * :obj:`dict`: IDs (dictionary value; :obj:`list` of :obj:`str`) are + assigned from attribute given as dictionary key (:obj:`str`). + Only dictionaries with length 1 are supported. + Example: ``ids={'Acronym': ['GIC', 'WNA']}``. + * `None`: select all available regions from the shapefile. Examples: * Extract the shape of the river Elbe from a shapefile: @@ -1746,6 +1773,19 @@ Examples: - United Kingdom - Taiwan + * Extract European AR6 regions: + + .. code-block:: yaml + + extract_shape: + shapefile: ar6 + method: contains + ids: + Acronym: + - NEU + - WCE + - MED + See also :func:`esmvalcore.preprocessor.extract_shape`. diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 38b7426c0c..ed858e0c39 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -42,6 +42,7 @@ PreprocessingTask, PreprocessorFile, ) +from esmvalcore.preprocessor._area import _update_shapefile_path from esmvalcore.preprocessor._other import _group_products from esmvalcore.preprocessor._regrid import ( _spec_to_latlonvals, @@ -631,12 +632,8 @@ def _update_extract_shape(settings, session): if 'extract_shape' in settings: shapefile = settings['extract_shape'].get('shapefile') if shapefile: - if not os.path.exists(shapefile): - shapefile = os.path.join( - session['auxiliary_data_dir'], - shapefile, - ) - settings['extract_shape']['shapefile'] = shapefile + shapefile = _update_shapefile_path(shapefile, session=session) + settings['extract_shape']['shapefile'] = shapefile check.extract_shape(settings['extract_shape']) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 5090fdf209..621a5e47e1 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -3,8 +3,12 @@ Allows for selecting data subsets using certain latitude and longitude bounds; selecting geographical regions; constructing area averages; etc. """ +from __future__ import annotations + import logging import warnings +from pathlib import Path +from typing import TYPE_CHECKING, Optional import fiona import iris @@ -12,6 +16,8 @@ import shapely import shapely.ops from dask import array as da +from iris.coords import AuxCoord +from iris.cube import Cube, CubeList from iris.exceptions import CoordinateNotFoundError from ._shared import ( @@ -26,9 +32,12 @@ remove_supplementary_variables, ) +if TYPE_CHECKING: + from esmvalcore.config import Session + logger = logging.getLogger(__name__) -SHAPE_ID_KEYS = ('name', 'NAME', 'Name', 'id', 'ID') +SHAPE_ID_KEYS: tuple[str, ...] = ('name', 'NAME', 'Name', 'id', 'ID') def extract_region(cube, start_longitude, end_longitude, start_latitude, @@ -365,13 +374,15 @@ def extract_named_regions(cube, regions): return cube -def _crop_cube(cube, - start_longitude, - start_latitude, - end_longitude, - end_latitude, - cmor_coords=True): - """Crop cubes on a cartesian grid.""" +def _crop_cube( + cube: Cube, + start_longitude: float, + start_latitude: float, + end_longitude: float, + end_latitude: float, + cmor_coords: bool = True, +) -> Cube: + """Crop cubes on a regular grid.""" lon_coord = cube.coord(axis='X') lat_coord = cube.coord(axis='Y') if lon_coord.ndim == 1 and lat_coord.ndim == 1: @@ -405,19 +416,27 @@ def _crop_cube(cube, return cube -def _select_representative_point(shape, lon, lat): - """Select a representative point for `shape` from `lon` and `lat`.""" +def _select_representative_point( + shape, + lon: np.ndarray, + lat: np.ndarray, +) -> np.ndarray: + """Get mask to select a representative point.""" representative_point = shape.representative_point() points = shapely.geometry.MultiPoint( np.stack((np.ravel(lon), np.ravel(lat)), axis=1)) nearest_point = shapely.ops.nearest_points(points, representative_point)[0] nearest_lon, nearest_lat = nearest_point.coords[0] - select = (lon == nearest_lon) & (lat == nearest_lat) - return select + mask = (lon == nearest_lon) & (lat == nearest_lat) + return mask -def _correct_coords_from_shapefile(cube, cmor_coords, pad_north_pole, - pad_hawaii): +def _correct_coords_from_shapefile( + cube: Cube, + cmor_coords: bool, + pad_north_pole: bool, + pad_hawaii: bool, +) -> tuple[np.ndarray, np.ndarray]: """Get correct lat and lon from shapefile.""" lon = cube.coord(axis='X').points lat = cube.coord(axis='Y').points @@ -440,108 +459,157 @@ def _correct_coords_from_shapefile(cube, cmor_coords, pad_north_pole, return lon, lat -def _get_masks_from_geometries(geometries, - lon, - lat, - method='contains', - decomposed=False, - ids=None): - - if method not in {'contains', 'representative'}: - raise ValueError( - "Invalid value for `method`. Choose from 'contains', ", - "'representative'.") +def _process_ids(geometries, ids: list | dict | None) -> tuple: + """Read requested IDs and ID keys.""" + # If ids is a dict, it needs to have length 1 and all geometries needs to + # have the requested attribute key + if isinstance(ids, dict): + if len(ids) != 1: + raise ValueError( + f"If `ids` is given as dict, it needs exactly one entry, got " + f"{ids}" + ) + key = list(ids.keys())[0] + for geometry in geometries: + if key not in geometry['properties']: + raise ValueError( + f"Geometry {dict(geometry['properties'])} does not have " + f"requested attribute {key}" + ) + id_keys: tuple[str, ...] = (key, ) + ids = ids[key] + + # Otherwise, use SHAPE_ID_KEYS to get ID + else: + id_keys = SHAPE_ID_KEYS - selections = dict() - if ids: + # IDs should be strings or None + if not ids: + ids = None + if ids is not None: ids = [str(id_) for id_ in ids] - for i, item in enumerate(geometries): - for id_prop in SHAPE_ID_KEYS: - if id_prop in item['properties']: - id_ = str(item['properties'][id_prop]) + + return (id_keys, ids) + + +def _get_requested_geometries( + geometries, + ids: list | dict | None, + shapefile: Path, +) -> dict[str, dict]: + """Return requested geometries.""" + (id_keys, ids) = _process_ids(geometries, ids) + + # Iterate through all geometries and select matching elements + requested_geometries = {} + for (reading_order, geometry) in enumerate(geometries): + for key in id_keys: + if key in geometry['properties']: + geometry_id = str(geometry['properties'][key]) break + + # If none of the attributes are available in the geometry, use reading + # order as last resort else: - id_ = str(i) - logger.debug('Shape "%s" found', id_) - if ids and id_ not in ids: - continue - selections[id_] = _get_shape(lon, lat, method, item) - - if ids: - missing = set(ids) - set(selections.keys()) - if missing: - raise ValueError(f'Shapes {" ".join(missing)!r} not found') + geometry_id = str(reading_order) - if not decomposed and len(selections) > 1: - return _merge_shapes(selections, lat.shape) + logger.debug("Found shape '%s'", geometry_id) - return selections + # Select geometry if its ID is requested or all IDs are requested + # (i.e., ids=None) + if ids is None or geometry_id in ids: + requested_geometries[geometry_id] = geometry + # Check if all requested IDs have been found + if ids is not None: + missing = set(ids) - set(requested_geometries.keys()) + if missing: + raise ValueError( + f"Requested shapes {missing} not found in shapefile " + f"{shapefile}" + ) -def _geometry_matches_ids(geometry: dict, ids: list): - """Returns True if `geometry` matches one of the `ids`.""" - props = geometry['properties'] + return requested_geometries - geom_id = [props.get(key, None) for key in SHAPE_ID_KEYS] - geom_id = [key for key in geom_id if key is not None] - if not geom_id: - raise KeyError(f'{props} dict has no `name` or `id` key') +def _get_masks_from_geometries( + geometries: dict[str, dict], + lon: np.ndarray, + lat: np.ndarray, + method: str = 'contains', + decomposed: bool = False, +) -> dict[str, np.ndarray]: + """Get cube masks from requested regions.""" + if method not in {'contains', 'representative'}: + raise ValueError( + "Invalid value for `method`. Choose from 'contains', ", + "'representative'.") + + masks = {} + for (id_, geometry) in geometries.items(): + masks[id_] = _get_single_mask(lon, lat, method, geometry) - geom_id = geom_id[0] + if not decomposed and len(masks) > 1: + return _merge_masks(masks, lat.shape) - return geom_id in ids + return masks -def _get_bounds(geometries, ids=None): - """Get bounds from the subset of geometries defined by `ids`. +def _get_bounds( + geometries: dict[str, dict], +) -> tuple[float, float, float, float]: + """Get bounds from given geometries. Parameters ---------- - geometries : fiona.Collection + geometries: fiona.collection.Collection Fiona collection of shapes (geometries). - ids : tuple of str, optional - List of ids to select from geometry collection. If None, - return global bounds (``geometries.bounds``) Returns ------- lat_min, lon_min, lat_max, lon_max - Returns coordinates deliminating bounding box for shape ids. - """ - if not ids: - return geometries.bounds - - subset = [geom for geom in geometries if _geometry_matches_ids(geom, ids)] + Coordinates deliminating bounding box for shape ids. - all_bounds = np.vstack([fiona.bounds(geom) for geom in subset]) + """ + all_bounds = np.vstack( + [fiona.bounds(geom) for geom in geometries.values()] + ) lon_max, lat_max = all_bounds[:, 2:].max(axis=0) lon_min, lat_min = all_bounds[:, :2].min(axis=0) return lon_min, lat_min, lon_max, lat_max -def _get_shape(lon, lat, method, item): - shape = shapely.geometry.shape(item['geometry']) +def _get_single_mask( + lon: np.ndarray, + lat: np.ndarray, + method: str, + geometry: dict, +) -> np.ndarray: + """Get single mask from one region.""" + shape = shapely.geometry.shape(geometry['geometry']) if method == 'contains': - select = shapely.vectorized.contains(shape, lon, lat) - if method == 'representative' or not select.any(): - select = _select_representative_point(shape, lon, lat) - return select + mask = shapely.vectorized.contains(shape, lon, lat) + if method == 'representative' or not mask.any(): + mask = _select_representative_point(shape, lon, lat) + return mask -def _merge_shapes(selections, shape): - selection = np.zeros(shape, dtype=bool) - for select in selections.values(): - selection |= select - return {0: selection} +def _merge_masks( + masks: dict[str, np.ndarray], + shape: tuple, +) -> dict[str, np.ndarray]: + """Merge masks into one.""" + merged_mask = np.zeros(shape, dtype=bool) + for mask in masks.values(): + merged_mask |= mask + return {'0': merged_mask} -def fix_coordinate_ordering(cube): - """Transpose the dimensions. +def fix_coordinate_ordering(cube: Cube) -> Cube: + """Transpose the cube dimensions. - This is done such that the order of dimension is - in standard order, ie: + This is done such that the order of dimension is in standard order, i.e.: [time] [shape_id] [other_coordinates] latitude longitude @@ -550,12 +618,13 @@ def fix_coordinate_ordering(cube): Parameters ---------- cube: iris.cube.Cube - input cube. + Input cube. Returns ------- iris.cube.Cube Cube with dimensions transposed to standard order + """ try: time_dim = cube.coord_dims('time') @@ -570,21 +639,63 @@ def fix_coordinate_ordering(cube): for dim in [time_dim, shape_dim]: for i in dim: other.remove(i) - other = tuple(other) + other_dims = tuple(other) - order = time_dim + shape_dim + other + order = time_dim + shape_dim + other_dims cube.transpose(new_order=order) return cube -def extract_shape(cube, - shapefile, - method='contains', - crop=True, - decomposed=False, - ids=None): - """Extract a region defined by a shapefile. +def _update_shapefile_path( + shapefile: str | Path, + session: Optional[Session] = None, +) -> Path: + """Update path to shapefile.""" + shapefile = str(shapefile) + shapefile_path = Path(shapefile) + + # Try absolute path + logger.debug("extract_shape: Looking for shapefile %s", shapefile_path) + if shapefile_path.exists(): + return shapefile_path + + # Try path relative to auxiliary_data_dir if session is given + if session is not None: + shapefile_path = session['auxiliary_data_dir'] / shapefile + logger.debug("extract_shape: Looking for shapefile %s", shapefile_path) + if shapefile_path.exists(): + return shapefile_path + + # Try path relative to esmvalcore/preprocessor/shapefiles/ + shapefile_path = Path(__file__).parent / 'shapefiles' / shapefile + logger.debug("extract_shape: Looking for shapefile %s", shapefile_path) + if shapefile_path.exists(): + return shapefile_path + + # As final resort, add suffix '.shp' and try path relative to + # esmvalcore/preprocessor/shapefiles/ again + # Note: this will find "special" shapefiles like 'ar6' + shapefile_path = ( + Path(__file__).parent / 'shapefiles' / f"{shapefile.lower()}.shp" + ) + if shapefile_path.exists(): + return shapefile_path + + # If no valid shapefile has been found, return original input (an error + # will be raised at a later stage) + return Path(shapefile) + + +def extract_shape( + cube: Cube, + shapefile: str | Path, + method: str = 'contains', + crop: bool = True, + decomposed: bool = False, + ids: Optional[list | dict] = None, +) -> Cube: + """Extract a region defined by a shapefile using masking. Note that this function does not work for shapes crossing the prime meridian or poles. @@ -592,25 +703,42 @@ def extract_shape(cube, Parameters ---------- cube: iris.cube.Cube - input cube. - shapefile: str - A shapefile defining the region(s) to extract. + Input cube. + shapefile: str or Path + A shapefile defining the region(s) to extract. Also accepts the + following strings to load special shapefiles: + + * ``'ar6'``: IPCC WG1 reference regions (v4) used in Assessment Report + 6 (https://doi.org/10.5281/zenodo.5176260). Should be used in + combination with a :obj:`dict` for the argument `ids`, e.g., + ``ids={'Acronym': ['GIC', 'WNA']}``. method: str, optional Select all points contained by the shape or select a single - representative point. Choose either 'contains' or 'representative'. - If 'contains' is used, but not a single grid point is contained by the - shape, a representative point will selected. + representative point. Choose either `'contains'` or `'representative'`. + If `'contains'` is used, but not a single grid point is contained by + the shape, a representative point will be selected. crop: bool, optional - Crop the resulting cube using `extract_region()`. Note that data on - irregular grids will not be cropped. + In addition to masking, crop the resulting cube using + :func:`~esmvalcore.preprocessor.extract_region`. Data on irregular + grids will not be cropped. decomposed: bool, optional - Whether or not to retain the sub shapes of the shapefile in the output. - If this is set to True, the output cube has a dimension for the sub - shapes. - ids: list(str), optional - List of shapes to be read from the file. The ids are assigned from - the attributes 'name' or 'id' (in that priority order) if present in - the file or correspond to the reading order if not. + If set to `True`, the output cube will have an additional dimension + `shape_id` describing the requested regions. + ids: list or dict or None, optional + Shapes to be read from the shapefile. Can be given as: + + * :obj:`list`: IDs are assigned from the attributes `name`, `NAME`, + `Name`, `id`, or `ID` (in that priority order; the first one + available is used). If none of these attributes are available in the + shapefile, assume that the given `ids` correspond to the reading + order of the individual shapes, e.g., ``ids=[0, 2]`` corresponds to + the first and third shape read from the shapefile. Note: An empty + list is interpreted as `ids=None`. + * :obj:`dict`: IDs (dictionary value; :obj:`list` of :obj:`str`) are + assigned from attribute given as dictionary key (:obj:`str`). Only + dictionaries with length 1 are supported. + Example: ``ids={'Acronym': ['GIC', 'WNA']}`` for ``shapefile='ar6'``. + * `None`: select all available shapes from the shapefile. Returns ------- @@ -619,13 +747,14 @@ def extract_shape(cube, See Also -------- - extract_region : Extract a region from a cube. + extract_region: Extract a region from a cube. + """ + shapefile = _update_shapefile_path(shapefile) with fiona.open(shapefile) as geometries: - # get parameters specific to the shapefile (NE used case - # eg longitudes [-180, 180] or latitude missing - # or overflowing edges) + # Get parameters specific to the shapefile (NE used case e.g. + # longitudes [-180, 180] or latitude missing or overflowing edges) cmor_coords = True pad_north_pole = False pad_hawaii = False @@ -636,40 +765,60 @@ def extract_shape(cube, if geometries.bounds[0] > -180. and geometries.bounds[0] < 179.: pad_hawaii = True + requested_geometries = _get_requested_geometries( + geometries, ids, shapefile + ) + + # Crop cube if desired if crop: lon_min, lat_min, lon_max, lat_max = _get_bounds( - geometries=geometries, - ids=ids, + requested_geometries + ) + cube = _crop_cube( + cube, + start_longitude=lon_min, + start_latitude=lat_min, + end_longitude=lon_max, + end_latitude=lat_max, + cmor_coords=cmor_coords, ) - cube = _crop_cube(cube, - start_longitude=lon_min, - start_latitude=lat_min, - end_longitude=lon_max, - end_latitude=lat_max, - cmor_coords=cmor_coords) - lon, lat = _correct_coords_from_shapefile(cube, cmor_coords, - pad_north_pole, pad_hawaii) + lon, lat = _correct_coords_from_shapefile( + cube, + cmor_coords, + pad_north_pole, + pad_hawaii, + ) + + masks = _get_masks_from_geometries( + requested_geometries, + lon, + lat, + method=method, + decomposed=decomposed, + ) - selections = _get_masks_from_geometries(geometries, - lon, - lat, - method=method, - decomposed=decomposed, - ids=ids) + # Mask input cube based on requested regions + result = _mask_cube(cube, masks) - return _mask_cube(cube, selections) + # Remove dummy scalar coordinate if final cube is not decomposed + if not decomposed: + result.remove_coord('shape_id') + return result -def _mask_cube(cube, selections): - cubelist = iris.cube.CubeList() - for id_, select in selections.items(): + +def _mask_cube(cube: Cube, masks: dict[str, np.ndarray]) -> Cube: + """Mask input cube.""" + cubelist = CubeList() + for id_, mask in masks.items(): _cube = cube.copy() remove_supplementary_variables(_cube) _cube.add_aux_coord( - iris.coords.AuxCoord(id_, units='no_unit', long_name="shape_id")) - select = da.broadcast_to(select, _cube.shape) - _cube.data = da.ma.masked_where(~select, _cube.core_data()) + AuxCoord(id_, units='no_unit', long_name='shape_id') + ) + mask = da.broadcast_to(mask, _cube.shape) + _cube.data = da.ma.masked_where(~mask, _cube.core_data()) cubelist.append(_cube) result = fix_coordinate_ordering(cubelist.merge_cube()) if cube.cell_measures(): diff --git a/esmvalcore/preprocessor/shapefiles/ar6.LICENSE b/esmvalcore/preprocessor/shapefiles/ar6.LICENSE new file mode 100644 index 0000000000..748ce6b3c8 --- /dev/null +++ b/esmvalcore/preprocessor/shapefiles/ar6.LICENSE @@ -0,0 +1,11 @@ +This data set has been downloaded from + +Iturbide, Maialen, Fernández, Jesús, Gutiérrez, José Manuel, Bedia, Joaquín, +Cimadevilla, Ezequiel, Díez-Sierra, Javier, Manzanas, Rodrigo, Casanueva, Ana, +Baño-Medina, Jorge, Milovac, Josipa, Herrera, Sixto, Cofiño, Antonio S., San +Martín, Daniel, García-Díez, Markel, Hauser, Mathias, Huard, David, & Yelekci, +Özge. (2021). Repository supporting the implementation of FAIR principles in +the IPCC-WGI Atlas (v2.0). Zenodo. https://doi.org/10.5281/zenodo.5176260 + +and is available under a Creative Commons Attribution license, CC BY 4.0 +(https://creativecommons.org/licenses/by/4.0/legalcode). diff --git a/esmvalcore/preprocessor/shapefiles/ar6.dbf b/esmvalcore/preprocessor/shapefiles/ar6.dbf new file mode 100644 index 0000000000..4280805d0c Binary files /dev/null and b/esmvalcore/preprocessor/shapefiles/ar6.dbf differ diff --git a/esmvalcore/preprocessor/shapefiles/ar6.prj b/esmvalcore/preprocessor/shapefiles/ar6.prj new file mode 100644 index 0000000000..a30c00a55d --- /dev/null +++ b/esmvalcore/preprocessor/shapefiles/ar6.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] \ No newline at end of file diff --git a/esmvalcore/preprocessor/shapefiles/ar6.shp b/esmvalcore/preprocessor/shapefiles/ar6.shp new file mode 100644 index 0000000000..b589a79679 Binary files /dev/null and b/esmvalcore/preprocessor/shapefiles/ar6.shp differ diff --git a/esmvalcore/preprocessor/shapefiles/ar6.shx b/esmvalcore/preprocessor/shapefiles/ar6.shx new file mode 100644 index 0000000000..5639e8173e Binary files /dev/null and b/esmvalcore/preprocessor/shapefiles/ar6.shx differ diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index b63a4bd7f7..ba53388ddf 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -1521,7 +1521,7 @@ def test_extract_shape(tmp_path, patched_datafinder, session): task = recipe.tasks.pop() assert len(task.products) == 1 product = task.products.pop() - assert product.settings['extract_shape']['shapefile'] == str(shapefile) + assert product.settings['extract_shape']['shapefile'] == shapefile @pytest.mark.parametrize('invalid_arg', @@ -1555,8 +1555,7 @@ def test_extract_shape_raises(tmp_path, patched_datafinder, session, end_year: 2005 ensemble: r1i1p1 additional_datasets: - - - dataset: GFDL-CM3 + - dataset: GFDL-CM3 scripts: null """) diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index 809a07ae5d..cabaadf396 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -8,6 +8,7 @@ import pytest from cf_units import Unit from iris.cube import Cube +from iris.fileformats.pp import EARTH_RADIUS from numpy.testing._private.utils import assert_raises from shapely.geometry import Polygon, mapping @@ -15,6 +16,8 @@ import tests from esmvalcore.preprocessor._area import ( _crop_cube, + _get_requested_geometries, + _update_shapefile_path, area_statistics, extract_named_regions, extract_region, @@ -27,8 +30,7 @@ class Test(tests.Test): """Test class for the :func:`esmvalcore.preprocessor._area_pp` module.""" def setUp(self): """Prepare tests.""" - self.coord_sys = iris.coord_systems.GeogCS( - iris.fileformats.pp.EARTH_RADIUS) + self.coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) data = np.ones((5, 5), dtype=np.float32) lons = iris.coords.DimCoord( [i + .5 for i in range(5)], @@ -318,11 +320,11 @@ def create_irregular_grid_cube(data, lons, lats): }, { 'region': (0, 0, -100, 0), - 'raises': "Invalid start_latitude: -100." + 'raises': "Invalid start_latitude: -100" }, { 'region': (0, 0, 0, 100), - 'raises': "Invalid end_latitude: -100." + 'raises': "Invalid end_latitude: 100" }, ] @@ -368,7 +370,7 @@ def test_extract_region_irregular(irregular_extract_region_cube, case): np.testing.assert_array_equal(cube.data[i].mask, case['mask']) np.testing.assert_array_equal(cube.data.data, case['data']) else: - with pytest.raises(ValueError) as exc: + with pytest.raises(ValueError, match=case['raises']): extract_region( irregular_extract_region_cube, start_longitude=start_lon, @@ -376,7 +378,6 @@ def test_extract_region_irregular(irregular_extract_region_cube, case): start_latitude=start_lat, end_latitude=end_lat, ) - assert exc.value == case['raises'] def create_rotated_grid_cube(data): @@ -406,7 +407,7 @@ def create_rotated_grid_cube(data): units='degrees', coord_system=coord_sys_rotated) - coord_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) + coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) glon, glat = np.meshgrid(grid_lons, grid_lats) lons, lats = iris.analysis.cartography.unrotate_pole( np.deg2rad(glon), np.deg2rad(glat), grid_north_pole_longitude, @@ -503,7 +504,7 @@ def test_area_statistics_rotated(case): @pytest.fixture def make_testcube(): """Create a test cube on a Cartesian grid.""" - coord_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) + coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) data = np.ones((5, 5)) lons = iris.coords.DimCoord( [i + .5 for i in range(5)], @@ -675,9 +676,7 @@ def test_crop_cube_with_ne_file(ne_ocean_shapefile): @pytest.mark.parametrize('crop', [True, False]) -@pytest.mark.parametrize('ids', [None, [ - 0, -]]) +@pytest.mark.parametrize('ids', [None, [0]]) def test_extract_shape(make_testcube, square_shape, tmp_path, crop, ids): """Test for extracting a region with shapefile.""" expected = square_shape @@ -698,11 +697,9 @@ def test_extract_shape(make_testcube, square_shape, tmp_path, crop, ids): def test_extract_shape_natural_earth(make_testcube, ne_ocean_shapefile): """Test for extracting a shape from NE file.""" expected = np.ones((5, 5)) - preproc_path = Path(esmvalcore.preprocessor.__file__).parent - shp_file = preproc_path / "ne_masks" / "ne_50m_ocean.shp" result = extract_shape( make_testcube, - shp_file, + ne_ocean_shapefile, crop=False, ) np.testing.assert_array_equal(result.data.data, expected) @@ -711,8 +708,6 @@ def test_extract_shape_natural_earth(make_testcube, ne_ocean_shapefile): def test_extract_shape_fx(make_testcube, ne_ocean_shapefile): """Test for extracting a shape from NE file.""" expected = np.ones((5, 5)) - preproc_path = Path(esmvalcore.preprocessor.__file__).parent - shp_file = preproc_path / "ne_masks" / "ne_50m_ocean.shp" cube = make_testcube measure = iris.coords.CellMeasure(cube.data, standard_name='cell_area', @@ -728,7 +723,7 @@ def test_extract_shape_fx(make_testcube, ne_ocean_shapefile): cube.add_ancillary_variable(ancillary_var, (0, 1)) result = extract_shape( cube, - shp_file, + ne_ocean_shapefile, crop=False, ) np.testing.assert_array_equal(result.data.data, expected) @@ -962,11 +957,208 @@ def test_extract_shape_irregular(irreg_extract_shape_cube, tmp_path, method): np.testing.assert_array_equal(cube.data[i].mask, mask) -def test_extract_shape_wrong_method_raises(): - with pytest.raises(ValueError) as exc: - extract_shape(iris.cube.Cube([]), 'test.shp', method='wrong') - assert exc.value == ("Invalid value for `method`. Choose from " - "'contains', 'representative'.") +def test_extract_shape_wrong_method_raises(make_testcube, ne_ocean_shapefile): + msg = "Invalid value for `method`" + with pytest.raises(ValueError, match=msg): + extract_shape(make_testcube, ne_ocean_shapefile, method='wrong') + + +@pytest.mark.parametrize('ids', [None, []]) +@pytest.mark.parametrize('crop', [True, False]) +@pytest.mark.parametrize('decomposed', [True, False]) +def test_extract_shape_ar6_all_region(make_testcube, ids, crop, decomposed): + """Test for extracting all AR6 regions with shapefile.""" + cube = extract_shape( + make_testcube, + 'AR6', + method='contains', + crop=crop, + decomposed=decomposed, + ids=ids, + ) + + if decomposed: + assert cube.shape == (58, 5, 5) + assert cube.coords('shape_id') + assert cube.coord_dims('shape_id') == (0, ) + assert np.ma.is_masked(cube.data) + else: + assert cube.shape == (5, 5) + assert not cube.coords('shape_id') + assert not np.ma.is_masked(cube.data) + assert cube.coord('latitude') == make_testcube.coord('latitude') + assert cube.coord('longitude') == make_testcube.coord('longitude') + + +EAO_MASK = np.array([ + [0, 0, 0, 0, 0], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], +], dtype=bool) + +WAF_MASK = np.array([ + [1, 1, 1, 1, 1], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], +], dtype=bool) + + +@pytest.mark.parametrize( + 'ids', + [ + {'Acronym': ['EAO']}, + ['Equatorial.Atlantic-Ocean'], + ], +) +@pytest.mark.parametrize('crop', [True, False]) +@pytest.mark.parametrize('decomposed', [True, False]) +def test_extract_shape_ar6_one_region(make_testcube, ids, crop, decomposed): + """Test for extracting 1 AR6 regions with shapefile.""" + # Adapt lat slightly to test cropping + lat = make_testcube.coord('latitude') + lat.points = [-45., -40., 2.5, 3.5, 4.5] + lat.bounds = [[-50., -41.], [-41., 2.], [2., 3.], [3., 4.], [4., 5.]] + + cube = extract_shape( + make_testcube, + 'ar6', + method='contains', + crop=crop, + decomposed=decomposed, + ids=ids, + ) + + lat = cube.coord('latitude') + lon = cube.coord('longitude') + if decomposed: + if crop: + assert cube.shape == (3, 5) + np.testing.assert_allclose(lat.points, [2.5, 3.5, 4.5]) + else: + assert cube.shape == (5, 5) + assert lat == make_testcube.coord('latitude') + assert lon == make_testcube.coord('longitude') + assert cube.coords('shape_id') + assert cube.coord_dims('shape_id') == () + else: # not decomposed + if crop: + assert cube.shape == (3, 5) + np.testing.assert_allclose(lat.points, [2.5, 3.5, 4.5]) + else: + assert cube.shape == (5, 5) + assert lat == make_testcube.coord('latitude') + assert lon == make_testcube.coord('longitude') + assert not cube.coords('shape_id') + assert np.ma.is_masked(cube.data) + + +@pytest.mark.parametrize( + 'ids', + [ + {'Acronym': ['EAO', 'WAF']}, + ['Equatorial.Atlantic-Ocean', 'Western-Africa'], + ], +) +@pytest.mark.parametrize('crop', [True, False]) +@pytest.mark.parametrize('decomposed', [True, False]) +def test_extract_shape_ar6_two_regions(make_testcube, ids, crop, decomposed): + """Test for extracting 2 AR6 regions with shapefile.""" + cube = extract_shape( + make_testcube, + 'AR6', + method='contains', + crop=crop, + decomposed=decomposed, + ids=ids, + ) + + if decomposed: + assert cube.shape == (2, 5, 5) + mask = np.ma.getmaskarray(cube.data) + np.testing.assert_array_equal(mask[0], EAO_MASK) + np.testing.assert_array_equal(mask[1], WAF_MASK) + assert cube.coords('shape_id') + assert cube.coord_dims('shape_id') == (0, ) + else: + assert cube.shape == (5, 5) + assert not np.ma.is_masked(cube.data) + assert not cube.coords('shape_id') + assert cube.coord('latitude') == make_testcube.coord('latitude') + assert cube.coord('longitude') == make_testcube.coord('longitude') + + +@pytest.mark.parametrize('ids', [{}, {'a': [1, 2], 'b': [1, 2]}]) +def test_extract_shape_invalid_dict(make_testcube, ids): + """Test for extract_shape with invalid ids.""" + msg = "If `ids` is given as dict, it needs exactly one entry" + with pytest.raises(ValueError, match=msg): + extract_shape(make_testcube, 'ar6', ids=ids) + + +@pytest.fixture +def ar6_shapefile(): + """Path to AR6 shapefile.""" + shapefile = ( + Path(esmvalcore.preprocessor.__file__).parent / 'shapefiles' / + 'ar6.shp' + ) + return shapefile + + +def test_get_requested_geometries_invalid_ids(ar6_shapefile): + """Test ``_get_requested_geometries`` with invalid ids.""" + msg = "does not have requested attribute wrong_attr" + with fiona.open(ar6_shapefile) as geometries: + with pytest.raises(ValueError, match=msg): + _get_requested_geometries( + geometries, {'wrong_attr': [1, 2]}, Path('shape.shp') + ) + + +@pytest.mark.parametrize('session', [{}, None]) +def test_update_shapefile_path_abs(session, tmp_path): + """ Test ``update_shapefile_path``.""" + if session is not None: + session['auxiliary_data_dir'] = tmp_path + shapefile = tmp_path / 'my_custom_shapefile.shp' + shapefile.write_text("") # create empty file + + # Test with Path and str object + for shapefile_in in (shapefile, str(shapefile)): + shapefile_out = _update_shapefile_path(shapefile, session=session) + assert isinstance(shapefile_out, Path) + assert shapefile_out == shapefile + + +@pytest.mark.parametrize( + 'shapefile', ['aux_dir/ar6.shp', 'ar6.shp', 'ar6', 'AR6', 'aR6'] +) +@pytest.mark.parametrize('session', [{}, None]) +def test_update_shapefile_path_rel( + shapefile, session, ar6_shapefile, tmp_path +): + """ Test ``update_shapefile_path``.""" + if session is not None: + session['auxiliary_data_dir'] = tmp_path + (tmp_path / 'aux_dir').mkdir(parents=True, exist_ok=True) + aux_dir_shapefile = tmp_path / 'aux_dir' / 'ar6.shp' + aux_dir_shapefile.write_text("") # create empty file + + # Test with Path and str object + for shapefile_in in (Path(shapefile), shapefile): + shapefile_out = _update_shapefile_path(shapefile, session=session) + assert isinstance(shapefile_out, Path) + + if 'aux_dir' in str(shapefile_in) and session is None: + assert shapefile_out == Path(shapefile) + elif 'aux_dir' in str(shapefile): + assert shapefile_out == tmp_path / shapefile + else: + assert shapefile_out == ar6_shapefile if __name__ == '__main__': diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index a8651daf00..a437221a0e 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -6,6 +6,7 @@ import pyesgf.search.results import pytest +import esmvalcore import esmvalcore._recipe.recipe as _recipe import esmvalcore.config import esmvalcore.experimental.recipe_output @@ -635,3 +636,39 @@ def test_extract_preprocessor_order(): order = _recipe._extract_preprocessor_order(profile) assert any(order[i:i + 2] == ('regrid', 'derive') for i in range(len(order) - 1)) + + +def test_update_extract_shape_abs_shapefile(session, tmp_path): + """Test ``_update_extract_shape``.""" + session['auxiliary_data_dir'] = '/aux/dir' + shapefile = tmp_path / 'my_custom_shapefile.shp' + shapefile.write_text("") # create empty file + settings = {'extract_shape': {'shapefile': str(shapefile)}} + + _recipe._update_extract_shape(settings, session) + + assert isinstance(settings['extract_shape']['shapefile'], Path) + assert settings['extract_shape']['shapefile'] == shapefile + + +@pytest.mark.parametrize( + 'shapefile', ['aux_dir/ar6.shp', 'ar6.shp', 'ar6', 'AR6', 'aR6'] +) +def test_update_extract_shape_rel_shapefile(shapefile, session, tmp_path): + """Test ``_update_extract_shape``.""" + session['auxiliary_data_dir'] = tmp_path + (tmp_path / 'aux_dir').mkdir(parents=True, exist_ok=True) + aux_dir_shapefile = tmp_path / 'aux_dir' / 'ar6.shp' + aux_dir_shapefile.write_text("") # create empty file + settings = {'extract_shape': {'shapefile': shapefile}} + + _recipe._update_extract_shape(settings, session) + + if 'aux_dir' in shapefile: + assert settings['extract_shape']['shapefile'] == tmp_path / shapefile + else: + ar6_file = ( + Path(esmvalcore.preprocessor.__file__).parent / 'shapefiles' / + 'ar6.shp' + ) + assert settings['extract_shape']['shapefile'] == ar6_file