Skip to content

Commit fbdd5ec

Browse files
pp-molbdreyer
andauthored
Ugrid cf load (#3684)
* Initial insertion of Ugrid into CF loading. * Make gridded a core dependency. * Bugs and test fixes for CFReader. * Latest iris-test-data. * Review comments, renames + comments. * Tiny bugfix. * Fix code for nonstandard dimension ordering Co-Authored-By: lbdreyer <[email protected]> * Extend fix for nonstandard dims to edges. * Check 'boundary_node_connectivity' as a link property. Co-authored-by: lbdreyer <[email protected]>
1 parent 7600b0f commit fbdd5ec

File tree

11 files changed

+429
-148
lines changed

11 files changed

+429
-148
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ git:
2828

2929
install:
3030
- >
31-
export IRIS_TEST_DATA_REF="1696ac3a823a06b95f430670f285ee97671d2cf2";
31+
export IRIS_TEST_DATA_REF="672dbb46c986038fa5d06a3d8aad691fd1951e07";
3232
export IRIS_TEST_DATA_SUFFIX=$(echo "${IRIS_TEST_DATA_REF}" | sed "s/^v//");
3333
3434
# Install miniconda

lib/iris/fileformats/cf.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@
1717
from abc import ABCMeta, abstractmethod
1818

1919
from collections.abc import Iterable, MutableMapping
20-
import os
2120
import re
2221
import warnings
2322

24-
import netCDF4
2523
import numpy as np
2624
import numpy.ma as ma
2725

@@ -1008,8 +1006,12 @@ class CFReader:
10081006
10091007
"""
10101008

1011-
def __init__(self, filename, warn=False, monotonic=False):
1012-
self._filename = os.path.expanduser(filename)
1009+
def __init__(
1010+
self, dataset, warn=False, monotonic=False, exclude_var_names=None
1011+
):
1012+
self._dataset = dataset
1013+
self._filename = dataset.filepath()
1014+
10131015
# All CF variable types EXCEPT for the "special cases" of
10141016
# CFDataVariable, CFCoordinateVariable and _CFFormulaTermsVariable.
10151017
self._variable_types = (
@@ -1025,8 +1027,6 @@ def __init__(self, filename, warn=False, monotonic=False):
10251027
#: Collection of CF-netCDF variables associated with this netCDF file
10261028
self.cf_group = CFGroup()
10271029

1028-
self._dataset = netCDF4.Dataset(self._filename, mode="r")
1029-
10301030
# Issue load optimisation warning.
10311031
if warn and self._dataset.file_format in [
10321032
"NETCDF3_CLASSIC",
@@ -1039,6 +1039,7 @@ def __init__(self, filename, warn=False, monotonic=False):
10391039

10401040
self._check_monotonic = monotonic
10411041

1042+
self.exclude_var_names = exclude_var_names or []
10421043
self._translate()
10431044
self._build_cf_groups()
10441045
self._reset()
@@ -1049,26 +1050,30 @@ def __repr__(self):
10491050
def _translate(self):
10501051
"""Classify the netCDF variables into CF-netCDF variables."""
10511052

1052-
netcdf_variable_names = list(self._dataset.variables.keys())
1053+
netcdf_variable_names = [
1054+
var_name
1055+
for var_name in self._dataset.variables.keys()
1056+
if var_name not in self.exclude_var_names
1057+
]
10531058

10541059
# Identify all CF coordinate variables first. This must be done
10551060
# first as, by CF convention, the definition of a CF auxiliary
10561061
# coordinate variable may include a scalar CF coordinate variable,
10571062
# whereas we want these two types of variables to be mutually exclusive.
10581063
coords = CFCoordinateVariable.identify(
1059-
self._dataset.variables, monotonic=self._check_monotonic
1064+
self._dataset.variables,
1065+
ignore=self.exclude_var_names,
1066+
monotonic=self._check_monotonic,
10601067
)
10611068
self.cf_group.update(coords)
10621069
coordinate_names = list(self.cf_group.coordinates.keys())
10631070

10641071
# Identify all CF variables EXCEPT for the "special cases".
10651072
for variable_type in self._variable_types:
10661073
# Prevent grid mapping variables being mis-identified as CF coordinate variables.
1067-
ignore = (
1068-
None
1069-
if issubclass(variable_type, CFGridMappingVariable)
1070-
else coordinate_names
1071-
)
1074+
ignore = self.exclude_var_names
1075+
if not issubclass(variable_type, CFGridMappingVariable):
1076+
ignore += coordinate_names
10721077
self.cf_group.update(
10731078
variable_type.identify(self._dataset.variables, ignore=ignore)
10741079
)
@@ -1082,7 +1087,7 @@ def _translate(self):
10821087

10831088
# Identify and register all CF formula terms.
10841089
formula_terms = _CFFormulaTermsVariable.identify(
1085-
self._dataset.variables
1090+
self._dataset.variables, ignore=self.exclude_var_names
10861091
)
10871092

10881093
for cf_var in formula_terms.values():
@@ -1125,10 +1130,9 @@ def _build(cf_variable):
11251130
for variable_type in self._variable_types:
11261131
# Prevent grid mapping variables being mis-identified as
11271132
# CF coordinate variables.
1128-
if issubclass(variable_type, CFGridMappingVariable):
1129-
ignore = None
1130-
else:
1131-
ignore = coordinate_names
1133+
ignore = self.exclude_var_names
1134+
if not issubclass(variable_type, CFGridMappingVariable):
1135+
ignore += coordinate_names
11321136
match = variable_type.identify(
11331137
self._dataset.variables,
11341138
ignore=ignore,
@@ -1258,11 +1262,8 @@ def _build(cf_variable):
12581262
def _reset(self):
12591263
"""Reset the attribute touch history of each variable."""
12601264
for nc_var_name in self._dataset.variables.keys():
1261-
self.cf_group[nc_var_name].cf_attrs_reset()
1262-
1263-
def __del__(self):
1264-
# Explicitly close dataset to prevent file remaining open.
1265-
self._dataset.close()
1265+
if nc_var_name not in self.exclude_var_names:
1266+
self.cf_group[nc_var_name].cf_attrs_reset()
12661267

12671268

12681269
def _getncattr(dataset, attr, default=None):

lib/iris/fileformats/netcdf.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import iris.exceptions
4545
import iris.fileformats.cf
4646
import iris.fileformats._pyke_rules
47+
from iris.fileformats.ugrid_cf_reader import UGridCFReader
4748
import iris.io
4849
import iris.util
4950
from iris._lazy_data import as_lazy_data
@@ -752,7 +753,7 @@ def coord_from_term(term):
752753
cube.add_aux_factory(factory)
753754

754755

755-
def load_cubes(filenames, callback=None):
756+
def load_cubes(filenames, callback=None, *args, **kwargs):
756757
"""
757758
Loads cubes from a list of NetCDF filenames/URLs.
758759
@@ -777,15 +778,20 @@ def load_cubes(filenames, callback=None):
777778
filenames = [filenames]
778779

779780
for filename in filenames:
780-
# Ingest the netCDF file.
781-
cf = iris.fileformats.cf.CFReader(filename)
781+
# Ingest the netCDF file, creating a reader which also checks for UGRID
782+
# content.
783+
reader = UGridCFReader(filename, *args, **kwargs)
782784

783785
# Process each CF data variable.
784-
data_variables = list(cf.cf_group.data_variables.values()) + list(
785-
cf.cf_group.promoted.values()
786-
)
786+
data_variables = list(
787+
reader.cfreader.cf_group.data_variables.values()
788+
) + list(reader.cfreader.cf_group.promoted.values())
787789
for cf_var in data_variables:
788-
cube = _load_cube(engine, cf, cf_var, filename)
790+
cube = _load_cube(engine, reader.cfreader, cf_var, filename)
791+
792+
# Post-process each cube to attach information describing the
793+
# unstructured mesh dimension, if any.
794+
reader.complete_ugrid_cube(cube)
789795

790796
# Process any associated formula terms and attach
791797
# the corresponding AuxCoordFactory.
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the LGPL license.
4+
# See COPYING and COPYING.LESSER in the root of the repository for full
5+
# licensing details.
6+
"""
7+
Adds a UGRID extension layer to netCDF file loading.
8+
9+
"""
10+
from collections import namedtuple
11+
import os
12+
13+
import netCDF4
14+
15+
from gridded.pyugrid.ugrid import UGrid
16+
from gridded.pyugrid.read_netcdf import (
17+
find_mesh_names,
18+
load_grid_from_nc_dataset,
19+
)
20+
from iris.fileformats.cf import CFReader
21+
22+
23+
_UGRID_ELEMENT_TYPE_NAMES = ("node", "edge", "face", "volume")
24+
25+
# Generate all possible UGRID structural property names.
26+
# These are the UGRID mesh properties that contain variable names for linkage,
27+
# which may appear as recognised properties of the main mesh variable.
28+
29+
# Start with coordinate variables for each element type (aka "mesh_location").
30+
_UGRID_LINK_PROPERTIES = [
31+
"{}_coordinates".format(elem) for elem in _UGRID_ELEMENT_TYPE_NAMES
32+
]
33+
34+
# Add in all possible type-to-type_connectivity elements.
35+
# NOTE: this actually generates extra unused names, such as
36+
# "node_face_connectivity", because we are not bothering to distinguish
37+
# between lower- and higher-order elements.
38+
# For now just don't worry about that, as long as we get all the ones which
39+
# *are* needed.
40+
_UGRID_LINK_PROPERTIES += [
41+
"{}_{}_connectivity".format(e1, e2)
42+
for e1 in _UGRID_ELEMENT_TYPE_NAMES
43+
for e2 in _UGRID_ELEMENT_TYPE_NAMES
44+
]
45+
46+
# Also allow for boundary information.
47+
_UGRID_LINK_PROPERTIES += ["boundary_node_connectivity"]
48+
49+
50+
class CubeUgrid(
51+
namedtuple("CubeUgrid", ["cube_dim", "grid", "mesh_location"])
52+
):
53+
"""
54+
Object recording the unstructured grid dimension of a cube.
55+
56+
* cube_dim (int):
57+
The cube dimension which maps the unstructured grid.
58+
There can be only one.
59+
60+
* grid (`gridded.pyugrid.UGrid`):
61+
A 'gridded' description of a UGRID mesh.
62+
63+
* mesh_location (str):
64+
Which element of the mesh the cube is mapped to.
65+
Can be 'face', 'edge' or 'node'. A 'volume' is not supported.
66+
67+
"""
68+
69+
def __str__(self):
70+
result = "Cube unstructured-grid dimension:"
71+
result += "\n cube dimension = {}".format(self.cube_dim)
72+
result += '\n mesh_location = "{}"'.format(self.mesh_location)
73+
result += '\n mesh "{}" :\n'.format(self.grid.mesh_name)
74+
try:
75+
mesh_str = str(self.grid.info)
76+
except TypeError:
77+
mesh_str = "<unprintable mesh>"
78+
result += "\n".join([" " + line for line in mesh_str.split("\n")])
79+
result += "\n"
80+
return result
81+
82+
83+
class UGridCFReader:
84+
"""
85+
A CFReader extension to add UGRID information to netcdf cube loading.
86+
87+
Identifies UGRID-specific parts of a netcdf file, providing:
88+
89+
* `self.cfreader` : a CFReader object to interpret the CF data from the
90+
file for cube creation, while ignoring the UGRID mesh data.
91+
92+
* `self.complete_ugrid_cube(cube)` a call to add the relevant UGRID
93+
information to a cube created from the cfreader data.
94+
95+
This allows us to decouple UGRID from CF support with minimal changes to
96+
the existing `iris.fileformats.netcdf` code, which is intimately coupled to
97+
both the CFReader class and the netCDF4 file interface.
98+
99+
"""
100+
101+
def __init__(self, filename, *args, **kwargs):
102+
self.filename = os.path.expanduser(filename)
103+
dataset = netCDF4.Dataset(self.filename, mode="r")
104+
self.dataset = dataset
105+
meshes = {}
106+
for meshname in find_mesh_names(self.dataset):
107+
mesh = UGrid()
108+
load_grid_from_nc_dataset(dataset, mesh, mesh_name=meshname)
109+
meshes[meshname] = mesh
110+
self.meshes = meshes
111+
112+
# Generate list of excluded variable names.
113+
exclude_vars = list(meshes.keys())
114+
115+
temp_xios_fix = kwargs.pop("temp_xios_fix", False)
116+
if not temp_xios_fix:
117+
# This way *ought* to work, but maybe problems with the test file ?
118+
for mesh in meshes.values():
119+
mesh_var = dataset.variables[mesh.mesh_name]
120+
for attr in mesh_var.ncattrs():
121+
if attr in _UGRID_LINK_PROPERTIES:
122+
exclude_vars.extend(mesh_var.getncattr(attr).split())
123+
else:
124+
# A crude and XIOS-specific alternative ..
125+
exclude_vars += [
126+
name
127+
for name in dataset.variables.keys()
128+
if any(name.startswith(meshname) for meshname in meshes.keys())
129+
]
130+
131+
# Identify possible mesh dimensions and make a map of them.
132+
meshdims_map = {} # Maps {dimension-name: (mesh, mesh-location)}
133+
for mesh in meshes.values():
134+
mesh_var = dataset.variables[mesh.mesh_name]
135+
if mesh.faces is not None:
136+
# Work out name of faces dimension and record it.
137+
if "face_dimension" in mesh_var.ncattrs():
138+
faces_dim_name = mesh_var.getncattr("face_dimension")
139+
else:
140+
# Assume default dimension ordering, and get the dim name
141+
# from dims of a non-optional connectivity variable.
142+
faces_varname = mesh_var.face_node_connectivity
143+
faces_var = dataset.variables[faces_varname]
144+
faces_dim_name = faces_var.dimensions[0]
145+
meshdims_map[faces_dim_name] = (mesh, "face")
146+
if mesh.edges is not None:
147+
# Work out name of edges dimension and record it.
148+
if "edge_dimension" in mesh_var.ncattrs():
149+
edges_dim_name = mesh_var.getncattr("edge_dimension")
150+
else:
151+
# Assume default dimension ordering, and get the dim name
152+
# from dims of a non-optional connectivity variable.
153+
edges_varname = mesh_var.edge_node_connectivity
154+
edges_var = dataset.variables[edges_varname]
155+
edges_dim_name = edges_var.dimensions[0]
156+
meshdims_map[edges_dim_name] = (mesh, "edge")
157+
if mesh.nodes is not None:
158+
# Work out name of nodes dimension and record it.
159+
# Get it from a non-optional coordinate variable.
160+
nodes_varname = mesh_var.node_coordinates.split()[0]
161+
nodes_var = dataset.variables[nodes_varname]
162+
nodes_dim_name = nodes_var.dimensions[0]
163+
meshdims_map[nodes_dim_name] = (mesh, "node")
164+
self.meshdims_map = meshdims_map
165+
166+
# Create a CFReader object which skips the UGRID-related variables.
167+
kwargs["exclude_var_names"] = exclude_vars
168+
self.cfreader = CFReader(self.dataset, *args, **kwargs)
169+
170+
def complete_ugrid_cube(self, cube):
171+
"""
172+
Add the ".ugrid" property to a cube loaded with the `self.cfreader`.
173+
174+
We identify the unstructured-grid dimension of the cube (if any), and
175+
attach a suitable CubeUgrid object, linking the cube mesh dimension to
176+
an element-type (aka "mesh_location") of a mesh.
177+
178+
"""
179+
# Set a 'cube.ugrid' property.
180+
data_var = self.dataset.variables[cube.var_name]
181+
meshes_info = [
182+
(i_dim, self.meshdims_map.get(dim_name))
183+
for i_dim, dim_name in enumerate(data_var.dimensions)
184+
if dim_name in self.meshdims_map
185+
]
186+
if len(meshes_info) > 1:
187+
msg = "Cube maps more than one mesh dimension: {}"
188+
raise ValueError(msg.format(meshes_info))
189+
if meshes_info:
190+
i_dim, (mesh, mesh_location) = meshes_info[0]
191+
cube.ugrid = CubeUgrid(
192+
cube_dim=i_dim, grid=mesh, mesh_location=mesh_location
193+
)
194+
else:
195+
# Add an empty 'cube.ugrid' to all cubes otherwise.
196+
cube.ugrid = None
197+
return
198+
199+
def __del__(self):
200+
# Explicitly close dataset to prevent file remaining open.
201+
self.dataset.close()

lib/iris/tests/integration/experimental/test_regrid_ProjectedUnstructured.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
class TestProjectedUnstructured(tests.IrisTest):
2828
def setUp(self):
2929
path = tests.get_data_path(
30-
("NetCDF", "unstructured_grid", "theta_nodal_xios.nc")
30+
("NetCDF", "unstructured_grid", "theta_nodal_not_ugrid.nc")
3131
)
3232
self.src = iris.load_cube(path, "Potential Temperature")
3333

lib/iris/tests/integration/test_regridding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def test_nearest(self):
9999
class TestUnstructured(tests.IrisTest):
100100
def setUp(self):
101101
path = tests.get_data_path(
102-
("NetCDF", "unstructured_grid", "theta_nodal_xios.nc")
102+
("NetCDF", "unstructured_grid", "theta_nodal_not_ugrid.nc")
103103
)
104104
self.src = iris.load_cube(path, "Potential Temperature")
105105
self.grid = simple_3d()[0, :, :]

0 commit comments

Comments
 (0)