diff --git a/doc/esmvalcore/fixing_data.rst b/doc/esmvalcore/fixing_data.rst index bd58e6e979..9d7a23c29d 100644 --- a/doc/esmvalcore/fixing_data.rst +++ b/doc/esmvalcore/fixing_data.rst @@ -266,3 +266,42 @@ missing coordinate you can create a fix for this model: data_cube = cubes.extract_strict('VAR_NAME') data_cube.add_aux_coord(coord, DIMENSIONS_INDEX_TUPLE) return [data_cube] + + +Customizing checker strictness +============================== + +The data checker classifies its issues using four different levels of +severity. From highest to lowest: + + - ``CRITICAL``: issues that most of the time will have severe consequences. + + - ``ERROR``: issues that usually lead to unexpected errors, but can be safely + ignored sometimes. + + - ``WARNING``: something is not up to the standard but is unlikely to have + consequences later. + + - ``DEBUG``: any info that the checker wants to communicate. Regardless of + checker strictness, those will always be reported as debug messages. + +Users can have control about which levels of issues are interpreted as errors, +and therefore make the checker fail or warnings or debug messages. +For this purpose there is an optional command line option `--check-level` +that can take a number of values, listed below from the lowest level of +strictness to the highest: + +- ``ignore``: all issues, regardless of severity, will be reported as + warnings. Checker will never fail. Use this at your own risk. + +- ``relaxed``: only CRITICAL issues are treated as errors. We recommend not to + rely on this mode, although it can be useful if there are errors preventing + the run that you are sure you can manage on the diagnostics or that will + not affect you. + +- ``default``: fail if there are any CRITICAL or ERROR issues (DEFAULT); Provides + a good measure of safety. + +- ``strict``: fail if there are any warnings, this is the highest level of + strictness. Mostly useful for checking datasets that you have produced, to + be sure that future users will not be distracted by inoffensive warnings. diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 9754bdbb22..d8ae7ab5ac 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -41,6 +41,7 @@ from ._config import configure_logging, read_config_user_file, DIAGNOSTICS_PATH from ._recipe import TASKSEP, read_recipe_file from ._task import resource_usage_logger +from .cmor.check import CheckLevels # set up logging logger = logging.getLogger(__name__) @@ -97,6 +98,24 @@ def get_args(): '--diagnostics', nargs='*', help="Only run the named diagnostics from the recipe.") + parser.add_argument( + '--check-level', + type=str, + choices=[ + val.name.lower() for val in CheckLevels if val != CheckLevels.DEBUG + ], + default='default', + help=""" + Configure the severity of the errors that will make the CMOR check + fail. + Optional: true; + Possible values: + ignore: all errors will be reported as warnings + relaxed: only fail if there are critical errors + default: fail if there are any errors + strict: fail if there are any warnings + """ + ) args = parser.parse_args() return args @@ -142,6 +161,7 @@ def main(args): pattern if TASKSEP in pattern else pattern + TASKSEP + '*' for pattern in args.diagnostics or () } + cfg['check_level'] = CheckLevels[args.check_level.upper()] cfg['synda_download'] = args.synda_download for limit in ('max_datasets', 'max_years'): value = getattr(args, limit) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f21c8aa206..ac8cbcbee9 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -299,6 +299,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['fix_file']['output_dir'] = fix_dir # Cube fixes fix['frequency'] = variable['frequency'] + fix['check_level'] = config_user['check_level'] settings['fix_metadata'] = dict(fix) settings['fix_data'] = dict(fix) @@ -328,6 +329,7 @@ def _get_default_settings(variable, config_user, derive=False): 'mip': variable['mip'], 'short_name': variable['short_name'], 'frequency': variable['frequency'], + 'check_level': config_user['check_level'] } # Configure final CMOR data check settings['cmor_check_data'] = { @@ -335,6 +337,7 @@ def _get_default_settings(variable, config_user, derive=False): 'mip': variable['mip'], 'short_name': variable['short_name'], 'frequency': variable['frequency'], + 'check_level': config_user['check_level'] } # Clean up fixed files diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index c848531fc0..61c4f6f6fd 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -1,5 +1,6 @@ """Module for checking iris cubes against their CMOR definitions.""" import logging +from enum import IntEnum import cf_units import iris.coord_categorisation @@ -10,6 +11,19 @@ from .table import CMOR_TABLES +CheckLevels = IntEnum( + 'CheckLevels', 'DEBUG STRICT DEFAULT RELAXED IGNORE') +"""Level of strictness of the checks. + + Attributes + ------ + - DEBUG: Report any debug message that the checker wants to communicate. + - STRICT: Fail if there are warnings regarding compliance of CMOR standards. + - DEFAULT: Fail if cubes present any discrepancy with CMOR standards. + - RELAXED: Fail if cubes present severe discrepancies with CMOR standards. + - IGNORE: Do not fail for any discrepancy with CMOR standards. +""" + class CMORCheckError(Exception): """Exception raised when a cube does not pass the CMORCheck.""" @@ -35,18 +49,13 @@ class CMORCheck(): automatic_fixes: bool If True, CMORCheck will try to apply automatic fixes for any detected error, if possible. - warnings_as_errors: bool, default False - If True, CMORCheck will treat all warnings as errors + check_level: CheckLevels + Level of strictness of the checks. Attributes ---------- frequency: str Expected frequency for the data. - automatic_fixes: bool - If True, CMORCheck will try to apply automatic fixes for any - detected error, if possible. - warnings_as_errors: bool - If True, CMORCheck will treat all warnings as errors """ _attr_msg = '{}: {} should be {}, not {}' @@ -60,11 +69,13 @@ def __init__(self, var_info, frequency=None, fail_on_error=False, - automatic_fixes=False, - warnings_as_errors=False): + check_level=CheckLevels.DEFAULT, + automatic_fixes=False): self._cube = cube self._failerr = fail_on_error + self._check_level = check_level + self._logger = logging.getLogger(__name__) self._errors = list() self._warnings = list() self._debug_messages = list() @@ -73,7 +84,6 @@ def __init__(self, frequency = self._cmor_var.frequency self.frequency = frequency self.automatic_fixes = automatic_fixes - self.warnings_as_errors = warnings_as_errors def check_metadata(self, logger=None): """ @@ -87,6 +97,12 @@ def check_metadata(self, logger=None): - Equivalent calendars will all default to the same name. - Time units will be set to days since 1850-01-01 + + Parameters + ---------- + logger: logging.Logger + Given logger. + Raises ------ CMORCheckError @@ -95,8 +111,8 @@ def check_metadata(self, logger=None): all checks and then raises. """ - if logger is None: - logger = logging.getLogger(__name__) + if logger is not None: + self._logger = logger self._check_var_metadata() self._check_fill_value() @@ -104,14 +120,51 @@ def check_metadata(self, logger=None): self._check_coords() if self.frequency != 'fx': self._check_time_coord() + self._check_rank() - self.report_debug_messages(logger) - self.report_warnings(logger) + self.report_debug_messages() + self.report_warnings() self.report_errors() return self._cube + def check_data(self, logger=None): + """Check the cube data. + + Performs all the tests that require to have the data in memory. + Assumes that metadata is correct, so you must call check_metadata prior + to this. + + It will also report some warnings in case of minor errors. + + Parameters + ---------- + logger: logging.Logger + Given logger. + + Raises + ------ + CMORCheckError + If errors are found. If fail_on_error attribute is set to True, + raises as soon as an error is detected. If set to False, it perform + all checks and then raises. + + """ + if logger is not None: + self._logger = logger + + if self._cmor_var.units: + units = self._get_effective_units() + if str(self._cube.units) != units: + self._cube.convert_units(units) + + self._check_coords_data() + + self.report_warnings() + self.report_errors() + return self._cube + def report_errors(self): """Report detected errors. @@ -127,62 +180,33 @@ def report_errors(self): self._cube) raise CMORCheckError(msg) - def report_warnings(self, logger): + def report_warnings(self): """Report detected warnings to the given logger. Parameters ---------- - logger + logger: logging.Logger + Given logger """ if self.has_warnings(): msg = 'There were warnings in variable {}:\n{}\n'.format( self._cube.var_name, '\n '.join(self._warnings)) - logger.warning(msg) + self._logger.warning(msg) - def report_debug_messages(self, logger): + def report_debug_messages(self): """Report detected debug messages to the given logger. Parameters ---------- - logger + logger: logging.Logger + Given logger. """ if self.has_debug_messages(): msg = 'There were metadata changes in variable {}:\n{}\n'.format( self._cube.var_name, '\n '.join(self._debug_messages)) - logger.debug(msg) - - def check_data(self, logger=None): - """Check the cube data. - - Performs all the tests that require to have the data in memory. - Assumes that metadata is correct, so you must call check_metadata prior - to this. - - It will also report some warnings in case of minor errors. - - Raises - ------ - CMORCheckError - If errors are found. If fail_on_error attribute is set to True, - raises as soon as an error is detected. If set to False, it perform - all checks and then raises. - - """ - if logger is None: - logger = logging.getLogger(__name__) - - if self._cmor_var.units: - units = self._get_effective_units() - if str(self._cube.units) != units: - self._cube.convert_units(units) - - self._check_coords_data() - - self.report_warnings(logger) - self.report_errors() - return self._cube + self._logger.debug(msg) def _check_fill_value(self): """Check fill value.""" @@ -235,11 +259,17 @@ def _check_var_metadata(self): if self._cmor_var.units: units = self._get_effective_units() - - if not self._cube.units.is_convertible(units): - self.report_error(f'Variable {self._cube.var_name} units ' - f'{self._cube.units} can not be ' - f'converted to {self._cmor_var.units}') + if self._cube.units != units: + if not self._cube.units.is_convertible(units): + self.report_error( + f'Variable {self._cube.var_name} units ' + f'{self._cube.units} can not be ' + f'converted to {self._cmor_var.units}') + else: + self.report_warning( + f'Variable {self._cube.var_name} units ' + f'{self._cube.units} will be ' + f'converted to {self._cmor_var.units}') # Check other variable attributes that match entries in cube.attributes attrs = ('positive', ) @@ -250,9 +280,10 @@ def _check_var_metadata(self): self.report_warning('{}: attribute {} not present', self._cube.var_name, attr) elif self._cube.attributes[attr] != attr_value: - self.report_error(self._attr_msg, self._cube.var_name, - attr, attr_value, - self._cube.attributes[attr]) + self.report_error( + self._attr_msg, self._cube.var_name, + attr, attr_value, + self._cube.attributes[attr]) def _get_effective_units(self): """Get effective units.""" @@ -292,7 +323,7 @@ def _check_dim_names(self): try: cube_coord = self._cube.coord(var_name=coordinate.out_name) if cube_coord.standard_name != coordinate.standard_name: - self.report_error( + self.report_critical( self._attr_msg, coordinate.out_name, 'standard_name', @@ -324,8 +355,14 @@ def _check_dim_names(self): coordinate.out_name, ) except iris.exceptions.CoordinateNotFoundError: - self.report_error(self._does_msg, coordinate.name, - 'exist') + if coordinate.standard_name in ['time', 'latitude', + 'longitude'] or \ + coordinate.requested: + self.report_critical( + self._does_msg, coordinate.name, 'exist') + else: + self.report_error( + self._does_msg, coordinate.name, 'exist') def _check_coords(self): """Check coordinates.""" @@ -369,15 +406,20 @@ def _check_coord(self, cmor, coord, var_name): fixed = False if self.automatic_fixes: try: + old_unit = coord.units new_unit = cf_units.Unit(cmor.units, coord.units.calendar) coord.convert_units(new_unit) fixed = True + self.report_warning( + f'Coordinate {coord.var_name} units ' + f'{str(old_unit)} ' + f'converted to {cmor.units}') except ValueError: pass if not fixed: - self.report_error(self._attr_msg, var_name, 'units', - cmor.units, coord.units) + self.report_critical(self._attr_msg, var_name, 'units', + cmor.units, coord.units) self._check_coord_values(cmor, coord, var_name) self._check_coord_bounds(cmor, coord, var_name) self._check_coord_monotonicity_and_direction(cmor, coord, var_name) @@ -408,20 +450,22 @@ def _check_coord_monotonicity_and_direction(self, cmor, coord, var_name): if coord.ndim > 1: return if not coord.is_monotonic(): - self.report_error(self._is_msg, var_name, 'monotonic') + self.report_critical(self._is_msg, var_name, 'monotonic') if len(coord.points) == 1: return if cmor.stored_direction: if cmor.stored_direction == 'increasing': if coord.points[0] > coord.points[1]: if not self.automatic_fixes or coord.ndim > 1: - self.report_error(self._is_msg, var_name, 'increasing') + self.report_critical( + self._is_msg, var_name, 'increasing') else: self._reverse_coord(coord) elif cmor.stored_direction == 'decreasing': if coord.points[0] < coord.points[1]: if not self.automatic_fixes or coord.ndim > 1: - self.report_error(self._is_msg, var_name, 'decreasing') + self.report_critical( + self._is_msg, var_name, 'decreasing') else: self._reverse_coord(coord) @@ -446,8 +490,9 @@ def _check_coord_values(self, coord_info, coord, var_name): self.automatic_fixes: l_fix_coord_value = True else: - self.report_error(self._vals_msg, var_name, - '< {} ='.format('valid_min'), valid_min) + self.report_critical( + self._vals_msg, var_name, + '< {} ='.format('valid_min'), valid_min) if coord_info.valid_max: valid_max = float(coord_info.valid_max) @@ -456,8 +501,9 @@ def _check_coord_values(self, coord_info, coord, var_name): self.automatic_fixes: l_fix_coord_value = True else: - self.report_error(self._vals_msg, var_name, - '> {} ='.format('valid_max'), valid_max) + self.report_critical( + self._vals_msg, var_name, + '> {} ='.format('valid_max'), valid_max) if l_fix_coord_value: if coord.ndim == 1: @@ -511,8 +557,8 @@ def _check_time_coord(self): ) if not coord.units.is_time_reference(): - self.report_error(self._does_msg, var_name, - 'have time reference units') + self.report_critical(self._does_msg, var_name, + 'have time reference units') else: old_units = coord.units coord.convert_units( @@ -647,7 +693,41 @@ def has_debug_messages(self): """ return len(self._debug_messages) > 0 - def report_error(self, message, *args): + def report(self, level, message, *args): + """Generic method to report a message from the checker + + Parameters + ---------- + level : CheckLevels + Message level + message : str + Message to report + args : + String format args for the message + + Raises + ------ + CMORCheckError + If fail on error is set, it is thrown when registering an error + message + """ + msg = message.format(*args) + if level == CheckLevels.DEBUG: + if self._failerr: + self._logger.debug(msg) + else: + self._debug_messages.append(msg) + elif level < self._check_level: + if self._failerr: + self._logger.warning(msg) + else: + self._warnings.append(msg) + else: + if self._failerr: + raise CMORCheckError(msg + '\nin cube:\n{}'.format(self._cube)) + self._errors.append(msg) + + def report_critical(self, message, *args): """Report an error. If fail_on_error is set to True, raises automatically. @@ -661,16 +741,23 @@ def report_error(self, message, *args): arguments to format the message string. """ - msg = message.format(*args) - if self._failerr: - raise CMORCheckError(msg + '\nin cube:\n{}'.format(self._cube)) - self._errors.append(msg) + self.report(CheckLevels.RELAXED, message, *args) - def report_warning(self, message, *args): - """Report a warning. + def report_error(self, message, *args): + """Report a normal error. - If fail_on_error is set to True, logs it automatically. - If fail_on_error is set to False, stores it for later reports. + Parameters + ---------- + message: str: unicode + Message for the error. + *args: + arguments to format the message string. + + """ + self.report(CheckLevels.DEFAULT, message, *args) + + def report_warning(self, message, *args): + """Report a warning level error. Parameters ---------- @@ -680,15 +767,7 @@ def report_warning(self, message, *args): arguments to format the message string. """ - if self.warnings_as_errors: - self.report_error(message, *args) - return - - msg = message.format(*args) - if self._failerr: - print('WARNING: {0}'.format(msg)) - else: - self._warnings.append(msg) + self.report(CheckLevels.STRICT, message, *args) def report_debug_message(self, message, *args): """Report a debug message. @@ -701,15 +780,15 @@ def report_debug_message(self, message, *args): arguments to format the message string """ - msg = message.format(*args) - self._debug_messages.append(msg) + self.report(CheckLevels.DEBUG, message, *args) def _get_cmor_checker(table, mip, short_name, frequency, - fail_on_error=True, + fail_on_error=False, + check_level=CheckLevels.DEFAULT, automatic_fixes=False): """Get a CMOR checker/fixer.""" if table not in CMOR_TABLES: @@ -729,13 +808,16 @@ def _checker(cube): var_info, frequency=frequency, fail_on_error=fail_on_error, + check_level=check_level, automatic_fixes=automatic_fixes) return _checker -def cmor_check_metadata(cube, cmor_table, mip, short_name, frequency): - """Check if metadata conforms to variable's CMOR definition. +def cmor_check_metadata(cube, cmor_table, mip, + short_name, frequency, + check_level=CheckLevels.DEFAULT): + """Check if metadata conforms to variable's CMOR definiton. None of the checks at this step will force the cube to load the data. @@ -751,15 +833,20 @@ def cmor_check_metadata(cube, cmor_table, mip, short_name, frequency): Variable's short name. frequency: basestring Data frequency. + check_level: CheckLevels + Level of strictness of the checks. """ - checker = _get_cmor_checker(cmor_table, mip, short_name, frequency) + checker = _get_cmor_checker(cmor_table, mip, + short_name, frequency, + check_level=check_level) checker(cube).check_metadata() return cube -def cmor_check_data(cube, cmor_table, mip, short_name, frequency): - """Check if data conforms to variable's CMOR definition. +def cmor_check_data(cube, cmor_table, mip, short_name, frequency, + check_level=CheckLevels.DEFAULT): + """Check if data conforms to variable's CMOR definiton. The checks performed at this step require the data in memory. @@ -775,15 +862,18 @@ def cmor_check_data(cube, cmor_table, mip, short_name, frequency): Variable's short name frequency: basestring Data frequency + check_level: CheckLevels + Level of strictness of the checks. """ - checker = _get_cmor_checker(cmor_table, mip, short_name, frequency) + checker = _get_cmor_checker(cmor_table, mip, short_name, frequency, + check_level=check_level) checker(cube).check_data() return cube -def cmor_check(cube, cmor_table, mip, short_name, frequency): - """Check if cube conforms to variable's CMOR definition. +def cmor_check(cube, cmor_table, mip, short_name, frequency, check_level): + """Check if cube conforms to variable's CMOR definiton. Equivalent to calling cmor_check_metadata and cmor_check_data consecutively. @@ -800,8 +890,12 @@ def cmor_check(cube, cmor_table, mip, short_name, frequency): Variable's short name. frequency: basestring Data frequency. + check_level: enum.IntEnum + Level of strictness of the checks. """ - cmor_check_metadata(cube, cmor_table, mip, short_name, frequency) - cmor_check_data(cube, cmor_table, mip, short_name, frequency) + cmor_check_metadata(cube, cmor_table, mip, short_name, frequency, + check_level=check_level) + cmor_check_data(cube, cmor_table, mip, short_name, frequency, + check_level=check_level) return cube diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 6ddb13549d..a30be15cc2 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -13,7 +13,7 @@ from iris.cube import CubeList from ._fixes.fix import Fix -from .check import _get_cmor_checker +from .check import _get_cmor_checker, CheckLevels logger = logging.getLogger(__name__) @@ -55,7 +55,8 @@ def fix_metadata(cubes, project, dataset, mip, - frequency=None): + frequency=None, + check_level=CheckLevels.DEFAULT): """ Fix cube metadata if fixes are required and check it anyway. @@ -79,6 +80,8 @@ def fix_metadata(cubes, frequency: str, optional Variable's data frequency, if available + check_level: CheckLevels + Level of strictness of the checks. Set to default. Returns ------- @@ -103,38 +106,13 @@ def fix_metadata(cubes, for fix in fixes: cube_list = fix.fix_metadata(cube_list) - if len(cube_list) != 1: - cube = None - for raw_cube in cube_list: - if raw_cube.var_name == short_name: - cube = raw_cube - break - if not cube: - raise ValueError( - 'More than one cube found for variable %s in %s:%s but ' - 'none of their var_names match the expected. \n' - 'Full list of cubes encountered: %s' % - (short_name, project, dataset, cube_list) - ) - logger.warning( - 'Found variable %s in %s:%s, but there were other present in ' - 'the file. Those extra variables are usually metadata ' - '(cell area, latitude descriptions) that was not saved ' - 'properly. It is possible that errors appear further on ' - 'because of this. \nFull list of cubes encountered: %s', - short_name, - project, - dataset, - cube_list - ) - else: - cube = cube_list[0] - + cube = _get_single_cube(cube_list, short_name, project, dataset) checker = _get_cmor_checker( frequency=frequency, table=project, mip=mip, short_name=short_name, + check_level=check_level, fail_on_error=False, automatic_fixes=True) cube = checker(cube).check_metadata() @@ -143,12 +121,42 @@ def fix_metadata(cubes, return fixed_cubes +def _get_single_cube(cube_list, short_name, project, dataset): + if len(cube_list) == 1: + return cube_list[0] + cube = None + for raw_cube in cube_list: + if raw_cube.var_name == short_name: + cube = raw_cube + break + if not cube: + raise ValueError( + 'More than one cube found for variable %s in %s:%s but ' + 'none of their var_names match the expected. \n' + 'Full list of cubes encountered: %s' % + (short_name, project, dataset, cube_list) + ) + logger.warning( + 'Found variable %s in %s:%s, but there were other present in ' + 'the file. Those extra variables are usually metadata ' + '(cell area, latitude descriptions) that was not saved ' + 'according to CF-conventions. It is possible that errors appear ' + 'further on because of this. \nFull list of cubes encountered: %s', + short_name, + project, + dataset, + cube_list + ) + return cube + + def fix_data(cube, short_name, project, dataset, mip, - frequency=None): + frequency=None, + check_level=CheckLevels.DEFAULT): """ Fix cube data if fixes add present and check it anyway. @@ -171,6 +179,8 @@ def fix_data(cube, Variable's MIP frequency: str, optional Variable's data frequency, if available + check_level: CheckLevels + Level of strictness of the checks. Set to default. Returns ------- @@ -186,13 +196,13 @@ def fix_data(cube, for fix in Fix.get_fixes( project=project, dataset=dataset, mip=mip, short_name=short_name): cube = fix.fix_data(cube) - checker = _get_cmor_checker( frequency=frequency, table=project, mip=mip, short_name=short_name, fail_on_error=False, - automatic_fixes=True) + automatic_fixes=True, + check_level=check_level) cube = checker(cube).check_data() return cube diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index fbc40fc787..c9efc7a467 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -15,6 +15,8 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback +from esmvalcore.cmor.check import CheckLevels + from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance @@ -65,6 +67,7 @@ def config_user(tmp_path): cfg = esmvalcore._config.read_config_user_file(filename, 'recipe_test') cfg['synda_download'] = False cfg['output_file_type'] = 'png' + cfg['check_level'] = CheckLevels.DEFAULT return cfg @@ -352,6 +355,7 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): 'output_dir': fix_dir, }, 'fix_data': { + 'check_level': CheckLevels.DEFAULT, 'project': 'CMIP5', 'dataset': 'CanESM2', 'short_name': 'chl', @@ -359,6 +363,7 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): 'frequency': 'yr', }, 'fix_metadata': { + 'check_level': CheckLevels.DEFAULT, 'project': 'CMIP5', 'dataset': 'CanESM2', 'short_name': 'chl', @@ -374,12 +379,14 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): 'end_day': 1, }, 'cmor_check_metadata': { + 'check_level': CheckLevels.DEFAULT, 'cmor_table': 'CMIP5', 'mip': 'Oyr', 'short_name': 'chl', 'frequency': 'yr', }, 'cmor_check_data': { + 'check_level': CheckLevels.DEFAULT, 'cmor_table': 'CMIP5', 'mip': 'Oyr', 'short_name': 'chl', @@ -438,6 +445,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'output_dir': fix_dir, }, 'fix_data': { + 'check_level': CheckLevels.DEFAULT, 'project': 'CMIP5', 'dataset': 'CanESM2', 'short_name': 'sftlf', @@ -445,6 +453,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'frequency': 'fx', }, 'fix_metadata': { + 'check_level': CheckLevels.DEFAULT, 'project': 'CMIP5', 'dataset': 'CanESM2', 'short_name': 'sftlf', @@ -452,12 +461,14 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'frequency': 'fx', }, 'cmor_check_metadata': { + 'check_level': CheckLevels.DEFAULT, 'cmor_table': 'CMIP5', 'mip': 'fx', 'short_name': 'sftlf', 'frequency': 'fx', }, 'cmor_check_data': { + 'check_level': CheckLevels.DEFAULT, 'cmor_table': 'CMIP5', 'mip': 'fx', 'short_name': 'sftlf', diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py index e7c03d2d52..3732c36ce5 100644 --- a/tests/unit/cmor/test_cmor_check.py +++ b/tests/unit/cmor/test_cmor_check.py @@ -1,8 +1,6 @@ """Unit tests for the CMORCheck class.""" -import sys import unittest -from io import StringIO import iris import iris.coord_categorisation @@ -11,7 +9,7 @@ import numpy as np from cf_units import Unit -from esmvalcore.cmor.check import CMORCheck, CMORCheckError +from esmvalcore.cmor.check import CMORCheck, CMORCheckError, CheckLevels class VariableInfoMock: @@ -97,14 +95,14 @@ def test_report_error(self): """Test report error function.""" checker = CMORCheck(self.cube, self.var_info) self.assertFalse(checker.has_errors()) - checker.report_error('New error: {}', 'something failed') + checker.report_critical('New error: {}', 'something failed') self.assertTrue(checker.has_errors()) def test_fail_on_error(self): """Test exception is raised if fail_on_error is activated.""" checker = CMORCheck(self.cube, self.var_info, fail_on_error=True) with self.assertRaises(CMORCheckError): - checker.report_error('New error: {}', 'something failed') + checker.report_critical('New error: {}', 'something failed') def test_report_warning(self): """Test report warning function.""" @@ -116,12 +114,12 @@ def test_report_warning(self): def test_warning_fail_on_error(self): """Test report warning function with fail_on_error.""" checker = CMORCheck(self.cube, self.var_info, fail_on_error=True) - stdout = sys.stdout - sys.stdout = StringIO() - checker.report_warning('New error: {}', 'something failed') - output = sys.stdout.getvalue().strip() - sys.stdout = stdout - self.assertEqual(output, 'WARNING: New error: something failed') + with self.assertLogs(level='WARNING') as cm: + checker.report_warning('New error: {}', 'something failed') + self.assertEqual( + cm.output, + ['WARNING:esmvalcore.cmor.check:New error: something failed', ] + ) def test_report_debug_message(self): """"Test report debug message function""" @@ -135,7 +133,7 @@ def test_check(self): self._check_cube() def _check_cube(self, automatic_fixes=False, frequency=None, - warnings_as_errors=True): + check_level=CheckLevels.DEFAULT): """Apply checks and optionally automatic fixes to self.cube.""" def checker(cube): return CMORCheck( @@ -143,11 +141,24 @@ def checker(cube): self.var_info, automatic_fixes=automatic_fixes, frequency=frequency, - warnings_as_errors=warnings_as_errors) + check_level=check_level) self.cube = checker(self.cube).check_metadata() self.cube = checker(self.cube).check_data() + def _check_cube_metadata(self, automatic_fixes=False, frequency=None, + check_level=CheckLevels.DEFAULT): + """Apply checks and optionally automatic fixes to self.cube.""" + def checker(cube): + return CMORCheck( + cube, + self.var_info, + automatic_fixes=automatic_fixes, + frequency=frequency, + check_level=check_level) + + self.cube = checker(self.cube).check_metadata() + def test_check_with_month_number(self): """Test checks succeeds for a good cube with month number.""" iris.coord_categorisation.add_month_number(self.cube, 'time') @@ -179,7 +190,7 @@ def test_check_bad_standard_name_auto_fix(self): """Test check pass for a bad standard_name with automatic fixes.""" self.cube = self.get_cube(self.var_info) self.cube.standard_name = 'wind_speed' - self._check_cube(automatic_fixes=True, warnings_as_errors=False) + self._check_cube(automatic_fixes=True) self._check_cube() def test_check_bad_standard_name(self): @@ -192,7 +203,7 @@ def test_check_bad_long_name_auto_fix(self): """Test check pass for a bad standard_name with automatic fixes.""" self.cube = self.get_cube(self.var_info) self.cube.long_name = 'bad_name' - self._check_cube(automatic_fixes=True, warnings_as_errors=False) + self._check_cube(automatic_fixes=True) self._check_cube() def test_check_bad_long_name_auto_fix_report_warning(self): @@ -274,20 +285,258 @@ def test_rank_unstructured_grid(self): self.cube.add_aux_coord(new_lat, 1) self._check_cube() - def _check_fails_in_metadata(self, automatic_fixes=False, frequency=None): + def test_check_bad_var_standard_name_strict_flag(self): + """Test check fails for a bad variable standard_name with + --cmor-check strict.""" + self.cube = self.get_cube(self.var_info) + self.cube.standard_name = 'wind_speed' + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_var_long_name_strict_flag(self): + """Test check fails for a bad variable long_name with + --cmor-check strict.""" + self.cube = self.get_cube(self.var_info) + self.cube.long_name = "Near-Surface Wind Speed" + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_var_units_strict_flag(self): + """Test check fails for a bad variable units with + --cmor-check strict.""" + self.cube = self.get_cube(self.var_info) + self.cube.units = "kg" + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_attributes_strict_flag(self): + """Test check fails for a bad variable attribute with + --cmor-check strict.""" + self.var_info.standard_name = "surface_upward_latent_heat_flux" + self.var_info.positive = "up" + self.cube = self.get_cube(self.var_info) + self.cube.attributes['positive'] = "Wrong attribute" + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_rank_strict_flag(self): + """Test check fails for a bad variable rank with + --cmor-check strict.""" + lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) + self.cube.remove_coord('latitude') + self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_coord_var_name_strict_flag(self): + """Test check fails for bad coord var_name with + --cmor-check strict""" + self.var_info.table_type = 'CMIP5' + self.cube.coord('longitude').var_name = 'bad_name' + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_missing_lon_strict_flag(self): + """Test check fails for missing longitude with --cmor-check strict""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('longitude') + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_missing_lat_strict_flag(self): + """Test check fails for missing latitude with --cmor-check strict""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('latitude') + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_missing_time_strict_flag(self): + """Test check fails for missing time with --cmor-check strict""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('time') + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_missing_coord_strict_flag(self): + """Test check fails for missing coord other than lat and lon + with --cmor-check strict""" + self.cube = self.get_cube(self.var_info) + self.var_info.coordinates = {'height2m': CoordinateInfoMock('height2m') + } + self._check_fails_in_metadata(automatic_fixes=False) + + def test_check_bad_var_standard_name_relaxed_flag(self): + """Test check reports warning for a bad variable standard_name with + --cmor-check relaxed.""" + self.cube = self.get_cube(self.var_info) + self.cube.standard_name = 'wind_speed' + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_var_long_name_relaxed_flag(self): + """Test check reports warning for a bad variable long_name with + --cmor-check relaxed.""" + self.cube = self.get_cube(self.var_info) + self.cube.long_name = "Near-Surface Wind Speed" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_var_units_relaxed_flag(self): + """Test check reports warning for a bad variable units with + --cmor-check relaxed.""" + self.cube = self.get_cube(self.var_info) + self.cube.units = "kg" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_attributes_relaxed_flag(self): + """Test check report warnings for a bad variable attribute with + --cmor-check relaxed.""" + self.var_info.standard_name = "surface_upward_latent_heat_flux" + self.var_info.positive = "up" + self.cube = self.get_cube(self.var_info) + self.cube.attributes['positive'] = "Wrong attribute" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_rank_relaxed_flag(self): + """Test check report warnings for a bad variable rank with + --cmor-check relaxed.""" + lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) + self.cube.remove_coord('latitude') + self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_coord_standard_name_relaxed_flag(self): + """Test check reports warning for bad coord var_name with + --cmor-check relaxed""" + self.var_info.table_type = 'CMIP5' + self.cube.coord('longitude').var_name = 'bad_name' + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_missing_lon_relaxed_flag(self): + """Test check fails for missing longitude with --cmor-check relaxed""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('longitude') + self._check_fails_in_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_missing_lat_relaxed_flag(self): + """Test check fails for missing latitude with --cmor-check relaxed""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('latitude') + self._check_fails_in_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_missing_time_relaxed_flag(self): + """Test check fails for missing latitude with --cmor-check relaxed""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('time') + self._check_fails_in_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_missing_coord_relaxed_flag(self): + """Test check reports warning for missing coord other than lat and lon + with --cmor-check relaxed""" + self.cube = self.get_cube(self.var_info) + self.var_info.coordinates = {'height2m': CoordinateInfoMock('height2m') + } + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.RELAXED) + + def test_check_bad_var_standard_name_none_flag(self): + """Test check reports warning for a bad variable standard_name with + --cmor-check ignore.""" + self.cube = self.get_cube(self.var_info) + self.cube.standard_name = 'wind_speed' + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_bad_var_long_name_none_flag(self): + """Test check reports warning for a bad variable long_name with + --cmor-check ignore.""" + self.cube = self.get_cube(self.var_info) + self.cube.long_name = "Near-Surface Wind Speed" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_bad_var_units_none_flag(self): + """Test check reports warning for a bad variable unit with + --cmor-check ignore.""" + self.cube = self.get_cube(self.var_info) + self.cube.units = "kg" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_bad_attributes_none_flag(self): + """Test check reports warning for a bad variable attribute with + --cmor-check ignore.""" + self.var_info.standard_name = "surface_upward_latent_heat_flux" + self.var_info.positive = "up" + self.cube = self.get_cube(self.var_info) + self.cube.attributes['positive'] = "Wrong attribute" + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_bad_rank_none_flag(self): + """Test check reports warning for a bad variable rank with + --cmor-check ignore.""" + lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) + self.cube.remove_coord('latitude') + self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_bad_coord_standard_name_none_flag(self): + """Test check reports warning for bad coord var_name with + --cmor-check ignore.""" + self.var_info.table_type = 'CMIP5' + self.cube.coord('longitude').var_name = 'bad_name' + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_missing_lon_none_flag(self): + """Test check reports warning for missing longitude with + --cmor-check ignore""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('longitude') + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_missing_lat_none_flag(self): + """Test check reports warning for missing latitude with + --cmor-check ignore""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('latitude') + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_missing_time_none_flag(self): + """Test check reports warning for missing time + with --cmor-check ignore""" + self.var_info.table_type = 'CMIP5' + self.cube.remove_coord('time') + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def test_check_missing_coord_none_flag(self): + """Test check reports warning for missing coord other than lat, lon and + time with --cmor-check ignore""" + self.cube = self.get_cube(self.var_info) + self.var_info.coordinates = {'height2m': CoordinateInfoMock('height2m') + } + self._check_warnings_on_metadata(automatic_fixes=False, + check_level=CheckLevels.IGNORE) + + def _check_fails_in_metadata(self, automatic_fixes=False, frequency=None, + check_level=CheckLevels.DEFAULT): checker = CMORCheck( self.cube, self.var_info, automatic_fixes=automatic_fixes, - frequency=frequency) + frequency=frequency, + check_level=check_level) with self.assertRaises(CMORCheckError): checker.check_metadata() - def _check_warnings_on_metadata(self, automatic_fixes=False): + def _check_warnings_on_metadata(self, automatic_fixes=False, + check_level=CheckLevels.DEFAULT): checker = CMORCheck( - self.cube, self.var_info, - warnings_as_errors=False, - automatic_fixes=automatic_fixes, + self.cube, self.var_info, automatic_fixes=automatic_fixes, + check_level=check_level ) checker.check_metadata() self.assertTrue(checker.has_warnings()) diff --git a/tests/unit/cmor/test_fix.py b/tests/unit/cmor/test_fix.py index 814e810dd4..648004bbd5 100644 --- a/tests/unit/cmor/test_fix.py +++ b/tests/unit/cmor/test_fix.py @@ -4,6 +4,7 @@ from unittest.mock import Mock, patch from esmvalcore.cmor.fix import Fix, fix_data, fix_file, fix_metadata +from esmvalcore.cmor.check import CheckLevels class TestFixFile(TestCase): @@ -194,7 +195,7 @@ def test_cmor_checker_called(self): mip='mip', short_name='short_name', table='CMIP6', - ) + check_level=CheckLevels.DEFAULT,) checker.assert_called_once_with(self.cube) checker.return_value.check_metadata.assert_called_once_with() @@ -264,6 +265,7 @@ def test_cmor_checker_called(self): get_mock.assert_called_once_with( table='CMIP6', automatic_fixes=True, + check_level=CheckLevels.DEFAULT, fail_on_error=False, frequency='frequency', mip='mip',