From 8d30979d129d6631aef40fc35dd25ce60e23c37b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 12:46:30 +0100 Subject: [PATCH 01/40] started implementing dry data checks --- esmvalcore/_main.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 96613fb1a1..773aeb352c 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -97,6 +97,14 @@ def get_args(): '--diagnostics', nargs='*', help="Only run the named diagnostics from the recipe.") + parser.add_argument( + '--check-data-availability', + action='store_true', + help="Check data availability and return a report.") + parser.add_argument( + '--check-data-compliance', + action='store_true', + help="Check data CMOR compliance and return a report.") args = parser.parse_args() return args @@ -152,12 +160,18 @@ def main(args): cfg[limit] = value resource_log = os.path.join(cfg['run_dir'], 'resource_usage.txt') + data_dry_check = args.check_data_availability + compliance_dry_check = args.check_data_compliance with resource_usage_logger(pid=os.getpid(), filename=resource_log): - process_recipe(recipe_file=recipe, config_user=cfg) + process_recipe(recipe_file=recipe, + config_user=cfg, + data_dry_check=data_dry_check, + compliance_dry_check=compliance_dry_check) return cfg -def process_recipe(recipe_file, config_user): +def process_recipe(recipe_file, config_user, + data_dry_check, compliance_dry_check): """Process recipe.""" if not os.path.isfile(recipe_file): raise OSError(errno.ENOENT, "Specified recipe file does not exist", @@ -199,7 +213,8 @@ def process_recipe(recipe_file, config_user): shutil.copy2(recipe_file, config_user['run_dir']) # parse recipe - recipe = read_recipe_file(recipe_file, config_user) + recipe = read_recipe_file(recipe_file, config_user, + data_dry_check, compliance_dry_check) logger.debug("Recipe summary:\n%s", recipe) # run From 7c51fd79ae5db04e08e4d77564b5fbfefcc9c161 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 12:46:37 +0100 Subject: [PATCH 02/40] started implementing dry data checks --- esmvalcore/_recipe.py | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index cdbd5c6bd1..bb20aba3b1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -63,12 +63,18 @@ def load_raw_recipe(filename): return raw_recipe -def read_recipe_file(filename, config_user, initialize_tasks=True): +def read_recipe_file(filename, config_user, + data_dry_check, compliance_dry_check, + initialize_tasks=True): """Read a recipe from file.""" raw_recipe = load_raw_recipe(filename) + dry_check = False + if data_dry_check or compliance_dry_check: + dry_check = True return Recipe(raw_recipe, config_user, initialize_tasks, + dry_check=dry_check, recipe_file=filename) @@ -866,6 +872,7 @@ def __init__(self, raw_recipe, config_user, initialize_tasks=True, + dry_check=False, recipe_file=None): """Parse a recipe file into an object.""" self._cfg = deepcopy(config_user) @@ -880,6 +887,7 @@ def __init__(self, self.entity = self._initalize_provenance( raw_recipe.get('documentation', {})) self.tasks = self.initialize_tasks() if initialize_tasks else None + self.dry_tasks = self.initialize_dry_tasks() if dry_check else None @staticmethod def _need_ncl(raw_diagnostics): @@ -1276,11 +1284,48 @@ def initialize_tasks(self): # Return smallest possible set of tasks return get_independent_tasks(tasks) + def 
initialize_dry_tasks(self): + """Define dry checker tasks in recipe.""" + logger.info("Creating dry check tasks from recipe") + tasks = set() + + priority = 0 + for diagnostic_name, diagnostic in self.diagnostics.items(): + logger.info("Creating dry check tasks for diagnostic %s", + diagnostic_name) + + # Create the checking tasks + for variable_group in diagnostic['preprocessor_output']: + task_name = diagnostic_name + TASKSEP + variable_group + logger.info("Creating preprocessor task %s", task_name) + task = _get_dry_check_task( + variables=diagnostic['preprocessor_output'] + [variable_group], + profiles=self._preprocessors, + config_user=self._cfg, + task_name=task_name, + ) + for task0 in task.flatten(): + task0.priority = priority + tasks.add(task) + priority += 1 + + tasks = get_flattened_tasks(tasks) + logger.info("These tasks will be executed: %s", + ', '.join(t.name for t in tasks)) + + # Return smallest possible set of tasks + return get_independent_tasks(tasks) + def __str__(self): """Get human readable summary.""" return '\n\n'.join(str(task) for task in self.tasks) def run(self): """Run all tasks in the recipe.""" - run_tasks(self.tasks, - max_parallel_tasks=self._cfg['max_parallel_tasks']) + if dry_check: + run_tasks(self.dry_tasks, + max_parallel_tasks=self._cfg['max_parallel_tasks']) + else: + run_tasks(self.tasks, + max_parallel_tasks=self._cfg['max_parallel_tasks']) From 8a16687024086b5c2c53908e6b6990d8118c0427 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 12:57:33 +0100 Subject: [PATCH 03/40] inching closer --- esmvalcore/_recipe.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index bb20aba3b1..075f3add02 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -862,6 +862,14 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): return task +def _get_dry_check_task(variables, profiles, config_user, task_name) + """Create a dry check task.""" + profile = deepcopy(profiles[variables[0]['preprocessor']]) + variables = _limit_datasets(variables, profile, + config_user.get('max_datasets')) + for variable in variables: + _add_cmor_info(variable) + class Recipe: """Recipe object.""" From cdf049172b20ebf77cc0466daee76186df0e1e51 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 14:53:34 +0100 Subject: [PATCH 04/40] switched to an overall dry-run --- esmvalcore/_main.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 773aeb352c..1fb5174b8e 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -98,13 +98,9 @@ def get_args(): nargs='*', help="Only run the named diagnostics from the recipe.") parser.add_argument( - '--check-data-availability', + '--dry-run', action='store_true', - help="Check data availability and return a report.") - parser.add_argument( - '--check-data-compliance', - action='store_true', - help="Check data CMOR compliance and return a report.") + help="Check data and CMOR compliance and return a report.") args = parser.parse_args() return args @@ -160,18 +156,15 @@ def main(args): cfg[limit] = value resource_log = os.path.join(cfg['run_dir'], 'resource_usage.txt') - data_dry_check = args.check_data_availability - compliance_dry_check = args.check_data_compliance + dry_run = args.dry_run with resource_usage_logger(pid=os.getpid(), filename=resource_log): process_recipe(recipe_file=recipe, config_user=cfg, - data_dry_check=data_dry_check, - 
compliance_dry_check=compliance_dry_check) + dry_run=dry_run) return cfg -def process_recipe(recipe_file, config_user, - data_dry_check, compliance_dry_check): +def process_recipe(recipe_file, config_user, dry_run): """Process recipe.""" if not os.path.isfile(recipe_file): raise OSError(errno.ENOENT, "Specified recipe file does not exist", @@ -213,8 +206,7 @@ def process_recipe(recipe_file, config_user, shutil.copy2(recipe_file, config_user['run_dir']) # parse recipe - recipe = read_recipe_file(recipe_file, config_user, - data_dry_check, compliance_dry_check) + recipe = read_recipe_file(recipe_file, config_user, dry_run) logger.debug("Recipe summary:\n%s", recipe) # run From f64237bee09a23fd9a19abcf051d9d1499fdc23b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 14:54:13 +0100 Subject: [PATCH 05/40] first working version of dry-run --- esmvalcore/_recipe.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 075f3add02..df2af984a4 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -64,12 +64,11 @@ def load_raw_recipe(filename): def read_recipe_file(filename, config_user, - data_dry_check, compliance_dry_check, - initialize_tasks=True): + dry_run, initialize_tasks=True): """Read a recipe from file.""" raw_recipe = load_raw_recipe(filename) dry_check = False - if data_dry_check or compliance_dry_check: + if dry_run: dry_check = True return Recipe(raw_recipe, config_user, @@ -862,14 +861,24 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): return task -def _get_dry_check_task(variables, profiles, config_user, task_name) +def _get_dry_check_task(variables, profiles, config_user, task_name): """Create a dry check task.""" - profile = deepcopy(profiles[variables[0]['preprocessor']]) - variables = _limit_datasets(variables, profile, - config_user.get('max_datasets')) + basic_profile = deepcopy(profiles['default']) for variable in variables: _add_cmor_info(variable) + # Create simple dry-run preprocessor task + task = _get_single_preprocessor_task( + variables, + basic_profile, + config_user, + ancestor_tasks=[], + name=task_name, + ) + + return task + + class Recipe: """Recipe object.""" @@ -895,6 +904,7 @@ def __init__(self, self.entity = self._initalize_provenance( raw_recipe.get('documentation', {})) self.tasks = self.initialize_tasks() if initialize_tasks else None + self.dry_check = dry_check self.dry_tasks = self.initialize_dry_tasks() if dry_check else None @staticmethod @@ -1322,6 +1332,10 @@ def initialize_dry_tasks(self): logger.info("These tasks will be executed: %s", ', '.join(t.name for t in tasks)) + # Initialize task provenance + for task in tasks: + task.initialize_provenance(self.entity) + # Return smallest possible set of tasks return get_independent_tasks(tasks) @@ -1331,7 +1345,7 @@ def __str__(self): def run(self): """Run all tasks in the recipe.""" - if dry_check: + if self.dry_check: run_tasks(self.dry_tasks, max_parallel_tasks=self._cfg['max_parallel_tasks']) else: From febfb6a8a747434bb1128e39d31771dc1037ad7a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 15:19:31 +0100 Subject: [PATCH 06/40] adding dry-run to config dict --- esmvalcore/_main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 1fb5174b8e..90b16792fc 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -142,6 +142,7 @@ def main(args): logger.info("Writing program log 
files to:\n%s", "\n".join(log_files)) cfg['skip-nonexistent'] = args.skip_nonexistent + cfg['dry-run'] = args.dry_run cfg['diagnostics'] = { pattern if TASKSEP in pattern else pattern + TASKSEP + '*' for pattern in args.diagnostics or () From 62a3dceedce40261e98e132b34c85de74adf90e4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 15:19:48 +0100 Subject: [PATCH 07/40] adding dry-run handling --- esmvalcore/_recipe.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index df2af984a4..eb8a3b207f 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -500,7 +500,8 @@ def _get_input_files(variable, config_user): '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + if not config_user.get('dry-run'): + check.data_availability(input_files, variable) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -684,8 +685,10 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ancestors = grouped_ancestors.get(variable['filename']) if not ancestors: ancestors = _get_input_files(variable, config_user) - if config_user.get('skip-nonexistent') and not ancestors: - logger.info("Skipping: no data found for %s", variable) + if config_user.get('skip-nonexistent') \ + or config_user.get('dry-run') and not ancestors: + logger.info("MISSING DATA: Skipping: no data found for %s", + variable) continue product = PreprocessorFile( attributes=variable, From f6c7da53711b622eb50ee14d68d19347cfeb81f4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 15:36:17 +0100 Subject: [PATCH 08/40] tweaked func argument --- esmvalcore/_recipe.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index eb8a3b207f..6553b2397b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -64,16 +64,13 @@ def load_raw_recipe(filename): def read_recipe_file(filename, config_user, - dry_run, initialize_tasks=True): + dry_run=False, initialize_tasks=True): """Read a recipe from file.""" raw_recipe = load_raw_recipe(filename) - dry_check = False - if dry_run: - dry_check = True return Recipe(raw_recipe, config_user, initialize_tasks, - dry_check=dry_check, + dry_check=dry_run, recipe_file=filename) @@ -686,7 +683,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, if not ancestors: ancestors = _get_input_files(variable, config_user) if config_user.get('skip-nonexistent') \ - or config_user.get('dry-run') and not ancestors: + or config_user.get('dry-run') and not ancestors: logger.info("MISSING DATA: Skipping: no data found for %s", variable) continue From 1ff047bd8ff5fc3551c5aab7e5af3144cc7af698 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 11 Oct 2019 16:12:36 +0100 Subject: [PATCH 09/40] major simplification --- esmvalcore/_recipe.py | 100 ++++++++++-------------------------------- 1 file changed, 23 insertions(+), 77 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 6553b2397b..3a09b1eddf 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -813,7 +813,8 @@ def append(group_prefix, var): return derive_input -def _get_preprocessor_task(variables, profiles, config_user, task_name): +def _get_preprocessor_task(variables, profiles, + config_user, task_name, dry_check): """Create preprocessor 
task(s) for a set of datasets.""" # First set up the preprocessor profile variable = variables[0] @@ -822,7 +823,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): raise RecipeError( "Unknown preprocessor {} in variable {} of diagnostic {}".format( preproc_name, variable['short_name'], variable['diagnostic'])) - profile = deepcopy(profiles[variable['preprocessor']]) + if not dry_check: + profile = deepcopy(profiles[variable['preprocessor']]) + else: + profile = deepcopy(profiles['default']) logger.info("Creating preprocessor '%s' task for variable '%s'", variable['preprocessor'], variable['short_name']) variables = _limit_datasets(variables, profile, @@ -861,24 +865,6 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): return task -def _get_dry_check_task(variables, profiles, config_user, task_name): - """Create a dry check task.""" - basic_profile = deepcopy(profiles['default']) - for variable in variables: - _add_cmor_info(variable) - - # Create simple dry-run preprocessor task - task = _get_single_preprocessor_task( - variables, - basic_profile, - config_user, - ancestor_tasks=[], - name=task_name, - ) - - return task - - class Recipe: """Recipe object.""" @@ -903,9 +889,8 @@ def __init__(self, raw_recipe['diagnostics'], raw_recipe.get('datasets', [])) self.entity = self._initalize_provenance( raw_recipe.get('documentation', {})) - self.tasks = self.initialize_tasks() if initialize_tasks else None self.dry_check = dry_check - self.dry_tasks = self.initialize_dry_tasks() if dry_check else None + self.tasks = self.initialize_tasks() if initialize_tasks else None @staticmethod def _need_ncl(raw_diagnostics): @@ -1253,6 +1238,7 @@ def initialize_tasks(self): profiles=self._preprocessors, config_user=self._cfg, task_name=task_name, + dry_check=self.dry_check, ) for task0 in task.flatten(): task0.priority = priority @@ -1260,18 +1246,19 @@ def initialize_tasks(self): priority += 1 # Create diagnostic tasks - for script_name, script_cfg in diagnostic['scripts'].items(): - task_name = diagnostic_name + TASKSEP + script_name - logger.info("Creating diagnostic task %s", task_name) - task = DiagnosticTask( - script=script_cfg['script'], - output_dir=script_cfg['output_dir'], - settings=script_cfg['settings'], - name=task_name, - ) - task.priority = priority - tasks.add(task) - priority += 1 + if not self.dry_check: + for script_name, script_cfg in diagnostic['scripts'].items(): + task_name = diagnostic_name + TASKSEP + script_name + logger.info("Creating diagnostic task %s", task_name) + task = DiagnosticTask( + script=script_cfg['script'], + output_dir=script_cfg['output_dir'], + settings=script_cfg['settings'], + name=task_name, + ) + task.priority = priority + tasks.add(task) + priority += 1 check.tasks_valid(tasks) @@ -1302,52 +1289,11 @@ def initialize_tasks(self): # Return smallest possible set of tasks return get_independent_tasks(tasks) - def initialize_dry_tasks(self): - """Define dry checker tasks in recipe.""" - logger.info("Creating dry check tasks from recipe") - tasks = set() - - priority = 0 - for diagnostic_name, diagnostic in self.diagnostics.items(): - logger.info("Creating dry check tasks for diagnostic %s", - diagnostic_name) - - # Create the checking tasks - for variable_group in diagnostic['preprocessor_output']: - task_name = diagnostic_name + TASKSEP + variable_group - logger.info("Creating preprocessor task %s", task_name) - task = _get_dry_check_task( - variables=diagnostic['preprocessor_output'] - [variable_group], - 
profiles=self._preprocessors, - config_user=self._cfg, - task_name=task_name, - ) - for task0 in task.flatten(): - task0.priority = priority - tasks.add(task) - priority += 1 - - tasks = get_flattened_tasks(tasks) - logger.info("These tasks will be executed: %s", - ', '.join(t.name for t in tasks)) - - # Initialize task provenance - for task in tasks: - task.initialize_provenance(self.entity) - - # Return smallest possible set of tasks - return get_independent_tasks(tasks) - def __str__(self): """Get human readable summary.""" return '\n\n'.join(str(task) for task in self.tasks) def run(self): """Run all tasks in the recipe.""" - if self.dry_check: - run_tasks(self.dry_tasks, - max_parallel_tasks=self._cfg['max_parallel_tasks']) - else: - run_tasks(self.tasks, - max_parallel_tasks=self._cfg['max_parallel_tasks']) + run_tasks(self.tasks, + max_parallel_tasks=self._cfg['max_parallel_tasks']) From da8b8ee120b9317fefc76282a798140ef848a762 Mon Sep 17 00:00:00 2001 From: jvegasbsc Date: Mon, 14 Oct 2019 17:10:03 +0200 Subject: [PATCH 10/40] Add option to not raise in cmor checks --- esmvalcore/cmor/check.py | 20 +++++++++++++++----- tests/unit/cmor/test_cmor_check.py | 12 ++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 735ec1f09b..12aecc13dd 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -57,10 +57,12 @@ def __init__(self, var_info, frequency=None, fail_on_error=False, + raise_exception=True, automatic_fixes=False): self._cube = cube self._failerr = fail_on_error + self._raise_exception = raise_exception self._errors = list() self._warnings = list() self._debug_messages = list() @@ -104,13 +106,16 @@ def check_metadata(self, logger=None): self.report_debug_messages(logger) self.report_warnings(logger) - self.report_errors() + self.report_errors(logger) if self.frequency != 'fx': self._add_auxiliar_time_coordinates() - return self._cube + if self.has_errors(): + return None + else: + return self._cube - def report_errors(self): + def report_errors(self, logger): """Report detected errors. Raises @@ -123,7 +128,10 @@ def report_errors(self): msg = 'There were errors in variable {}:\n{}\nin cube:\n{}' msg = msg.format(self._cube.var_name, '\n '.join(self._errors), self._cube) - raise CMORCheckError(msg) + if self._raise_exception: + raise CMORCheckError(msg) + logger.error(msg) + def report_warnings(self, logger): """Report detected warnings to the given logger. 
@@ -179,7 +187,7 @@ def check_data(self, logger=None): self._check_coords_data() self.report_warnings(logger) - self.report_errors() + self.report_errors(logger) return self._cube def _check_fill_value(self): @@ -668,6 +676,7 @@ def _get_cmor_checker(table, short_name, frequency, fail_on_error=True, + raise_exception=True, automatic_fixes=False): """Get a CMOR checker/fixer.""" if table not in CMOR_TABLES: @@ -687,6 +696,7 @@ def _checker(cube): var_info, frequency=frequency, fail_on_error=fail_on_error, + raise_exception=raise_exception, automatic_fixes=automatic_fixes) return _checker diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py index 9caa5beea8..6c9e06301f 100644 --- a/tests/unit/cmor/test_cmor_check.py +++ b/tests/unit/cmor/test_cmor_check.py @@ -178,6 +178,18 @@ def test_check_bad_standard_name(self): self.cube.standard_name = 'wind_speed' self._check_fails_in_metadata(automatic_fixes=False) + def test_check_bad_standard_name_no_raise(self): + """Test check fails for a bad short_name.""" + self.cube = self.get_cube(self.var_info) + self.cube.standard_name = 'wind_speed' + checker = CMORCheck( + self.cube, + self.var_info, + automatic_fixes=False, + raise_exception=False, + ) + self.assertIsNone(checker.check_metadata()) + def test_check_bad_long_name_auto_fix(self): """Test check pass for a bad standard_name with automatic fixes.""" self.cube = self.get_cube(self.var_info) From 005c9ef65c6f21c7f31921f87c1cdc9b4f5f0a48 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 12:46:15 +0100 Subject: [PATCH 11/40] linter test fix --- esmvalcore/cmor/check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 12aecc13dd..4336fe0131 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -132,7 +132,6 @@ def report_errors(self, logger): raise CMORCheckError(msg) logger.error(msg) - def report_warnings(self, logger): """Report detected warnings to the given logger. 
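The two patches above make the CMOR checker's failure mode configurable: with raise_exception=False the collected problems are reported through the logger instead of aborting with CMORCheckError, which is what a dry run needs. Below is a minimal sketch of that usage, modelled on the unit test added in PATCH 10/40; the cube, table and variable names are illustrative placeholders rather than part of the patches, and the snippet is a sketch only, not guaranteed to run unchanged against every ESMValCore version:

    import iris.cube
    from esmvalcore.cmor.check import CMORCheck
    from esmvalcore.cmor.table import CMOR_TABLES

    # Illustrative inputs: a deliberately non-compliant cube and a CMOR
    # variable definition looked up from the bundled CMIP5 tables.
    cube = iris.cube.Cube([0.0], var_name='tas', standard_name='wind_speed')
    var_info = CMOR_TABLES['CMIP5'].get_variable('Amon', 'tas')

    # raise_exception=False logs the errors via report_errors() instead of
    # raising CMORCheckError; at this point in the series check_metadata()
    # returns None when errors were found.
    checker = CMORCheck(cube, var_info, automatic_fixes=False,
                        raise_exception=False)
    result = checker.check_metadata()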
From 6d67cbf4929abc4bb064ee3fac4154b28417def9 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 14:41:15 +0100 Subject: [PATCH 12/40] removed saving --- esmvalcore/_recipe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3a09b1eddf..a5c52f4483 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -827,6 +827,7 @@ def _get_preprocessor_task(variables, profiles, profile = deepcopy(profiles[variable['preprocessor']]) else: profile = deepcopy(profiles['default']) + profile['save'] = False logger.info("Creating preprocessor '%s' task for variable '%s'", variable['preprocessor'], variable['short_name']) variables = _limit_datasets(variables, profile, From 3cfd1b84f8ddd54f61d4d56cd195e76f2fb3e017 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 15:48:12 +0100 Subject: [PATCH 13/40] no saving if dry run --- esmvalcore/_recipe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index a5c52f4483..c06652b33b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -358,7 +358,10 @@ def _get_default_settings(variable, config_user, derive=False): } # Configure saving cubes to file - settings['save'] = {'compress': config_user['compress_netcdf']} + if not config_user['dry-run']: + settings['save'] = {'compress': config_user['compress_netcdf']} + else: + settings['save'] = {'filename': 'dry-run'} return settings @@ -827,7 +830,6 @@ def _get_preprocessor_task(variables, profiles, profile = deepcopy(profiles[variable['preprocessor']]) else: profile = deepcopy(profiles['default']) - profile['save'] = False logger.info("Creating preprocessor '%s' task for variable '%s'", variable['preprocessor'], variable['short_name']) variables = _limit_datasets(variables, profile, From 2c87a4c33bff6d47468b952fa2de9fb14960f0b9 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 15:48:22 +0100 Subject: [PATCH 14/40] no saving if dry run --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 22dc6b1c58..2f93adbc4d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -232,7 +232,7 @@ def write_metadata(products, write_ncl=False): output_filename = os.path.join(output_dir, 'metadata.yml') output_files.append(output_filename) - with open(output_filename, 'w') as file: + with open(output_filename, 'w+') as file: _ordered_safe_dump(metadata, file) if write_ncl: output_files.append(_write_ncl_metadata(output_dir, metadata)) From 465d0702c77e22a2baf91f32a7113f255e051bc9 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 15:48:32 +0100 Subject: [PATCH 15/40] no saving if dry run --- esmvalcore/preprocessor/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 6793d2c3e8..1bc00cfdc7 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -251,7 +251,8 @@ def __init__(self, attributes, settings, ancestors=None): self.settings = copy.deepcopy(settings) if 'save' not in self.settings: self.settings['save'] = {} - self.settings['save']['filename'] = self.filename + if self.settings['save']['filename'] != 'dry-run': + self.settings['save']['filename'] = self.filename self.files = [a.filename for a in ancestors or ()] @@ -298,7 +299,8 
@@ def cubes(self, value): def save(self): """Save cubes to disk.""" - if self._cubes is not None: + if self._cubes is not None and \ + self.settings['save']['filename'] != 'dry-run': self.files = preprocess(self._cubes, 'save', **self.settings['save']) self.files = preprocess(self.files, 'cleanup',
From df2d765f9a2189585c89d26277f29a010728fb91 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 15:55:52 +0100 Subject: [PATCH 16/40] changed a bit --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c06652b33b..3e2a332eb1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -358,7 +358,7 @@ def _get_default_settings(variable, config_user, derive=False): } # Configure saving cubes to file - if not config_user['dry-run']: + if 'dry-run' not in config_user: settings['save'] = {'compress': config_user['compress_netcdf']} else: settings['save'] = {'filename': 'dry-run'}
From 660e33c57233a77dc1a6416ba380f69cd800bde0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 15 Oct 2019 15:56:15 +0100 Subject: [PATCH 17/40] changed a bit to accommodate a slightly different handling --- esmvalcore/preprocessor/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 1bc00cfdc7..c7f7f113eb 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -251,7 +251,10 @@ def __init__(self, attributes, settings, ancestors=None): self.settings =
copy.deepcopy(settings) if 'save' not in self.settings: self.settings['save'] = {} - if 'filename' in self.settings['save']: - if self.settings['save']['filename'] != 'dry-run': - self.settings['save']['filename'] = self.filename - else: - self.settings['save']['filename'] = self.filename + self.settings['save']['filename'] = self.filename self.files = [a.filename for a in ancestors or ()] @@ -302,8 +298,7 @@ def cubes(self, value): def save(self): """Save cubes to disk.""" - if self._cubes is not None and \ - self.settings['save']['filename'] != 'dry-run': + if self._cubes is not None and 'dryrun' not in self.settings['save']: self.files = preprocess(self._cubes, 'save', **self.settings['save']) self.files = preprocess(self.files, 'cleanup', @@ -411,9 +406,11 @@ def _run(self, _): for product in self.products: product.close() - metadata_files = write_metadata(self.products, - self.write_ncl_interface) - return metadata_files + input_products = [p for p in self.products if step in p.settings] + if 'dryrun' not in input_products[0].settings['save']: + metadata_files = write_metadata(self.products, + self.write_ncl_interface) + return metadata_files def __str__(self): """Get human readable description.""" From 207ec8b760a58e495078d0816268f755b29378ed Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 17 Oct 2019 12:17:32 +0100 Subject: [PATCH 20/40] working version not to save --- esmvalcore/preprocessor/_io.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 2f93adbc4d..aaadaec24d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -103,7 +103,8 @@ def concatenate(cubes): raise ValueError('Can not concatenate cubes.') -def save(cubes, filename, optimize_access='', compress=False, **kwargs): +def save(cubes, filename, optimize_access='', + compress=False, dryrun=False, **kwargs): """ Save iris cubes to file. 
@@ -232,7 +233,7 @@ def write_metadata(products, write_ncl=False): output_filename = os.path.join(output_dir, 'metadata.yml') output_files.append(output_filename) - with open(output_filename, 'w+') as file: + with open(output_filename, 'w') as file: _ordered_safe_dump(metadata, file) if write_ncl: output_files.append(_write_ncl_metadata(output_dir, metadata)) From 68c860ccbe25a91858c824a93d2a01f1dd48ffbb Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 17 Oct 2019 12:42:01 +0100 Subject: [PATCH 21/40] last change so the ancestry is not asked for if dry run --- esmvalcore/_recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 61a7d8379a..d2fdaaf956 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1269,7 +1269,8 @@ def initialize_tasks(self): check.tasks_valid(tasks) # Resolve diagnostic ancestors - self._resolve_diagnostic_ancestors(tasks) + if not self.dry_check: + self._resolve_diagnostic_ancestors(tasks) # Select only requested tasks tasks = get_flattened_tasks(tasks) From b495d1aa614faeea6c0c6930e84b67c963ded81a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 17 Oct 2019 13:49:17 +0100 Subject: [PATCH 22/40] added section to documentation for dry running --- doc/esmvalcore/fixing_data.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/esmvalcore/fixing_data.rst b/doc/esmvalcore/fixing_data.rst index 3448c16ff1..3cb7cfa467 100644 --- a/doc/esmvalcore/fixing_data.rst +++ b/doc/esmvalcore/fixing_data.rst @@ -16,6 +16,14 @@ coordinate bounds like ''lat_bnds'') or problems with the actual data The ESMValTool can apply on the fly fixes to data sets that have known errors that can be fixed automatically. +Discovering potential issues with the data +========================================== + +You can run `esmvaltool` in a dry run mode by applying the `--dry-run` command line option: that will +run the data finding module and the CMOR checks and fixes modules and anything that is not fixed on the fly +will result in an exception that is logged to `main_log.txt`; you can check those potential issues +and follow up with implementing the needed fixes as described below. 
+ Fix structure =============
From 867b45410becafc9f544a228d92d42ee9f416eff Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 14:52:04 +0100 Subject: [PATCH 23/40] fixed very odd code change that might have come through from merging development --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 49b9c5f9a2..f5b6a6728d 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -679,7 +679,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, _update_regrid_time(variable, settings) ancestors = grouped_ancestors.get(variable['filename']) if not ancestors: - ancestors = _get_input_files(variable, config_user) + ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') \ or config_user.get('dry-run') and not ancestors: logger.info("MISSING DATA: Skipping: no data found for %s", variable) continue
From 48d79ea1b3396af6a88064d2c9fd5428849517fd Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 15:53:25 +0100 Subject: [PATCH 24/40] modified checker in light of dryrun --- esmvalcore/_recipe_checks.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index b45d912d86..f92a577dad 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -90,10 +90,16 @@ def variable(var, required_keys): missing, var.get('short_name'), var.get('diagnostic'))) -def data_availability(input_files, var): +def data_availability(input_files, var, dryrun=False): """Check if the required input data is available.""" if not input_files: - raise RecipeError("No input files found for variable {}".format(var)) + if not dryrun: + raise RecipeError("No input files found for \ variable {}".format(var)) + else: + logger.info("MISSING DATA: Skipping: \ no data found for {}".format(var)) + return required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() @@ -105,10 +111,16 @@ def data_availability(input_files, var): missing_years = required_years - available_years if missing_years: - raise RecipeError( "No input data available for years {} in files {}".format( ", ".join(str(year) for year in missing_years), input_files)) + if not dryrun: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) + else: + logger.info( + "MISSING DATA for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files))
From 1ff698616e84bfba91a8247d0e21ee54ef9ecc22 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 15:53:46 +0100 Subject: [PATCH 25/40] modified data checks in light of dryrun --- esmvalcore/_recipe.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f5b6a6728d..2d200cf3e7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -245,7 +245,8 @@ def _dataset_to_file(variable, config_user): if files: variable = required_var break - check.data_availability(files, variable) + dryrun = config_user.get('dry-run') + check.data_availability(files, variable, dryrun=dryrun) return files[0] @@ -495,8 +496,8 @@ def _get_ancestors(variable, config_user): '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') or variable['dataset'] ==
variable.get('reference_dataset')): - if not config_user.get('dry-run'): - check.data_availability(input_files, variable) + dryrun = config_user.get('dry-run') + check.data_availability(input_files, variable, dryrun=dryrun) # Set up provenance tracking for i, filename in enumerate(input_files): From 38ee9af27205d07f9b40a2323c8df7f3ead0e06c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 16:23:44 +0100 Subject: [PATCH 26/40] total skip if no data available in dryrun --- esmvalcore/_recipe.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 2d200cf3e7..c8c478e4ea 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -725,21 +725,25 @@ def _get_single_preprocessor_task(variables, config_user=config_user, ) - if not products: + if not products and not config_user.get('dry-run'): raise RecipeError( "Did not find any input data for task {}".format(name)) - task = PreprocessingTask( - products=products, - ancestors=ancestor_tasks, - name=name, - order=order, - debug=config_user['save_intermediary_cubes'], - write_ncl_interface=config_user['write_ncl_interface'], - ) + if not products and config_user.get('dry-run'): + task = [] + else: + task = PreprocessingTask( + products=products, + ancestors=ancestor_tasks, + name=name, + order=order, + debug=config_user['save_intermediary_cubes'], + write_ncl_interface=config_user['write_ncl_interface'], + ) - logger.info("PreprocessingTask %s created. It will create the files:\n%s", - task.name, '\n'.join(p.filename for p in task.products)) + logger.info("PreprocessingTask %s created. \ + It will create the files:\n%s", + task.name, '\n'.join(p.filename for p in task.products)) return task @@ -1249,10 +1253,11 @@ def initialize_tasks(self): task_name=task_name, dry_check=self.dry_check, ) - for task0 in task.flatten(): - task0.priority = priority - tasks.add(task) - priority += 1 + if task: + for task0 in task.flatten(): + task0.priority = priority + tasks.add(task) + priority += 1 # Create diagnostic tasks if not self.dry_check: From 5e7ac8ad298324d1766facc54f4e1af7f50d629d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 17:47:15 +0100 Subject: [PATCH 27/40] fix for None file output for dryrun --- esmvalcore/_task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1351921783..9f53a7942d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -223,7 +223,10 @@ def run(self, input_files=None): if input_files is None: input_files = [] for task in self.ancestors: - input_files.extend(task.run()) + out_file = task.run() + # dryrun outputs no physical run output file + if out_file: + input_files.extend(out_file) logger.info("Starting task %s in process [%s]", self.name, os.getpid()) start = datetime.datetime.now() From 4d0232bc4442cc5a29ae0d5d56f305abc46faea8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 24 Oct 2019 18:11:00 +0100 Subject: [PATCH 28/40] skip running tasks that dont have physical files --- esmvalcore/_task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 9f53a7942d..1186708cbc 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -230,7 +230,10 @@ def run(self, input_files=None): logger.info("Starting task %s in process [%s]", self.name, os.getpid()) start = datetime.datetime.now() - self.output_files = self._run(input_files) 
+ # skip tasks that have ancestors but no files (eg dryrun) + if (self.ancestors and input_files) or \ + (not self.ancestors and not input_files): + self.output_files = self._run(input_files) runtime = datetime.datetime.now() - start logger.info("Successfully completed task %s (priority %s) in %s", self.name, self.priority, runtime) From 0e6e1ccfe19731aa7b85bdaff3765457649068bf Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 25 Oct 2019 12:45:58 +0100 Subject: [PATCH 29/40] reverted to old message --- esmvalcore/_recipe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c8c478e4ea..b3c90647c0 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -683,8 +683,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') \ or config_user.get('dry-run') and not ancestors: - logger.info("MISSING DATA: Skipping: no data found for %s", - variable) + logger.info("Skipping: no data found for %s", variable) continue product = PreprocessorFile( attributes=variable, From 51c2abe245055b77a79536dcee71dd9d4d328fca Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 25 Oct 2019 12:48:08 +0100 Subject: [PATCH 30/40] edited error messages --- esmvalcore/_recipe_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index f92a577dad..2a90ad922a 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -97,7 +97,7 @@ def data_availability(input_files, var, dryrun=False): raise RecipeError("No input files found for \ variable {}".format(var)) else: - logger.info("MISSING DATA: Skipping: \ + logger.info("DRYRUN: MISSING DATA: \ no data found for {}".format(var)) return @@ -118,7 +118,7 @@ def data_availability(input_files, var, dryrun=False): input_files)) else: logger.info( - "MISSING DATA for years {} in files {}".format( + "DRYRUN: MISSING DATA for years {} in files {}".format( ", ".join(str(year) for year in missing_years), input_files)) From 72fd668b56909ac7a3def6f77daffb49ee2c8189 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 25 Oct 2019 13:16:33 +0100 Subject: [PATCH 31/40] added single-thread condition for dryrun --- esmvalcore/_main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 90b16792fc..2443af3fc5 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -100,7 +100,10 @@ def get_args(): parser.add_argument( '--dry-run', action='store_true', - help="Check data and CMOR compliance and return a report.") + help="Check data and CMOR compliance and return a report. 
\ This feature works fully only with max_parallel_tasks: 1 \ in config-user.yml; however, it is very fast so even on \ a single thread it should take much less time.") args = parser.parse_args() return args
From 3333f5a15053d2fb78ab5be08b6b4909586d0343 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 25 Oct 2019 13:20:58 +0100 Subject: [PATCH 32/40] added note on single threading --- doc/esmvalcore/fixing_data.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/esmvalcore/fixing_data.rst b/doc/esmvalcore/fixing_data.rst index 3cb7cfa467..fd11ed56fc 100644 --- a/doc/esmvalcore/fixing_data.rst +++ b/doc/esmvalcore/fixing_data.rst @@ -22,7 +22,9 @@ Discovering potential issues with the data You can run `esmvaltool` in a dry run mode by applying the `--dry-run` command line option: that will run the data finding module and the CMOR checks and fixes modules and anything that is not fixed on the fly will result in an exception that is logged to `main_log.txt`; you can check those potential issues -and follow up with implementing the needed fixes as described below. +and follow up with implementing the needed fixes as described below. This feature works fully only with +`max_parallel_tasks: 1` in `config-user.yml`; however, it is very fast, so even when limited to a single thread +it should take much less time than running the full recipe. Fix structure =============
From 026c0a0df4e2505e3e13cc552b60fd15aa25e103 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 13 Nov 2019 15:16:27 +0000 Subject: [PATCH 33/40] actual use of Javi's exception handler --- esmvalcore/_recipe.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b3c90647c0..0044612bc3 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -334,12 +334,16 @@ def _get_default_settings(variable, config_user, derive=False): } # Configure CMOR metadata check + raise_exception = True + if 'dry-run' in config_user: + raise_exception = False if variable.get('cmor_table'): settings['cmor_check_metadata'] = { 'cmor_table': variable['cmor_table'], 'mip': variable['mip'], 'short_name': variable['short_name'], 'frequency': variable['frequency'], + 'raise_exception': raise_exception, } # Configure final CMOR data check if variable.get('cmor_table'):
From d0cd401e8268727897878ed3c6f191cf5d56efb6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 13 Nov 2019 15:16:42 +0000 Subject: [PATCH 34/40] actual use of Javi's exception handler --- esmvalcore/cmor/check.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 4336fe0131..b0323e5274 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -701,7 +701,8 @@ def _checker(cube): return _checker -def cmor_check_metadata(cube, cmor_table, mip, short_name, frequency): +def cmor_check_metadata(cube, cmor_table, mip, + short_name, frequency, raise_exception): """Check if metadata conforms to variable's CMOR definiton. None of the checks at this step will force the cube to load the data. Parameters ---------- cube: iris.cube.Cube Data cube to check. cmor_table: basestring CMOR definitions to use. mip: Variable's mip. short_name: basestring Variable's short name. frequency: basestring Data frequency. raise_exception: bool Whether to raise or only log + the exception produced by the checker.
""" - checker = _get_cmor_checker(cmor_table, mip, short_name, frequency) + checker = _get_cmor_checker(cmor_table, mip, + short_name, frequency, + raise_exception=raise_exception) checker(cube).check_metadata() return cube From 3203b5550c7272ff30e528eb7163d7850fb6114e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 13 Nov 2019 15:16:52 +0000 Subject: [PATCH 35/40] actual use of Javi's exception handler --- tests/integration/test_recipe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 30b55c0c5e..60d3948e42 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -361,6 +361,7 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): 'mip': 'Oyr', 'short_name': 'chl', 'frequency': 'yr', + 'raise_exception': True, }, 'cmor_check_data': { 'cmor_table': 'CMIP5', @@ -442,6 +443,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'mip': 'fx', 'short_name': 'sftlf', 'frequency': 'fx', + 'raise_exception': True, }, 'cmor_check_data': { 'cmor_table': 'CMIP5', From 12c0c2e8e259715c6e02d29abb167ce664d420cf Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 3 Feb 2020 12:49:54 +0000 Subject: [PATCH 36/40] fixed the fix for conflict --- esmvalcore/cmor/check.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index d1f392b4a9..19dd432f22 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -107,12 +107,7 @@ def check_metadata(self, logger=None): self.report_warnings(logger) self.report_errors(logger) - if self.frequency != 'fx': - self._add_auxiliar_time_coordinates() - if self.has_errors(): - return None - else: - return self._cube + return self._cube def report_errors(self, logger): """Report detected errors. 
From 9cd0e726391394633facec6e20b4c0c6114dc348 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 3 Feb 2020 12:54:22 +0000 Subject: [PATCH 37/40] fixed test --- tests/unit/cmor/test_cmor_check.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py index 6c9e06301f..9caa5beea8 100644 --- a/tests/unit/cmor/test_cmor_check.py +++ b/tests/unit/cmor/test_cmor_check.py @@ -178,18 +178,6 @@ def test_check_bad_standard_name(self): self.cube.standard_name = 'wind_speed' self._check_fails_in_metadata(automatic_fixes=False) - def test_check_bad_standard_name_no_raise(self): - """Test check fails for a bad short_name.""" - self.cube = self.get_cube(self.var_info) - self.cube.standard_name = 'wind_speed' - checker = CMORCheck( - self.cube, - self.var_info, - automatic_fixes=False, - raise_exception=False, - ) - self.assertIsNone(checker.check_metadata()) - def test_check_bad_long_name_auto_fix(self): """Test check pass for a bad standard_name with automatic fixes.""" self.cube = self.get_cube(self.var_info) From 10e6125af396922b2a82ab99145dda2cccf6c53c Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 21 May 2021 10:49:16 +0200 Subject: [PATCH 38/40] Fix flake8 errors and formatting warnings --- esmvalcore/_main.py | 4 +++- esmvalcore/_recipe.py | 25 +++++++++++++++++++------ esmvalcore/_recipe_checks.py | 14 ++++++-------- esmvalcore/cmor/check.py | 2 +- esmvalcore/preprocessor/_io.py | 12 ++++++++---- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 115d269943..8c99ed2c24 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -413,7 +413,9 @@ def _check_limit(limit, value): resource_log = os.path.join(cfg['run_dir'], 'resource_usage.txt') from ._task import resource_usage_logger with resource_usage_logger(pid=os.getpid(), filename=resource_log): - process_recipe(recipe_file=recipe, config_user=cfg, dry_run=dry_run) + process_recipe(recipe_file=recipe, + config_user=cfg, + dry_run=dry_run) if os.path.exists(cfg["preproc_dir"]) and cfg["remove_preproc_dir"]: logger.info("Removing preproc containing preprocessed data") diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 52059db1b6..cb59002c04 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -46,7 +46,10 @@ TASKSEP = os.sep -def read_recipe_file(filename, config_user, dry_run=False, initialize_tasks=True): +def read_recipe_file(filename, + config_user, + dry_run=False, + initialize_tasks=True): """Read a recipe from file.""" check.recipe_with_schema(filename) with open(filename, 'r') as file: @@ -213,7 +216,11 @@ def _dataset_to_file(variable, config_user): break dryrun = config_user.get('dry-run') check.data_availability(files, variable, dryrun=dryrun) - check.data_availability(files, variable, dirnames, filenames, dryrun=dryrun) + check.data_availability(files, + variable, + dirnames, + filenames, + dryrun=dryrun) return files[0] @@ -551,7 +558,11 @@ def _get_ancestors(variable, config_user): if (not config_user.get('skip-nonexistent') or variable['dataset'] == variable.get('reference_dataset')): dryrun = config_user.get('dry-run') - check.data_availability(input_files, variable, dirnames, filenames, dryrun=dryrun) + check.data_availability(input_files, + variable, + dirnames, + filenames, + dryrun=dryrun) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -741,7 +752,9 @@ def _get_preprocessor_products(variables, profile, order, 
ancestor_products, try: ancestors = _get_ancestors(variable, config_user) except RecipeError as ex: - if _allow_skipping(ancestors, variable, config_user) or config_user.get('dry-run') and not ancestors: + if _allow_skipping( + ancestors, variable, config_user + ) or config_user.get('dry-run') and not ancestors: logger.info("Skipping: %s", ex.message) else: missing_vars.add(ex.message) @@ -910,8 +923,8 @@ def append(group_prefix, var): return derive_input -def _get_preprocessor_task(variables, profiles, - config_user, task_name, dry_check): +def _get_preprocessor_task(variables, profiles, config_user, task_name, + dry_check): """Create preprocessor task(s) for a set of datasets.""" # First set up the preprocessor profile variable = variables[0] diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 5c52e8464f..3b7e942a6e 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -117,12 +117,12 @@ def data_availability(input_files, var, dirnames, filenames, dryrun=False): logger.error("Looked for files matching%s", msg) elif dirnames and not filenames: logger.error( - "Looked for files in %s, but did not find any file pattern " - "to match against", dirnames) + "Looked for files in %s, but did not find any file " + "pattern to match against", dirnames) elif filenames and not dirnames: logger.error( - "Looked for files matching %s, but did not find any existing " - "input directory", filenames) + "Looked for files matching %s, but did not find any " + "existing input directory", filenames) logger.error("Set 'log_level' to 'debug' to get more information") raise RecipeError( f"Missing data for {var['alias']}: {var['short_name']}") @@ -150,10 +150,8 @@ def data_availability(input_files, var, dirnames, filenames, dryrun=False): ", ".join(str(year) for year in missing_years), input_files)) else: - logger.info( - "DRYRUN: MISSING DATA for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), - input_files)) + logger.info("DRYRUN: MISSING DATA for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), input_files)) def tasks_valid(tasks): diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index e3d3af8cc6..1aa0f64f4f 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -236,7 +236,7 @@ def report_errors(self): self._cube) if self._raise_exception: raise CMORCheckError(msg) - logger.error(msg) + self._logger.error(msg) def report_warnings(self): """Report detected warnings to the given logger. diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 709cacb253..d4863da1ea 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -199,10 +199,14 @@ def concatenate(cubes): return result -def save(cubes, filename, optimize_access='', - compress=False, dryrun=False, alias='', **kwargs): - """ - Save iris cubes to file. +def save(cubes, + filename, + optimize_access='', + compress=False, + dryrun=False, + alias='', + **kwargs): + """Save iris cubes to file. 
Parameters ---------- From f4fcbdc47ab9eefa51c456c75f65cb981cc65dcb Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 21 May 2021 10:58:13 +0200 Subject: [PATCH 39/40] Fix merge errors --- esmvalcore/_recipe.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index cb59002c04..d8c18eb045 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -215,7 +215,6 @@ def _dataset_to_file(variable, config_user): variable = required_var break dryrun = config_user.get('dry-run') - check.data_availability(files, variable, dryrun=dryrun) check.data_availability(files, variable, dirnames, @@ -302,19 +301,19 @@ def _get_default_settings(variable, config_user, derive=False): 'units': variable['units'], } - # Configure CMOR metadata check raise_exception = True if 'dry-run' in config_user: raise_exception = False - if variable.get('cmor_table'): - settings['cmor_check_metadata'] = { - 'cmor_table': variable['cmor_table'], - 'mip': variable['mip'], - 'short_name': variable['short_name'], - 'frequency': variable['frequency'], - 'raise_exception': raise_exception, - 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) - } + + # Configure CMOR metadata check + settings['cmor_check_metadata'] = { + 'cmor_table': variable['project'], + 'mip': variable['mip'], + 'short_name': variable['short_name'], + 'frequency': variable['frequency'], + 'raise_exception': raise_exception, + 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) + } # Configure final CMOR data check settings['cmor_check_data'] = dict(settings['cmor_check_metadata']) From 36165bf1cf11071f8990b450e3ca22604d324237 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 21 May 2021 11:02:54 +0200 Subject: [PATCH 40/40] Fix CLI description --- esmvalcore/_main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 8c99ed2c24..9ac93caf0b 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -341,10 +341,10 @@ def run(recipe, than one diagnostic to filter use the syntax 'diag1 diag2/script1' or '("diag1", "diag2/script1")' and pay attention to the quotes. dry_run: bool, optional - Check data and CMOR compliance and return a report. - This feature works fully only with max_parallel_tasks: 1 - in config-user.yml; however, it is very fast so even on - a single thread it should take much less time. + Check data and CMOR compliance and return a report. This feature + works fully only with "max_parallel_tasks: 1" in config-user.yml, + however, it is very fast so even on a single thread it should + take much less time. check_level: str, optional Configure the sensitivity of the CMOR check. Possible values are: `ignore` (all errors will be reported as warnings),