From 474ad73a694911a61b14a5619e5fbe695ffdfa54 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Mon, 30 Jan 2023 20:29:28 +0000 Subject: [PATCH 1/5] For "run_envir", check for valid values, set "model_ver" correctly for nco mode --- tests/WE2E/run_WE2E_tests.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index dc472f8333..7b52326e75 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -40,6 +40,11 @@ def run_we2e_tests(homedir, args) -> None: run_envir = args.run_envir machine = args.machine.lower() + # Check for invalid input + if run_envir: + if run_envir not in ['nco', 'community']: + raise KeyError(f"Invalid 'run_envir' provided: {run_envir}") + # If args.tests is a list of length more than one, we assume it is a list of test names if len(args.tests) > 1: tests_to_check=args.tests @@ -134,6 +139,10 @@ def run_we2e_tests(homedir, args) -> None: test_cfg['user'].update({"ACCOUNT": args.account}) if run_envir: test_cfg['user'].update({"RUN_ENVIR": run_envir}) + if run_envir == "nco": + if 'nco' not in test_cfg: + test_cfg['nco'] = dict() + test_cfg['nco'].update({"model_ver": "we2e"}) # if platform section was not in input config, initialize as empty dict if 'platform' not in test_cfg: test_cfg['platform'] = dict() From b83123004002e5f92c1740712966accb12c01cc1 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Mon, 30 Jan 2023 21:04:22 +0000 Subject: [PATCH 2/5] Fix another bad error message in setup.py --- ush/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/setup.py b/ush/setup.py index b5955e580e..fcd13ce262 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -201,7 +201,7 @@ def load_config_for_setup(ushdir, default_config, user_config): raise Exception( dedent( f""" - Date variable {val}={cfg_d['user'][val]} is not in a valid date format. + Date variable {val}={cfg_d['workflow'][val]} is not in a valid date format. 
For examples of valid formats, see the Users' Guide. """ From c734bb2a2e09a84584720b5b543bcdd9d4d5c40f Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 8 Feb 2023 22:42:21 +0000 Subject: [PATCH 3/5] Fix dumb error --- tests/WE2E/monitor_jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/WE2E/monitor_jobs.py b/tests/WE2E/monitor_jobs.py index 9e34a87264..81c196c977 100755 --- a/tests/WE2E/monitor_jobs.py +++ b/tests/WE2E/monitor_jobs.py @@ -81,7 +81,7 @@ def monitor_jobs(expt_dict: dict, monitor_file: str = '', debug: bool = False) - endtime = datetime.now() total_walltime = endtime - starttime - logging.info(f'All {num_expts} experiments finished in {str(total_walltime)}') + logging.info(f'All {len(running_expts)} experiments finished in {str(total_walltime)}') return monitor_file From 42873da1dbdf6b31ae3eb7eddc8131ceb56188ce Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 8 Feb 2023 22:43:07 +0000 Subject: [PATCH 4/5] Promote a debug print to info --- tests/WE2E/run_WE2E_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 7b52326e75..d12691da10 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -179,7 +179,7 @@ def run_we2e_tests(homedir, args) -> None: with open(ushdir + "/config.yaml","w") as f: f.writelines(cfg_to_yaml_str(test_cfg)) - logging.debug(f"Calling workflow generation function for test {test_name}\n") + logging.info(f"Calling workflow generation function for test {test_name}\n") if args.quiet: console_handler = logging.getLogger().handlers[1] console_handler.setLevel(logging.WARNING) From 1209588a8ca7c7c78ec3845e1bcaa90d575b3c88 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 9 Feb 2023 00:18:53 +0000 Subject: [PATCH 5/5] For the initialize step (calling rocotorun for the first time), do not skip checking DEAD, ERROR, or COMPLETE status jobs. 
This will allow the use of this script to, for example, make changes to a failed experiment and re-run after the appropriate rocotorewind command(s) have been run --- tests/WE2E/monitor_jobs.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/monitor_jobs.py b/tests/WE2E/monitor_jobs.py index 81c196c977..8fbd4f2afb 100755 --- a/tests/WE2E/monitor_jobs.py +++ b/tests/WE2E/monitor_jobs.py @@ -46,7 +46,7 @@ def monitor_jobs(expt_dict: dict, monitor_file: str = '', debug: bool = False) - logging.info("Checking tests available for monitoring...") for expt in expt_dict: logging.info(f"Starting experiment {expt} running") - expt_dict[expt] = update_expt_status(expt_dict[expt], expt) + expt_dict[expt] = update_expt_status(expt_dict[expt], expt, True) write_monitor_file(monitor_file,expt_dict) @@ -81,11 +81,11 @@ def monitor_jobs(expt_dict: dict, monitor_file: str = '', debug: bool = False) - endtime = datetime.now() total_walltime = endtime - starttime - logging.info(f'All {len(running_expts)} experiments finished in {str(total_walltime)}') + logging.info(f'All {len(expt_dict)} experiments finished in {str(total_walltime)}') return monitor_file -def update_expt_status(expt: dict, name: str) -> dict: +def update_expt_status(expt: dict, name: str, refresh: bool = False) -> dict: """ This function reads the dictionary showing the location of a given experiment, runs a `rocotorun` command to update the experiment (running new jobs and updating the status of @@ -121,15 +121,18 @@ def update_expt_status(expt: dict, name: str) -> dict: to ensure there are no un-submitted jobs. We will no longer monitor this experiment. Args: - expt (dict): A dictionary containing the information for an individual experiment, as - described in the main monitor_jobs() function. - name (str): [optional] + expt (dict): A dictionary containing the information for an individual experiment, as + described in the main monitor_jobs() function.
+ name (str): Name of the experiment; used for logging only + refresh (bool): If true, this flag will check an experiment status even if it is listed + as DEAD, ERROR, or COMPLETE. Used for initial checks for experiments + that may have been restarted. Returns: dict: The updated experiment dictionary. """ #If we are no longer tracking this experiment, return unchanged - if expt["status"] in ['DEAD','ERROR','COMPLETE']: + if (expt["status"] in ['DEAD','ERROR','COMPLETE']) and not refresh: return expt # Update experiment, read rocoto database