From ff2cf242e1647c9efdaa19442676bdc50bc3b0d5 Mon Sep 17 00:00:00 2001 From: Andrew Shao Date: Fri, 2 Dec 2022 15:40:56 -0800 Subject: [PATCH] Refactor of OpenMPISettings and improved support for mpiexec (#242) mpiexec, mpirun, and orterun are all synonyms in the modern MPI standards. The mpirunSettings and mpirunStep modules have been refactored into the more generic mpiSettings and mpiStep. Within these modules, the classes associated with mpiexec, mpirun, and orterun all inherit from a common base class whose default run command is mpiexec (which historically was the first launcher executable defined in the MPI standard). One exception to this is that the Slurm workload manager provides a wrapper around mpiexec which forwards only a subset of the options supported by mpiexec to Slurm. In general, this is a very limited set of options and so we choose not to support it. Instead, we check to see if mpiexec is actually mpiexec.slurm and raise an SSUnsupportedError. Another exception (which is now supported) is the Parallel Application Launch Service (PALS) used on more recent HPE/Cray HPC platforms, which also wraps mpiexec. Users of these platforms should now use the new run settings class PalsMpiexecSettings (in the palsSettings module) and PALS launchers for their workflows. 
[ Committed by @ashao ] [ Reviewed by @MattToast ] Co-authored-by: Riccardo Balin --- .../_core/launcher/cobalt/cobaltLauncher.py | 6 +- smartsim/_core/launcher/lsf/lsfLauncher.py | 4 +- smartsim/_core/launcher/pbs/pbsLauncher.py | 12 +- .../_core/launcher/slurm/slurmLauncher.py | 4 +- smartsim/_core/launcher/step/__init__.py | 2 +- .../step/{mpirunStep.py => mpiStep.py} | 134 +++++-- smartsim/database/orchestrator.py | 35 +- smartsim/settings/__init__.py | 4 +- smartsim/settings/mpiSettings.py | 350 ++++++++++++++++++ smartsim/settings/mpirunSettings.py | 319 +--------------- smartsim/settings/palsSettings.py | 212 +++++++++++ .../mpi_impl_stubs/intel2019/mpiexec | 5 - .../mpi_impl_stubs/intel2019/mpirun | 5 - .../mpi_impl_stubs/intel2019/orterun | 3 - .../test_configs/mpi_impl_stubs/slurm/mpiexec | 29 ++ tests/test_model.py | 14 +- ...enmpi_settings.py => test_mpi_settings.py} | 125 +++---- tests/test_pals_settings.py | 56 +++ 18 files changed, 855 insertions(+), 464 deletions(-) rename smartsim/_core/launcher/step/{mpirunStep.py => mpiStep.py} (53%) create mode 100644 smartsim/settings/mpiSettings.py create mode 100644 smartsim/settings/palsSettings.py delete mode 100755 tests/test_configs/mpi_impl_stubs/intel2019/mpiexec delete mode 100755 tests/test_configs/mpi_impl_stubs/intel2019/mpirun delete mode 100755 tests/test_configs/mpi_impl_stubs/intel2019/orterun create mode 100755 tests/test_configs/mpi_impl_stubs/slurm/mpiexec rename tests/{test_openmpi_settings.py => test_mpi_settings.py} (66%) create mode 100644 tests/test_pals_settings.py diff --git a/smartsim/_core/launcher/cobalt/cobaltLauncher.py b/smartsim/_core/launcher/cobalt/cobaltLauncher.py index 686ad8e58..334381cdc 100644 --- a/smartsim/_core/launcher/cobalt/cobaltLauncher.py +++ b/smartsim/_core/launcher/cobalt/cobaltLauncher.py @@ -35,7 +35,7 @@ from ...config import CONFIG from ..launcher import WLMLauncher from ..pbs.pbsCommands import qdel, qstat -from ..step import AprunStep, CobaltBatchStep, 
LocalStep, MpirunStep +from ..step import AprunStep, CobaltBatchStep, LocalStep, MpirunStep, MpiexecStep, OrterunStep from ..stepInfo import CobaltStepInfo from .cobaltParser import parse_cobalt_step_id, parse_cobalt_step_status, parse_qsub_out @@ -62,7 +62,9 @@ def __init__(self): AprunSettings: AprunStep, CobaltBatchSettings: CobaltBatchStep, MpirunSettings: MpirunStep, - RunSettings: LocalStep, + MpiexecSettings: MpiexecStep, + OrterunSettings: OrterunStep, + RunSettings: LocalStep } def run(self, step): diff --git a/smartsim/_core/launcher/lsf/lsfLauncher.py b/smartsim/_core/launcher/lsf/lsfLauncher.py index eecc5e6b0..230b533d0 100644 --- a/smartsim/_core/launcher/lsf/lsfLauncher.py +++ b/smartsim/_core/launcher/lsf/lsfLauncher.py @@ -32,7 +32,7 @@ from ....status import STATUS_CANCELLED, STATUS_COMPLETED from ...config import CONFIG from ..launcher import WLMLauncher -from ..step import BsubBatchStep, JsrunStep, LocalStep, MpirunStep +from ..step import BsubBatchStep, JsrunStep, LocalStep, MpirunStep, MpiexecStep, OrterunStep from ..stepInfo import LSFBatchStepInfo, LSFJsrunStepInfo from .lsfCommands import bjobs, bkill, jskill, jslist from .lsfParser import ( @@ -63,6 +63,8 @@ class LSFLauncher(WLMLauncher): JsrunSettings: JsrunStep, BsubBatchSettings: BsubBatchStep, MpirunSettings: MpirunStep, + MpiexecSettings: MpiexecStep, + OrterunSettings: OrterunStep, RunSettings: LocalStep, } diff --git a/smartsim/_core/launcher/pbs/pbsLauncher.py b/smartsim/_core/launcher/pbs/pbsLauncher.py index 8ee6d1d64..74d826692 100644 --- a/smartsim/_core/launcher/pbs/pbsLauncher.py +++ b/smartsim/_core/launcher/pbs/pbsLauncher.py @@ -32,7 +32,14 @@ from ....status import STATUS_CANCELLED, STATUS_COMPLETED from ...config import CONFIG from ..launcher import WLMLauncher -from ..step import AprunStep, LocalStep, MpirunStep, QsubBatchStep +from ..step import ( + AprunStep, + LocalStep, + MpirunStep, + MpiexecStep, + OrterunStep, + QsubBatchStep +) from ..stepInfo import 
PBSStepInfo from .pbsCommands import qdel, qstat from .pbsParser import parse_qstat_jobid, parse_step_id_from_qstat @@ -57,8 +64,11 @@ class PBSLauncher(WLMLauncher): supported_rs = { AprunSettings: AprunStep, QsubBatchSettings: QsubBatchStep, + MpiexecSettings: MpiexecStep, MpirunSettings: MpirunStep, + OrterunSettings: OrterunStep, RunSettings: LocalStep, + PalsMpiexecSettings: MpiexecStep } def run(self, step): diff --git a/smartsim/_core/launcher/slurm/slurmLauncher.py b/smartsim/_core/launcher/slurm/slurmLauncher.py index e48629fe1..94586376a 100644 --- a/smartsim/_core/launcher/slurm/slurmLauncher.py +++ b/smartsim/_core/launcher/slurm/slurmLauncher.py @@ -33,7 +33,7 @@ from ....status import STATUS_CANCELLED from ...config import CONFIG from ..launcher import WLMLauncher -from ..step import LocalStep, MpirunStep, SbatchStep, SrunStep +from ..step import LocalStep, MpirunStep, MpiexecStep, SbatchStep, SrunStep, OrterunStep from ..stepInfo import SlurmStepInfo from .slurmCommands import sacct, scancel, sstat from .slurmParser import parse_sacct, parse_sstat_nodes, parse_step_id_from_sacct @@ -59,6 +59,8 @@ class SlurmLauncher(WLMLauncher): SrunSettings: SrunStep, SbatchSettings: SbatchStep, MpirunSettings: MpirunStep, + MpiexecSettings: MpiexecStep, + OrterunSettings: OrterunStep, RunSettings: LocalStep, } diff --git a/smartsim/_core/launcher/step/__init__.py b/smartsim/_core/launcher/step/__init__.py index 2813e36c1..6daa33273 100644 --- a/smartsim/_core/launcher/step/__init__.py +++ b/smartsim/_core/launcher/step/__init__.py @@ -2,6 +2,6 @@ from .cobaltStep import CobaltBatchStep from .localStep import LocalStep from .lsfStep import BsubBatchStep, JsrunStep -from .mpirunStep import MpirunStep +from .mpiStep import MpirunStep, MpiexecStep, OrterunStep from .pbsStep import QsubBatchStep from .slurmStep import SbatchStep, SrunStep diff --git a/smartsim/_core/launcher/step/mpirunStep.py b/smartsim/_core/launcher/step/mpiStep.py similarity index 53% rename from 
smartsim/_core/launcher/step/mpirunStep.py rename to smartsim/_core/launcher/step/mpiStep.py index b704cb1a0..c768bc0b4 100644 --- a/smartsim/_core/launcher/step/mpirunStep.py +++ b/smartsim/_core/launcher/step/mpiStep.py @@ -34,10 +34,9 @@ logger = get_logger(__name__) - -class MpirunStep(Step): +class _BaseMPIStep(Step): def __init__(self, name, cwd, run_settings): - """Initialize a OpenMPI mpirun job step + """Initialize a job step conforming to the MPI standard :param name: name of the entity to be launched :type name: str @@ -46,74 +45,79 @@ def __init__(self, name, cwd, run_settings): :param run_settings: run settings for entity :type run_settings: RunSettings """ + super().__init__(name, cwd) + + self.run_settings = run_settings + self.alloc = None if not self.run_settings.in_batch: self._set_alloc() + _supported_launchers = [ + "PBS", + "COBALT", + "SLURM", + "LSB" + ] + + @property + def _run_command(self): + return self.run_settings._run_command + def get_launch_cmd(self): """Get the command to launch this step :return: launch command :rtype: list[str] """ - mpirun = self.run_settings.run_command - mpirun_cmd = [mpirun, "-wdir", self.cwd] - # add env vars to mpirun command - mpirun_cmd.extend(self.run_settings.format_env_vars()) + mpi_cmd = [self._run_command, "--wdir", self.cwd] + # add env vars to mpi command + mpi_cmd.extend(self.run_settings.format_env_vars()) - # add mpirun settings to command - mpirun_cmd.extend(self.run_settings.format_run_args()) + # add mpi settings to command + mpi_cmd.extend(self.run_settings.format_run_args()) if self.run_settings.colocated_db_settings: # disable cpu binding as the entrypoint will set that # for the application and database process now - mpirun_cmd.extend(["--bind-to", "none"]) + # mpi_cmd.extend(["--cpu-bind", "none"]) # Replace the command with the entrypoint wrapper script bash = shutil.which("bash") launch_script_path = self.get_colocated_launch_script() - mpirun_cmd += [bash, launch_script_path] + mpi_cmd 
+= [bash, launch_script_path] - mpirun_cmd += self._build_exe() + mpi_cmd += self._build_exe() # if its in a batch, redirect stdout to # file in the cwd. if self.run_settings.in_batch: output = self.get_step_file(ending=".out") - mpirun_cmd += [">", output] - return mpirun_cmd + mpi_cmd += [">", output] + return mpi_cmd def _set_alloc(self): """Set the id of the allocation :raises AllocationError: allocation not listed or found """ - if "PBS_JOBID" in os.environ: # cov-pbs - self.alloc = os.environ["PBS_JOBID"] - logger.debug( - f"Running on PBS allocation {self.alloc} gleaned from user environment" - ) - elif "COBALT_JOBID" in os.environ: # cov-cobalt - self.alloc = os.environ["COBALT_JOBID"] - logger.debug( - f"Running on Cobalt allocation {self.alloc} gleaned from user environment" - ) - elif "SLURM_JOBID" in os.environ: # cov-slurm - self.alloc = os.environ["SLURM_JOBID"] - logger.debug( - f"Running on Slurm allocation {self.alloc} gleaned from user environment" - ) - elif "LSB_JOBID" in os.environ: # cov-lsf - self.alloc = os.environ["LSB_JOBID"] - logger.debug( - f"Running on Slurm allocation {self.alloc} gleaned from user environment" - ) - else: - raise AllocationError( - "No allocation specified or found and not running in batch" - ) + + environment_keys = os.environ.keys() + for launcher in self._supported_launchers: + jobid_field = f'{launcher.upper()}_JOBID' + if jobid_field in environment_keys: + self.alloc = os.environ[jobid_field] + logger.debug( + f"Running on allocation {self.alloc} from {jobid_field}" + ) + return + + # If this function did not return above, no allocations were found + raise AllocationError( + "No allocation specified or found and not running in batch" + ) def _build_exe(self): """Build the executable for this step @@ -129,7 +133,7 @@ def _build_exe(self): return exe + args def _make_mpmd(self): - """Build mpirun (MPMD) executable""" + """Build mpiexec (MPMD) executable""" exe = self.run_settings.exe args = 
self.run_settings.exe_args cmd = exe + args @@ -142,3 +146,55 @@ def _make_mpmd(self): cmd = sh_split(" ".join(cmd)) return cmd + +class MpiexecStep(_BaseMPIStep): + def __init__(self, name, cwd, run_settings): + """Initialize an mpiexec job step + + :param name: name of the entity to be launched + :type name: str + :param cwd: path to launch dir + :type cwd: str + :param run_settings: run settings for entity + :type run_settings: RunSettings + :param default_run_command: The default command to launch an MPI + application + :type default_run_command: str, optional + """ + + super().__init__(name, cwd, run_settings) + + +class MpirunStep(_BaseMPIStep): + def __init__(self, name, cwd, run_settings): + """Initialize an mpirun job step + + :param name: name of the entity to be launched + :type name: str + :param cwd: path to launch dir + :type cwd: str + :param run_settings: run settings for entity + :type run_settings: RunSettings + :param default_run_command: The default command to launch an MPI + application + :type default_run_command: str, optional + """ + + super().__init__(name, cwd, run_settings) + +class OrterunStep(_BaseMPIStep): + def __init__(self, name, cwd, run_settings): + """Initialize an orterun job step + + :param name: name of the entity to be launched + :type name: str + :param cwd: path to launch dir + :type cwd: str + :param run_settings: run settings for entity + :type run_settings: RunSettings + :param default_run_command: The default command to launch an MPI + application + :type default_run_command: str, optional + """ + + super().__init__(name, cwd, run_settings) \ No newline at end of file diff --git a/smartsim/database/orchestrator.py b/smartsim/database/orchestrator.py index 87b31a5ea..4a576a212 100644 --- a/smartsim/database/orchestrator.py +++ b/smartsim/database/orchestrator.py @@ -46,6 +46,9 @@ CobaltBatchSettings, JsrunSettings, MpirunSettings, + MpiexecSettings, + PalsMpiexecSettings, + OrterunSettings, QsubBatchSettings, 
SbatchSettings, SrunSettings, @@ -101,9 +104,9 @@ def __init__( launcher = detect_launcher() by_launcher = { - "slurm": ["srun", "mpirun"], - "pbs": ["aprun", "mpirun"], - "cobalt": ["aprun", "mpirun"], + "slurm": ["srun", "mpirun", "mpiexec"], + "pbs": ["aprun", "mpirun", "mpiexec"], + "cobalt": ["aprun", "mpirun", "mpiexec"], "lsf": ["jsrun"], "local": [None], } @@ -719,16 +722,24 @@ def _check_network_interface(self): def _fill_reserved(self): """Fill the reserved batch and run arguments dictionaries""" - self._reserved_run_args[MpirunSettings] = [ - "np", - "N", - "c", - "output-filename", - "n", - "wdir", - "wd", - "host", + + mpi_like_settings = [ + MpirunSettings, + MpiexecSettings, + OrterunSettings, + PalsMpiexecSettings ] + for settings in mpi_like_settings: + self._reserved_run_args[settings] = [ + "np", + "N", + "c", + "output-filename", + "n", + "wdir", + "wd", + "host", + ] self._reserved_run_args[SrunSettings] = [ "nodes", "N", diff --git a/smartsim/settings/__init__.py b/smartsim/settings/__init__.py index cf18fec48..84a0515a5 100644 --- a/smartsim/settings/__init__.py +++ b/smartsim/settings/__init__.py @@ -2,9 +2,10 @@ from .base import RunSettings from .cobaltSettings import CobaltBatchSettings from .lsfSettings import BsubBatchSettings, JsrunSettings -from .mpirunSettings import MpiexecSettings, MpirunSettings, OrterunSettings +from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings from .pbsSettings import QsubBatchSettings from .slurmSettings import SbatchSettings, SrunSettings +from .palsSettings import PalsMpiexecSettings from .containers import Container, Singularity __all__ = [ @@ -19,6 +20,7 @@ "RunSettings", "SbatchSettings", "SrunSettings", + "PalsMpiexecSettings", "Container", "Singularity", ] diff --git a/smartsim/settings/mpiSettings.py b/smartsim/settings/mpiSettings.py new file mode 100644 index 000000000..513ad6cf7 --- /dev/null +++ b/smartsim/settings/mpiSettings.py @@ -0,0 +1,350 @@ +# BSD 2-Clause License +# 
+# Copyright (c) 2021-2022, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import shutil +import subprocess + +from ..error import SSUnsupportedError, LauncherError +from ..log import get_logger +from .base import RunSettings + +logger = get_logger(__name__) + + +class _BaseMPISettings(RunSettings): + """Base class for all common arguments of MPI-standard run commands""" + + def __init__( + self, exe, exe_args=None, run_command="mpiexec", run_args=None, + env_vars=None, fail_if_missing_exec=True, **kwargs + ): + """Settings to format run job with an MPI-standard binary + + Note that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted + command line arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. + + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + :param fail_if_missing_exec: Throw an exception of the MPI command + is missing. Otherwise, throw a warning + :type fail_if_missing_exec: bool, optional + """ + super().__init__( + exe, + exe_args, + run_command=run_command, + run_args=run_args, + env_vars=env_vars, + **kwargs, + ) + self.mpmd = [] + + if not shutil.which(self._run_command): + msg = ( + f"Cannot find {self._run_command}. Try passing the " + "full path via run_command." 
+ ) + if fail_if_missing_exec: + raise LauncherError(msg) + else: + logger.warning(msg) + + reserved_run_args = {"wd", "wdir"} + + def make_mpmd(self, mpirun_settings): + """Make a mpmd workload by combining two ``mpirun`` commands + + This connects the two settings to be executed with a single + Model instance + + :param mpirun_settings: MpirunSettings instance + :type mpirun_settings: MpirunSettings + """ + if self.colocated_db_settings: + raise SSUnsupportedError( + "Colocated models cannot be run as a mpmd workload" + ) + self.mpmd.append(mpirun_settings) + + def set_task_map(self, task_mapping): + """Set ``mpirun`` task mapping + + this sets ``--map-by `` + + For examples, see the man page for ``mpirun`` + + :param task_mapping: task mapping + :type task_mapping: str + """ + self.run_args["map-by"] = str(task_mapping) + + def set_cpus_per_task(self, cpus_per_task): + """Set the number of tasks for this job + + This sets ``--cpus-per-proc`` for MPI compliant implementations + + note: this option has been deprecated in openMPI 4.0+ + and will soon be replaced. 
+ + :param cpus_per_task: number of tasks + :type cpus_per_task: int + """ + self.run_args["cpus-per-proc"] = int(cpus_per_task) + + def set_cpu_binding_type(self, bind_type): + """Specifies the cores to which MPI processes are bound + + This sets ``--bind-to`` for MPI compliant implementations + + :param bind_type: binding type + :type bind_type: str + """ + self.run_args["bind-to"] = str(bind_type) + + def set_tasks_per_node(self, tasks_per_node): + """Set the number of tasks per node + + :param tasks_per_node: number of tasks to launch per node + :type tasks_per_node: int + """ + self.run_args["npernode"] = int(tasks_per_node) + + def set_tasks(self, tasks): + """Set the number of tasks for this job + + This sets ``-n`` for MPI compliant implementations + + :param tasks: number of tasks + :type tasks: int + """ + self.run_args["n"] = int(tasks) + + def set_hostlist(self, host_list): + """Set the hostlist for the ``mpirun`` command + + This sets ``--host`` + + :param host_list: list of host names + :type host_list: str | list[str] + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all([isinstance(host, str) for host in host_list]): + raise TypeError("host_list argument must be list of strings") + self.run_args["host"] = ",".join(host_list) + + def set_hostlist_from_file(self, file_path): + """Use the contents of a file to set the hostlist + + This sets ``--hostfile`` + + :param file_path: Path to the hostlist file + :type file_path: str + """ + self.run_args["hostfile"] = str(file_path) + + def set_verbose_launch(self, verbose): + """Set the job to run in verbose mode + + This sets ``--verbose`` + + :param verbose: Whether the job should be run verbosely + :type verbose: bool + """ + if verbose: + self.run_args["verbose"] = None + else: + self.run_args.pop("verbose", None) + + def 
set_quiet_launch(self, quiet): + """Set the job to run in quiet mode + + This sets ``--quiet`` + + :param quiet: Whether the job should be run quietly + :type quiet: bool + """ + if quiet: + self.run_args["quiet"] = None + else: + self.run_args.pop("quiet", None) + + def set_broadcast(self, dest_path=None): + """Copy the specified executable(s) to remote machines + + This sets ``--preload-binary`` + + :param dest_path: Destination path (Ignored) + :type dest_path: str | None + """ + if dest_path is not None and isinstance(dest_path, str): + logger.warning( + ( + f"{type(self)} cannot set a destination path during broadcast. " + "Using session directory instead" + ) + ) + self.run_args["preload-binary"] = None + + def set_walltime(self, walltime): + """Set the maximum number of seconds that a job will run + + This sets ``--timeout`` + + :param walltime: number like string of seconds that a job will run in secs + :type walltime: str + """ + self.run_args["timeout"] = str(walltime) + + def format_run_args(self): + """Return a list of MPI-standard formatted run arguments + + :return: list of MPI-standard arguments for these settings + :rtype: list[str] + """ + # args launcher uses + args = [] + restricted = ["wdir", "wd"] + + for opt, value in self.run_args.items(): + if opt not in restricted: + prefix = "--" + if not value: + args += [prefix + opt] + else: + args += [prefix + opt, str(value)] + return args + + def format_env_vars(self): + """Format the environment variables for mpirun + + :return: list of env vars + :rtype: list[str] + """ + formatted = [] + env_string = "-x" + + if self.env_vars: + for name, value in self.env_vars.items(): + if value: + formatted += [env_string, "=".join((name, str(value)))] + else: + formatted += [env_string, name] + return formatted + + +class MpirunSettings(_BaseMPISettings): + def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): + """Settings to run job with ``mpirun`` command (MPI-standard) + + Note 
that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted + into ``mpirun`` arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. + + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + """ + super().__init__(exe, exe_args, "mpirun", run_args, env_vars, **kwargs) + +class MpiexecSettings(_BaseMPISettings): + def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): + """Settings to run job with ``mpiexec`` command (MPI-standard) + + Note that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted + into ``mpiexec`` arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. 
+ + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + """ + super().__init__(exe, exe_args, "mpiexec", run_args, env_vars, **kwargs) + + completed_process = subprocess.run( + [self._run_command, "--help"], + capture_output=True + ) + help_statement = completed_process.stdout.decode() + if "mpiexec.slurm" in help_statement: + raise SSUnsupportedError( + "Slurm's wrapper for mpiexec is unsupported. Use slurmSettings instead" + ) + +class OrterunSettings(_BaseMPISettings): + def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): + """Settings to run job with ``orterun`` command (MPI-standard) + + Note that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted + into ``orterun`` arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. 
+ + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + """ + super().__init__(exe, exe_args, "orterun", run_args, env_vars, **kwargs) \ No newline at end of file diff --git a/smartsim/settings/mpirunSettings.py b/smartsim/settings/mpirunSettings.py index 616a5685a..797da3534 100644 --- a/smartsim/settings/mpirunSettings.py +++ b/smartsim/settings/mpirunSettings.py @@ -24,318 +24,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import subprocess -import re - -from ..error import SSUnsupportedError +from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings +from warnings import simplefilter, warn from ..log import get_logger -from .base import RunSettings logger = get_logger(__name__) - -class _OpenMPISettings(RunSettings): - """Base class for all common arguments of OpenMPI run commands""" - - def __init__( - self, exe, exe_args=None, run_command="", run_args=None, env_vars=None, **kwargs - ): - """Settings to format run job with an OpenMPI binary - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - command line arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. 
- - :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional - """ - super().__init__( - exe, - exe_args, - run_command=run_command, - run_args=run_args, - env_vars=env_vars, - **kwargs, - ) - self.mpmd = [] - - reserved_run_args = {"wd", "wdir"} - - def make_mpmd(self, mpirun_settings): - """Make a mpmd workload by combining two ``mpirun`` commands - - This connects the two settings to be executed with a single - Model instance - - :param mpirun_settings: MpirunSettings instance - :type mpirun_settings: MpirunSettings - """ - if self.colocated_db_settings: - raise SSUnsupportedError( - "Colocated models cannot be run as a mpmd workload" - ) - self.mpmd.append(mpirun_settings) - - def set_task_map(self, task_mapping): - """Set ``mpirun`` task mapping - - this sets ``--map-by `` - - For examples, see the man page for ``mpirun`` - - :param task_mapping: task mapping - :type task_mapping: str - """ - self.run_args["map-by"] = str(task_mapping) - - def set_cpus_per_task(self, cpus_per_task): - """Set the number of tasks for this job - - This sets ``--cpus-per-proc`` - - note: this option has been deprecated in openMPI 4.0+ - and will soon be replaced. 
- - :param cpus_per_task: number of tasks - :type cpus_per_task: int - """ - self.run_args["cpus-per-proc"] = int(cpus_per_task) - - def set_tasks_per_node(self, tasks_per_node): - """Set the number of tasks per node - - :param tasks_per_node: number of tasks to launch per node - :type tasks_per_node: int - """ - self.run_args["npernode"] = int(tasks_per_node) - - def set_tasks(self, tasks): - """Set the number of tasks for this job - - This sets ``--n`` - - :param tasks: number of tasks - :type tasks: int - """ - self.run_args["n"] = int(tasks) - - def set_hostlist(self, host_list): - """Set the hostlist for the ``mpirun`` command - - This sets ``--host`` - - :param host_list: list of host names - :type host_list: str | list[str] - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all([isinstance(host, str) for host in host_list]): - raise TypeError("host_list argument must be list of strings") - self.run_args["host"] = ",".join(host_list) - - def set_hostlist_from_file(self, file_path): - """Use the contents of a file to set the hostlist - - This sets ``--hostfile`` - - :param file_path: Path to the hostlist file - :type file_path: str - """ - self.run_args["hostfile"] = str(file_path) - - def set_verbose_launch(self, verbose): - """Set the job to run in verbose mode - - This sets ``--verbose`` - - :param verbose: Whether the job should be run verbosely - :type verbose: bool - """ - if verbose: - self.run_args["verbose"] = None - else: - self.run_args.pop("verbose", None) - - def set_quiet_launch(self, quiet): - """Set the job to run in quiet mode - - This sets ``--quiet`` - - :param quiet: Whether the job should be run quietly - :type quiet: bool - """ - if quiet: - self.run_args["quiet"] = None - else: - self.run_args.pop("quiet", None) - - def set_broadcast(self, dest_path=None): - 
"""Copy the specified executable(s) to remote machines - - This sets ``--preload-binary`` - - :param dest_path: Destination path (Ignored) - :type dest_path: str | None - """ - if dest_path is not None and isinstance(dest_path, str): - logger.warning( - ( - f"{type(self)} cannot set a destination path during broadcast. " - "Using session directory instead" - ) - ) - self.run_args["preload-binary"] = None - - def set_walltime(self, walltime): - """Set the maximum number of seconds that a job will run - - This sets ``--timeout`` - - :param walltime: number like string of seconds that a job will run in secs - :type walltime: str - """ - self.run_args["timeout"] = str(walltime) - - def format_run_args(self): - """Return a list of OpenMPI formatted run arguments - - :return: list of OpenMPI arguments for these settings - :rtype: list[str] - """ - # args launcher uses - args = [] - restricted = ["wdir", "wd"] - - for opt, value in self.run_args.items(): - if opt not in restricted: - prefix = "--" - if not value: - args += [prefix + opt] - else: - args += [prefix + opt, str(value)] - return args - - def format_env_vars(self): - """Format the environment variables for mpirun - - :return: list of env vars - :rtype: list[str] - """ - formatted = [] - - if self.env_vars: - for name, value in self.env_vars.items(): - if value: - formatted += ["-x", "=".join((name, str(value)))] - else: - formatted += ["-x", name] - return formatted - - -class MpirunSettings(_OpenMPISettings): - def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): - """Settings to run job with ``mpirun`` command (OpenMPI) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``mpirun`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. 
- - :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional - """ - super().__init__(exe, exe_args, "mpirun", run_args, env_vars, **kwargs) - - completed_process = subprocess.run( - [self.run_command, "-V"], capture_output=True - ) # type: subprocess.CompletedProcess - version_statement = completed_process.stdout.decode() - - if not re.match(r"mpirun\s\(Open MPI\)\s4.\d+.\d+", version_statement): - logger.warning("Non-OpenMPI implementation of `mpirun` detected") - - -class MpiexecSettings(_OpenMPISettings): - def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): - """Settings to run job with ``mpiexec`` command (OpenMPI) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``mpiexec`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. 
- - :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional - """ - super().__init__(exe, exe_args, "mpiexec", run_args, env_vars, **kwargs) - - completed_process = subprocess.run( - [self.run_command, "-V"], capture_output=True - ) # type: subprocess.CompletedProcess - version_statement = completed_process.stdout.decode() - - if not re.match(r"mpiexec\s\(OpenRTE\)\s4.\d+.\d+", version_statement): - logger.warning("Non-OpenMPI implementation of `mpiexec` detected") - - -class OrterunSettings(_OpenMPISettings): - def __init__(self, exe, exe_args=None, run_args=None, env_vars=None, **kwargs): - """Settings to run job with ``orterun`` command (OpenMPI) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``orterun`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. 
- - :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional - """ - super().__init__(exe, exe_args, "orterun", run_args, env_vars, **kwargs) - - completed_process = subprocess.run( - [self.run_command, "-V"], capture_output=True - ) # type: subprocess.CompletedProcess - version_statement = completed_process.stdout.decode() - - if not re.match(r"orterun\s\(OpenRTE\)\s4.\d+.\d+", version_statement): - logger.warning("Non-OpenMPI implementation of `orterun` detected") +simplefilter("once", DeprecationWarning) +warn( + "mpirunSettings will be deprecated; use mpiSettings instead.", + DeprecationWarning, + stacklevel=2 +) \ No newline at end of file diff --git a/smartsim/settings/palsSettings.py b/smartsim/settings/palsSettings.py new file mode 100644 index 000000000..0bb17e643 --- /dev/null +++ b/smartsim/settings/palsSettings.py @@ -0,0 +1,212 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2022, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import shutil +import subprocess + +from ..error import SSUnsupportedError, LauncherError +from ..log import get_logger +from .mpiSettings import _BaseMPISettings + +logger = get_logger(__name__) + + +class PalsMpiexecSettings(_BaseMPISettings): + """Settings to run job with ``mpiexec`` under the HPE Cray + Parallel Application Launch Service (PALS) + + Note that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted + into ``mpiexec`` arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. 
+ + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + """ + + def __init__( + self, exe, exe_args=None, run_command="mpiexec", run_args=None, + env_vars=None, fail_if_missing_exec=True, **kwargs + ): + """Settings to format run job with an MPI-standard binary + + Note that environment variables can be passed with a None + value to signify that they should be exported from the current + environment + + Any arguments passed in the ``run_args`` dict will be converted into + command line arguments and prefixed with ``--``. Values of + None can be provided for arguments that do not have values. + + :param exe: executable + :type exe: str + :param exe_args: executable arguments, defaults to None + :type exe_args: str | list[str], optional + :param run_args: arguments for run command, defaults to None + :type run_args: dict[str, str], optional + :param env_vars: environment vars to launch job with, defaults to None + :type env_vars: dict[str, str], optional + :param fail_if_missing_exec: Throw an exception if the MPI command + is missing.
Otherwise, throw a warning + :type fail_if_missing_exec: bool, optional + """ + super().__init__( + exe, + exe_args, + run_command=run_command, + run_args=run_args, + env_vars=env_vars, + fail_if_missing_exec=fail_if_missing_exec, + **kwargs, + ) + + def set_task_map(self, task_mapping): + """Set ``mpirun`` task mapping + + Not supported under PALS: only logs a warning (``--map-by`` is not set) + + For examples, see the man page for ``mpirun`` + + :param task_mapping: task mapping + :type task_mapping: str + """ + logger.warning("set_task_map not supported under PALS") + + def set_cpus_per_task(self, cpus_per_task): + """Set the number of cpus per task + + Not supported under PALS: only logs a warning (``--cpus-per-proc`` is not set) + + note: this option has been deprecated in openMPI 4.0+ + and will soon be replaced. + + :param cpus_per_task: number of cpus per task + :type cpus_per_task: int + """ + logger.warning("set_cpus_per_task not supported under PALS") + + def set_cpu_binding_type(self, bind_type): + """Specifies the cores to which MPI processes are bound + + This sets ``--cpu-bind`` + + :param bind_type: binding type + :type bind_type: str + """ + self.run_args["cpu-bind"] = str(bind_type) + + def set_tasks_per_node(self, tasks_per_node): + """Set the number of tasks per node + + :param tasks_per_node: number of tasks to launch per node + :type tasks_per_node: int + """ + self.run_args["ppn"] = int(tasks_per_node) + + def set_quiet_launch(self, quiet): + """Set the job to run in quiet mode + + Not supported under PALS: only logs a warning (``--quiet`` is not set) + + :param quiet: Whether the job should be run quietly + :type quiet: bool + """ + + logger.warning("set_quiet_launch not supported under PALS") + + def set_broadcast(self, dest_path=None): + """Copy the specified executable(s) to remote machines + + This sets ``--transfer`` + + :param dest_path: Destination path (Ignored) + :type dest_path: str | None + """ + if dest_path is not None and isinstance(dest_path, str): + logger.warning( + ( + f"{type(self)} cannot set a
destination path during broadcast. " + "Using session directory instead" + ) + ) + self.run_args["transfer"] = None + + def set_walltime(self, walltime): + """Set the maximum number of seconds that a job will run + + :param walltime: number like string of seconds that a job will run in secs + :type walltime: str + """ + logger.warning("set_walltime not supported under PALS") + + def format_run_args(self): + """Return a list of MPI-standard formatted run arguments + + :return: list of MPI-standard arguments for these settings + :rtype: list[str] + """ + # args launcher uses + args = [] + restricted = ["wdir", "wd"] + + for opt, value in self.run_args.items(): + if opt not in restricted: + prefix = "--" + if not value: + args += [prefix + opt] + else: + args += [prefix + opt, str(value)] + return args + + def format_env_vars(self): + """Format the environment variables for mpirun + + :return: list of env vars + :rtype: list[str] + """ + formatted = [] + + export_vars = [] + if self.env_vars: + for name, value in self.env_vars.items(): + if value: + formatted += ['--env', "=".join((name, str(value)))] + else: + export_vars.append(name) + + if export_vars: + formatted += ['--envlist', ','.join(export_vars)] + + return formatted diff --git a/tests/test_configs/mpi_impl_stubs/intel2019/mpiexec b/tests/test_configs/mpi_impl_stubs/intel2019/mpiexec deleted file mode 100755 index ba0c93ce9..000000000 --- a/tests/test_configs/mpi_impl_stubs/intel2019/mpiexec +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -echo "Intel(R) MPI Library for Linux* OS, Version 2019 Update 9 Build 20200923 (id: abd58e492) -Copyright 2003-2020, Intel Corporation. 
-" \ No newline at end of file diff --git a/tests/test_configs/mpi_impl_stubs/intel2019/mpirun b/tests/test_configs/mpi_impl_stubs/intel2019/mpirun deleted file mode 100755 index ba0c93ce9..000000000 --- a/tests/test_configs/mpi_impl_stubs/intel2019/mpirun +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -echo "Intel(R) MPI Library for Linux* OS, Version 2019 Update 9 Build 20200923 (id: abd58e492) -Copyright 2003-2020, Intel Corporation. -" \ No newline at end of file diff --git a/tests/test_configs/mpi_impl_stubs/intel2019/orterun b/tests/test_configs/mpi_impl_stubs/intel2019/orterun deleted file mode 100755 index 1149e1459..000000000 --- a/tests/test_configs/mpi_impl_stubs/intel2019/orterun +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -echo "Not a real orterun" \ No newline at end of file diff --git a/tests/test_configs/mpi_impl_stubs/slurm/mpiexec b/tests/test_configs/mpi_impl_stubs/slurm/mpiexec new file mode 100755 index 000000000..ad1357c15 --- /dev/null +++ b/tests/test_configs/mpi_impl_stubs/slurm/mpiexec @@ -0,0 +1,29 @@ +#!/bin/sh + +# Based on running mpiexec.slurm without loading a specific MPI +# implementation + +echo " +Usage: + mpiexec.slurm args executable pgmargs + + where args are comannd line arguments for mpiexec (see below), + executable is the name of the eecutable and pgmargs are command line + arguments for the executable. For example the following command will run + the MPI program a.out on 4 processes: + + mpiexec.slurm -n 4 a.out + + mpiexec.slurm supports the following options: + + [-n nprocs] + [-host hostname] + [-verbose] + [-nostdin] + [-allstdin] + [-nostdout] + [-pernode] + [-config config_file] + [-help|-?] 
+ [-man] +" diff --git a/tests/test_model.py b/tests/test_model.py index d6e3786ba..ae7e5009b 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -3,7 +3,7 @@ from smartsim import Experiment from smartsim.error import EntityExistsError, SSUnsupportedError from smartsim.settings import RunSettings -from smartsim.settings.mpirunSettings import _OpenMPISettings +from smartsim.settings.mpiSettings import _BaseMPISettings def test_register_incoming_entity_preexists(): @@ -27,10 +27,18 @@ def test_disable_key_prefixing(): def test_catch_colo_mpmd_model(): exp = Experiment("experiment", launcher="local") - rs = _OpenMPISettings("python", exe_args="sleep.py") + rs = _BaseMPISettings( + "python", + exe_args="sleep.py", + fail_if_missing_exec=False + ) # make it an mpmd model - rs_2 = _OpenMPISettings("python", exe_args="sleep.py") + rs_2 = _BaseMPISettings( + "python", + exe_args="sleep.py", + fail_if_missing_exec=False + ) rs.make_mpmd(rs_2) model = exp.create_model("bad_colo_model", rs) diff --git a/tests/test_openmpi_settings.py b/tests/test_mpi_settings.py similarity index 66% rename from tests/test_openmpi_settings.py rename to tests/test_mpi_settings.py index 05b29acf7..fba065877 100644 --- a/tests/test_openmpi_settings.py +++ b/tests/test_mpi_settings.py @@ -3,35 +3,36 @@ import os.path as osp import stat import sys -from asyncio.log import logger -from unicodedata import name import pytest -from smartsim.error import SSUnsupportedError -from smartsim.settings.mpirunSettings import ( +from smartsim.error import SSUnsupportedError, LauncherError +from smartsim.settings.mpiSettings import ( MpiexecSettings, MpirunSettings, OrterunSettings, - _OpenMPISettings, + _BaseMPISettings, ) +# Throw a warning instead of failing on machines without an MPI implementation +default_mpi_args = (sys.executable,) +default_mpi_kwargs = {"fail_if_missing_exec":False} @pytest.mark.parametrize( - "OpenMPISettings", [MpirunSettings, MpiexecSettings, OrterunSettings] + 
"MPISettings", [MpirunSettings, MpiexecSettings, OrterunSettings] ) -def test_not_instanced_if_not_found(OpenMPISettings): +def test_not_instanced_if_not_found(MPISettings): old_path = os.getenv("PATH") try: os.environ["PATH"] = "" - with pytest.raises(FileNotFoundError): - OpenMPISettings(sys.executable) + with pytest.raises(LauncherError): + MPISettings(*default_mpi_args) finally: os.environ["PATH"] = old_path @pytest.mark.parametrize( - "OpenMPISettings,stubs_path,stub_exe", + "MPISettings,stubs_path,stub_exe", [ pytest.param( MpirunSettings, @@ -54,9 +55,9 @@ def test_not_instanced_if_not_found(OpenMPISettings): ], ) def test_expected_openmpi_instance_without_warning( - OpenMPISettings, stubs_path, stub_exe, fileutils, caplog + MPISettings, stubs_path, stub_exe, fileutils, caplog ): - from smartsim.settings.mpirunSettings import logger + from smartsim.settings.mpiSettings import logger old_path = os.environ.get("PATH") old_prop = logger.propagate @@ -72,7 +73,7 @@ def test_expected_openmpi_instance_without_warning( os.environ["PATH"] = stubs_path with caplog.at_level(logging.WARNING): caplog.clear() - OpenMPISettings(sys.executable) + MPISettings(*default_mpi_args, **default_mpi_kwargs) for rec in caplog.records: if logging.WARNING <= rec.levelno: pytest.fail( @@ -86,65 +87,27 @@ def test_expected_openmpi_instance_without_warning( logger.propagate = old_prop -@pytest.mark.parametrize( - "OpenMPISettings,stubs_path,stub_exe", - [ - pytest.param( - MpirunSettings, - osp.join("mpi_impl_stubs", "intel2019"), - "mpirun", - id="OpenMPI4-mpirun", - ), - pytest.param( - MpiexecSettings, - osp.join("mpi_impl_stubs", "intel2019"), - "mpiexec", - id="OpenMPI4-mpiexec", - ), - pytest.param( - OrterunSettings, - osp.join("mpi_impl_stubs", "intel2019"), - "orterun", - id="OpenMPI4-orterun", - ), - ], -) -def test_warning_if_not_expected_openmpi( - OpenMPISettings, stubs_path, stub_exe, fileutils, caplog -): - from smartsim.settings.mpirunSettings import logger +def 
test_error_if_slurm_mpiexec(fileutils): + stubs_path = osp.join("mpi_impl_stubs","slurm") + stubs_path = fileutils.get_test_dir_path(stubs_path) + stub_exe = osp.join(stubs_path, "mpiexec") old_path = os.environ.get("PATH") - old_prop = logger.propagate - logger.propagate = True try: - stubs_path = fileutils.get_test_dir_path(stubs_path) - stub_exe = osp.join(stubs_path, stub_exe) st = os.stat(stub_exe) if not st.st_mode & stat.S_IEXEC: os.chmod(stub_exe, st.st_mode | stat.S_IEXEC) os.environ["PATH"] = stubs_path - with caplog.at_level(logging.WARNING): - caplog.clear() - OpenMPISettings(sys.executable) - - for rec in caplog.records: - if ( - logging.WARNING <= rec.levelno < logging.ERROR - and "Non-OpenMPI implementation" in rec.msg - ): - break - else: - pytest.fail("No Non-OpenMPI warning given to user") + with pytest.raises(SSUnsupportedError): + MpiexecSettings(sys.executable) finally: os.environ["PATH"] = old_path - logger.propagate = old_prop -def test_openmpi_base_settings(): - settings = _OpenMPISettings("python") +def test_base_settings(): + settings = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) settings.set_cpus_per_task(1) settings.set_tasks(2) settings.set_hostlist(["node005", "node006"]) @@ -153,12 +116,16 @@ def test_openmpi_base_settings(): assert formatted == result -def test_openmpi_base_args(): +def test_mpi_base_args(): run_args = { "map-by": "ppr:1:node", "np": 1, } - settings = _OpenMPISettings("python", run_args=run_args) + settings = _BaseMPISettings( + *default_mpi_args, + run_args=run_args, + **default_mpi_kwargs + ) formatted = settings.format_run_args() result = ["--map-by", "ppr:1:node", "--np", "1"] assert formatted == result @@ -168,25 +135,25 @@ def test_openmpi_base_args(): assert formatted == result -def test_openmpi_add_mpmd(): - settings = _OpenMPISettings("python") - settings_2 = _OpenMPISettings("python") +def test_mpi_add_mpmd(): + settings = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) + settings_2 
= _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) settings.make_mpmd(settings_2) assert len(settings.mpmd) > 0 assert settings.mpmd[0] == settings_2 def test_catch_colo_mpmd(): - settings = _OpenMPISettings("python") + settings = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) settings.colocated_db_settings = {"port": 6379, "cpus": 1} - settings_2 = _OpenMPISettings("python") + settings_2 = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) with pytest.raises(SSUnsupportedError): settings.make_mpmd(settings_2) def test_format_env(): env_vars = {"OMP_NUM_THREADS": 20, "LOGGING": "verbose"} - settings = _OpenMPISettings("python", env_vars=env_vars) + settings = _BaseMPISettings(*default_mpi_args, env_vars=env_vars, **default_mpi_kwargs) settings.update_env({"OMP_NUM_THREADS": 10}) formatted = settings.format_env_vars() result = [ @@ -199,26 +166,26 @@ def test_format_env(): def test_mpirun_hostlist_errors(): - settings = _OpenMPISettings("python") + settings = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) with pytest.raises(TypeError): settings.set_hostlist(4) def test_mpirun_hostlist_errors_1(): - settings = _OpenMPISettings("python") + settings = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) with pytest.raises(TypeError): settings.set_hostlist([444]) @pytest.mark.parametrize("reserved_arg", ["wd", "wdir"]) def test_no_set_reserved_args(reserved_arg): - srun = _OpenMPISettings("python") + srun = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) srun.set(reserved_arg) assert reserved_arg not in srun.run_args def test_set_cpus_per_task(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_cpus_per_task(6) assert rs.run_args["cpus-per-proc"] == 6 @@ -227,7 +194,7 @@ def test_set_cpus_per_task(): def test_set_tasks_per_node(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_tasks_per_node(6) assert 
rs.run_args["npernode"] == 6 @@ -236,7 +203,7 @@ def test_set_tasks_per_node(): def test_set_tasks(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_tasks(6) assert rs.run_args["n"] == 6 @@ -245,7 +212,7 @@ def test_set_tasks(): def test_set_hostlist(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_hostlist(["host_A", "host_B"]) assert rs.run_args["host"] == "host_A,host_B" @@ -257,7 +224,7 @@ def test_set_hostlist(): def test_set_hostlist_from_file(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_hostlist_from_file("./path/to/hostfile") assert rs.run_args["hostfile"] == "./path/to/hostfile" @@ -266,7 +233,7 @@ def test_set_hostlist_from_file(): def test_set_verbose(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_verbose_launch(True) assert "verbose" in rs.run_args @@ -278,7 +245,7 @@ def test_set_verbose(): def test_quiet_launch(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_quiet_launch(True) assert "quiet" in rs.run_args @@ -290,7 +257,7 @@ def test_quiet_launch(): def test_set_broadcast(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_broadcast() assert "preload-binary" in rs.run_args @@ -299,7 +266,7 @@ def test_set_broadcast(): def test_set_time(): - rs = _OpenMPISettings("python") + rs = _BaseMPISettings(*default_mpi_args, **default_mpi_kwargs) rs.set_time(minutes=1, seconds=12) assert rs.run_args["timeout"] == "72" diff --git a/tests/test_pals_settings.py b/tests/test_pals_settings.py new file mode 100644 index 000000000..1928f3171 --- /dev/null +++ b/tests/test_pals_settings.py @@ -0,0 +1,56 @@ +import pytest + +import sys + +from smartsim.error import SSUnsupportedError +from smartsim.settings import 
PalsMpiexecSettings + +default_exe = sys.executable +default_kwargs = { + 'fail_if_missing_exec':False +} + +# Uncomment when the unsupported setters raise SSUnsupportedError (they currently only log a warning) +#@pytest.mark.parametrize( +# "function_name",[ +# 'set_task_map', +# 'set_cpus_per_task', +# 'set_quiet_launch', +# 'set_walltime' +# ] +#) +#def test_unsupported_methods(function_name): +# settings = PalsMpiexecSettings(default_exe, **default_kwargs) +# func = getattr(settings, function_name) +# with pytest.raises(SSUnsupportedError): +# func(None) + +def test_cpu_binding_type(): + settings = PalsMpiexecSettings(default_exe, **default_kwargs) + settings.set_cpu_binding_type('numa') + assert settings.format_run_args() == ['--cpu-bind', 'numa'] + +def test_tasks_per_node(): + settings = PalsMpiexecSettings(default_exe, **default_kwargs) + settings.set_tasks_per_node(48) + assert settings.format_run_args() == ['--ppn', '48'] + +def test_broadcast(): + settings = PalsMpiexecSettings(default_exe, **default_kwargs) + settings.set_broadcast() + assert settings.format_run_args() == ['--transfer'] + +def test_format_env_vars(): + example_env_vars = { + 'FOO_VERSION':'3.14', + 'PATH':None, + 'LD_LIBRARY_PATH':None + } + settings = PalsMpiexecSettings( + default_exe, + **default_kwargs, + env_vars=example_env_vars + ) + formatted = ' '.join(settings.format_env_vars()) + expected = '--env FOO_VERSION=3.14 --envlist PATH,LD_LIBRARY_PATH' + assert formatted == expected \ No newline at end of file