diff --git a/docs/src/code/algo.rst b/docs/src/code/algo.rst index 6608ac189..190948304 100644 --- a/docs/src/code/algo.rst +++ b/docs/src/code/algo.rst @@ -2,8 +2,6 @@ Algorithm modules ***************** -TODO - .. automodule:: orion.algo :members: @@ -17,5 +15,6 @@ TODO algo/gridsearch algo/hyperband algo/asha + algo/pbt algo/tpe algo/parallel_strategy diff --git a/docs/src/code/algo/asha.rst b/docs/src/code/algo/asha.rst index 948544981..6c4ebf6d6 100644 --- a/docs/src/code/algo/asha.rst +++ b/docs/src/code/algo/asha.rst @@ -1,8 +1,5 @@ Asynchronous Successive Halving Algorithm ========================================= -Can't build documentation because of import order. -Sphinx is loading ``orion.algo.asha`` before ``orion.algo`` and therefore -there is a cycle between the definition of ``BaseAlgorithm`` and -``ASHA`` as the meta-class ``Factory`` is trying to import ``ASHA``. -`PR #135 `_ should get rid of this problem. +.. automodule:: orion.algo.asha + :members: diff --git a/docs/src/code/algo/base.rst b/docs/src/code/algo/base.rst index e8a7ba5a8..e53af00d0 100644 --- a/docs/src/code/algo/base.rst +++ b/docs/src/code/algo/base.rst @@ -3,5 +3,3 @@ Base definition of algorithms .. autoclass:: orion.algo.base.BaseAlgorithm :members: - - diff --git a/docs/src/code/algo/pbt.rst b/docs/src/code/algo/pbt.rst new file mode 100644 index 000000000..014f77420 --- /dev/null +++ b/docs/src/code/algo/pbt.rst @@ -0,0 +1,89 @@ +Population Based Training +========================= + +.. contents:: + :depth: 3 + :local: + +.. role:: hidden + :class: hidden-section + +Population Based Training +------------------------- + +.. autoclass:: orion.algo.pbt.pbt.PBT + :members: + +LineageNode +----------- + +.. autoclass:: orion.algo.pbt.pbt.LineageNode + :members: + +Lineages +-------- + +.. autoclass:: orion.algo.pbt.pbt.Lineages + :members: + +Exploit classes for Population Based Training +--------------------------------------------- + +BaseExploit +~~~~~~~~~~~ + +.. 
autoclass:: orion.algo.pbt.exploit.BaseExploit + :members: + + +PipelineExploit +~~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.exploit.PipelineExploit + :members: + + +TruncateExploit +~~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.exploit.TruncateExploit + :members: + +BacktrackExploit +~~~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.exploit.BacktrackExploit + :members: + +Explore classes for Population Based Training +--------------------------------------------- + +BaseExplore +~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.explore.BaseExplore + :members: + + +PipelineExplore +~~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.explore.PipelineExplore + :members: + + +PerturbExplore +~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.explore.PerturbExplore + :members: + +ResampleExplore +~~~~~~~~~~~~~~~ + +.. autoclass:: orion.algo.pbt.explore.ResampleExplore + :members: + + + + diff --git a/docs/src/code/core/evc.rst b/docs/src/code/core/evc.rst index c5192d6d1..1adbef54a 100644 --- a/docs/src/code/core/evc.rst +++ b/docs/src/code/core/evc.rst @@ -8,7 +8,6 @@ Experiment Version Control :maxdepth: 1 :caption: Modules - evc/tree evc/experiment evc/adapters evc/conflicts diff --git a/docs/src/code/core/utils.rst b/docs/src/code/core/utils.rst index ebe67027f..c00da4c3d 100644 --- a/docs/src/code/core/utils.rst +++ b/docs/src/code/core/utils.rst @@ -9,6 +9,7 @@ Utilities utils/format_trials utils/format_terminal utils/singleton + utils/tree .. automodule:: orion.core.utils :members: diff --git a/docs/src/code/core/evc/tree.rst b/docs/src/code/core/utils/tree.rst similarity index 51% rename from docs/src/code/core/evc/tree.rst rename to docs/src/code/core/utils/tree.rst index 92472dfe4..10705fa67 100644 --- a/docs/src/code/core/evc/tree.rst +++ b/docs/src/code/core/utils/tree.rst @@ -1,5 +1,5 @@ Generic Tree ============ -.. automodule:: orion.core.evc.tree +.. 
automodule:: orion.core.utils.tree :members: diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index 37ae20753..58ee71994 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -212,6 +212,69 @@ Configuration executed_times, compute_bracket_idx +.. _PBT: + +Population Based Training (PBT) +------------------------------- + +Population based training is an evolutionary algorithm that evolves trials +from low fidelity levels to high fidelity levels (e.g. number of epochs), reusing +the model's parameters along the way. This has the effect of creating hyperparameter +schedules through the fidelity levels. + +See documentation below for more information on the algorithm and how to use it. + +.. note:: + + Current implementation does not support more than one fidelity dimension. + +Configuration +~~~~~~~~~~~~~ + +.. code-block:: yaml + + experiment: + + strategy: StubParallelStrategy + + algorithms: + pbt: + population_size: 50 + generations: 10 + fork_timeout: 60 + exploit: + of_type: PipelineExploit + exploit_configs: + - of_type: BacktrackExploit + min_forking_population: 5 + truncation_quantile: 0.9 + candidate_pool_ratio: 0.2 + - of_type: TruncateExploit + min_forking_population: 5 + truncation_quantile: 0.8 + candidate_pool_ratio: 0.2 + explore: + of_type: PipelineExplore + explore_configs: + - of_type: ResampleExplore + probability: 0.2 + - of_type: PerturbExplore + factor: 1.2 + volatility: 0.0001 + + + +.. note:: + Notice the additional ``strategy`` in configuration which is not mandatory for most other + algorithms. See :ref:`StubParallelStrategy` for more information. + + +.. autoclass:: orion.algo.pbt.pbt.PBT + :noindex: + :exclude-members: space, state_dict, set_state, suggest, observe, is_done, seed_rng, + configuration, requires_type, rng, register + + .. 
_tpe-algorithm: diff --git a/setup.py b/setup.py index b8a7aac87..b22693321 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ packages = [ # Packages must be sorted alphabetically to ease maintenance and merges. "orion.algo", + "orion.algo.pbt", "orion.analysis", "orion.benchmark", "orion.client", @@ -53,6 +54,7 @@ "hyperband = orion.algo.hyperband:Hyperband", "tpe = orion.algo.tpe:TPE", "EvolutionES = orion.algo.evolution_es:EvolutionES", + "pbt = orion.algo.pbt.pbt:PBT", ], "Database": [ "ephemeraldb = orion.core.io.database.ephemeraldb:EphemeralDB", diff --git a/src/orion/algo/base.py b/src/orion/algo/base.py index 012b1b2a8..07697b893 100644 --- a/src/orion/algo/base.py +++ b/src/orion/algo/base.py @@ -161,7 +161,7 @@ def format_trial(self, trial): return trial - def get_id(self, trial, ignore_fidelity=False): + def get_id(self, trial, ignore_fidelity=False, ignore_parent=False): """Return unique hash for a trials based on params The trial is assumed to be in the transformed space if the algorithm is working in a @@ -174,6 +174,10 @@ def get_id(self, trial, ignore_fidelity=False): ignore_fidelity: bool, optional If True, the fidelity dimension is ignored when computing a unique hash for the trial. Defaults to False. + ignore_parent: bool, optional + If True, the parent id is ignored when computing a unique hash for + the trial. Defaults to False. 
+ """ # Apply transforms and reverse to see data as it would come from DB @@ -188,6 +192,7 @@ def get_id(self, trial, ignore_fidelity=False): ignore_fidelity=ignore_fidelity, ignore_experiment=True, ignore_lie=True, + ignore_parent=ignore_parent, ) @property @@ -357,8 +362,8 @@ def judge(self, trial, measurements): # pylint:disable=no-self-use,unused-argum trial: ``orion.core.worker.trial.Trial`` Trial object to retrieve from the database - Notes: - ------ + Notes + ----- Calling algorithm to `judge` a `point` based on its online `measurements` will effectively change a state in the algorithm (like a reinforcement learning agent's hidden state or an diff --git a/src/orion/algo/hyperband.py b/src/orion/algo/hyperband.py index dbc3cdc92..924f37794 100644 --- a/src/orion/algo/hyperband.py +++ b/src/orion/algo/hyperband.py @@ -192,8 +192,10 @@ def sample_from_bracket(self, bracket, num): params={self.fidelity_index: bracket.rungs[0]["resources"]} ) - full_id = self.get_id(trial, ignore_fidelity=False) - id_wo_fidelity = self.get_id(trial, ignore_fidelity=True) + full_id = self.get_id(trial, ignore_fidelity=False, ignore_parent=False) + id_wo_fidelity = self.get_id( + trial, ignore_fidelity=True, ignore_parent=True + ) bracket_id = self.trial_to_brackets.get(id_wo_fidelity, None) if bracket_id is not None: @@ -262,7 +264,7 @@ def set_state(self, state_dict): def register_samples(self, bracket, samples): for sample in samples: - full_id = self.get_id(sample, ignore_fidelity=False) + full_id = self.get_id(sample, ignore_fidelity=False, ignore_parent=False) if self.has_observed(sample): raise RuntimeError( "Hyperband resampling a trial that was already completed. 
" @@ -273,9 +275,12 @@ def register_samples(self, bracket, samples): self.register(sample) bracket.register(sample) - if self.get_id(sample, ignore_fidelity=True) not in self.trial_to_brackets: + if ( + self.get_id(sample, ignore_fidelity=True, ignore_parent=True) + not in self.trial_to_brackets + ): self.trial_to_brackets[ - self.get_id(sample, ignore_fidelity=True) + self.get_id(sample, ignore_fidelity=True, ignore_parent=True) ] = self.brackets.index(bracket) def promote(self, num): @@ -384,7 +389,7 @@ def create_brackets(self): def _get_bracket(self, trial): """Get the bracket of a trial""" - _id_wo_fidelity = self.get_id(trial, ignore_fidelity=True) + _id_wo_fidelity = self.get_id(trial, ignore_fidelity=True, ignore_parent=True) return self.brackets[self.trial_to_brackets[_id_wo_fidelity]] def observe(self, trials): @@ -474,7 +479,9 @@ def is_filled(self): def get_trial_max_resource(self, trial): """Return the max resource value that has been tried for a trial""" max_resource = 0 - _id_wo_fidelity = self.hyperband.get_id(trial, ignore_fidelity=True) + _id_wo_fidelity = self.hyperband.get_id( + trial, ignore_fidelity=True, ignore_parent=True + ) for rung in self.rungs: if _id_wo_fidelity in rung["results"]: max_resource = rung["resources"] @@ -511,7 +518,9 @@ def sample(self, num): def register(self, trial): """Register a trial in the corresponding rung""" - self._get_results(trial)[self.hyperband.get_id(trial, ignore_fidelity=True)] = ( + self._get_results(trial)[ + self.hyperband.get_id(trial, ignore_fidelity=True, ignore_parent=True) + ] = ( trial.objective.value if trial.objective else None, copy.deepcopy(trial), ) @@ -562,7 +571,7 @@ def get_candidates(self, rung_id): while len(trials) + len(next_rung) < should_have_n_trials: objective, trial = rung[i] assert objective is not None - _id = self.hyperband.get_id(trial, ignore_fidelity=True) + _id = self.hyperband.get_id(trial, ignore_fidelity=True, ignore_parent=True) if _id not in next_rung: 
trials.append(trial) i += 1 diff --git a/src/orion/algo/pbt/exploit.py b/src/orion/algo/pbt/exploit.py new file mode 100644 index 000000000..ff36db87d --- /dev/null +++ b/src/orion/algo/pbt/exploit.py @@ -0,0 +1,372 @@ +""" +Exploit classes for Population Based Training +--------------------------------------------- + +Formulation of a general exploit function for population based training. +Implementations must inherit from ``orion.algo.pbt.BaseExploit``. + +Exploit objects can be created using `exploit_factory.create()`. + +Examples +-------- +>>> exploit_factory.create('TruncateExploit') +>>> exploit_factory.create('TruncateExploit', min_forking_population=10) + +""" + +import logging + +import numpy + +from orion.core.utils import GenericFactory + +logger = logging.getLogger(__name__) + + +class BaseExploit: + """Abstract class for Exploit in :py:class:`orion.algo.pbt.pbt.PBT` + + The exploit class is responsible for deciding whether the Population Based Training algorithm + should continue training a trial configuration at next fidelity level or whether it should fork + from another trial configuration. + + This class is expected to be stateless and serve as a configurable callable object. + """ + + def __init__(self): + pass + + def __call__(self, rng, trial, lineages): + """Execute exploit + + The method receives the current trial under examination and all lineages of + population based training. It must then decide whether the trial should be promoted + (continue with a higher fidelity) or if another trial should be forked instead. + + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExploit`` because the exploit + class must be stateless. + trial: Trial + The :py:class:`orion.core.worker.trial.Trial` that is currently under examination. + lineages: Lineages + All :py:class:`orion.algo.pbt.pbt.Lineages` created by the population based training + algorithm that is using this exploit class. 
+ + Returns + ------- + ``None`` + The exploit class signals that there are not enough completed trials in lineages to make + a decision for current trial. + ``Trial`` + If the returned trial is the same as the one received as argument, it means that + population based training should continue with same parameters. + If another trial from the lineages is returned, it means that population based training + should try to explore new parameters. + + """ + pass + + @property + def configuration(self): + """Configuration of the exploit object""" + return dict(of_type=self.__class__.__name__.lower()) + + +class PipelineExploit(BaseExploit): + """ + Pipeline of BaseExploit objects + + The pipeline executes the BaseExploit objects sequentially. If one object returns + `None`, the pipeline is stopped and it returns `None`. Likewise, if one object returns + a trial different than the one passed, the pipeline is stopped and this trial is returned. + Otherwise, if all BaseExploit objects return the same trial as the one passed to the pipeline, + then the pipeline returns it. + + Parameters + ---------- + exploit_configs: list of dict + List of dictionary representing the configurations of BaseExploit children. + + Examples + -------- + >>> PipelineExploit( + exploit_configs=[ + {'of_type': 'BacktrackExploit'}, + {'of_type': 'TruncateExploit'} + ]) + """ + + def __init__(self, exploit_configs): + self.pipeline = [] + for exploit_config in exploit_configs: + self.pipeline.append(exploit_factory.create(**exploit_config)) + + def __call__(self, rng, trial, lineages): + """Execute exploit objects sequentially + + If one object returns `None`, the pipeline is stopped and it returns `None`. Likewise, if + one object returns a trial different than the one passed, the pipeline is stopped and this + trial is returned. Otherwise, if all BaseExploit objects return the same trial as the one + passed to the pipeline, then the pipeline returns it. 
+ + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExploit`` because the exploit + class must be stateless. + trial: Trial + The :py:class:`orion.core.worker.trial.Trial` that is currently under examination. + lineages: Lineages + All :py:class:`orion.algo.pbt.pbt.Lineages` created by the population based training + algorithm that is using this exploit class. + + Returns + ------- + ``None`` + The exploit class signals that there are not enough completed trials in lineages to make + a decision for current trial. + ``Trial`` + If the returned trial is the same as the one received as argument, it means that + population based training should continue with same parameters. + If another trial from the lineages is returned, it means that population based training + should try to explore new parameters. + + """ + + for exploit in self.pipeline: + logger.debug("Executing %s", exploit.__class__.__name__) + selected_trial = exploit(rng, trial, lineages) + if selected_trial is not trial: + logger.debug( + "Exploit %s selected trial %s over %s", + exploit.__class__.__name__, + selected_trial, + trial, + ) + return selected_trial + else: + logger.debug( + "Exploit %s is skipping for trial %s", + exploit.__class__.__name__, + trial, + ) + + return trial + + @property + def configuration(self): + """Configuration of the exploit object""" + configuration = super(PipelineExploit, self).configuration + configuration["exploit_configs"] = [ + exploit.configuration for exploit in self.pipeline + ] + return configuration + + +class TruncateExploit(BaseExploit): + """Truncate Exploit + + If the given trial is under a ``truncation_quantile`` compared to all other trials that + has reached the same fidelity level, then a new candidate trial is selected for forking. + The new candidate is selected from a pool of best ``candidate_pool_ratio``\% of the available + trials at the same fidelity level. 
+ + If there are fewer than ``min_forking_population`` trials that have reached the fidelity level + as the passed trial, then `None` is returned to signal that we should reconsider this trial later + on when more trials are completed at this fidelity level. + + Parameters + ---------- + min_forking_population: int, optional + Minimum number of trials that should be completed up to the fidelity level of the current + trial passed. TruncateExploit will return ``None`` when this requirement is not met. + Default: 5 + truncation_quantile: float, optional + If the passed trial's objective is above quantile ``truncation_quantile``, then another + candidate is considered for forking. Default: 0.8 + candidate_pool_ratio: float, optional + When choosing another candidate for forking, it will be randomly selected from the + best ``candidate_pool_ratio``\% of the available trials. Default: 0.2 + + """ + + def __init__( + self, + min_forking_population=5, + truncation_quantile=0.8, + candidate_pool_ratio=0.2, + ): + self.min_forking_population = min_forking_population + self.truncation_quantile = truncation_quantile + self.candidate_pool_ratio = candidate_pool_ratio + + def __call__(self, rng, trial, lineages): + """Select other trial if current one not good enough + + If the given trial is under a ``self.truncation_quantile`` compared to all other trials that + have reached the same fidelity level, then a new candidate trial is selected for forking. + The new candidate is selected from a pool of best ``self.candidate_pool_ratio``\% of the + available trials at the same fidelity level. + + If there are fewer than ``self.min_forking_population`` trials that have reached the fidelity + level as the passed trial, then `None` is returned to signal that we should reconsider this + trial later on when more trials are completed at this fidelity level. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. 
It is not contained in ``BaseExploit`` because the exploit + class must be stateless. + trial: Trial + The :py:class:`orion.core.worker.trial.Trial` that is currently under examination. + lineages: Lineages + All :py:class:`orion.algo.pbt.pbt.Lineages` created by the population based training + algorithm that is using this exploit class. + + Returns + ------- + ``None`` + The exploit class signals that there are not enough completed trials in lineages to make + a decision for current trial. + ``Trial`` + If the returned trial is the same as the one received as argument, it means that + population based training should continue with same parameters. + If another trial from the lineages is returned, it means that population based training + should try to explore new parameters. + + """ + + trials = lineages.get_trials_at_depth(trial) + return self._truncate(rng, trial, trials) + + def _truncate( + self, + rng, + trial, + trials, + ): + completed_trials = [trial for trial in trials if trial.status == "completed"] + + if len(completed_trials) < self.min_forking_population: + logger.debug( + "Not enough trials completed to exploit: %s", len(completed_trials) + ) + return None + + if trial not in completed_trials: + raise ValueError( + f"Trial {trial.id} not included in list of completed trials." + ) + + sorted_trials = sorted( + completed_trials, key=lambda trial: trial.objective.value + ) + + worse_trials = sorted_trials[ + int(self.truncation_quantile * len(sorted_trials)) : + ] + + if trial not in worse_trials: + logger.debug("Trial %s is good enough, no need to exploit.", trial) + return trial + + candidate_threshold_index = int(self.candidate_pool_ratio * len(sorted_trials)) + + if candidate_threshold_index == 0: + logger.warning( + "Not enough completed trials to have a candidate pool. 
" + "You should consider increasing min_forking_population or candidate_pool_ratio" + ) + return None + + index = rng.choice(numpy.arange(0, candidate_threshold_index)) + return sorted_trials[index] + + @property + def configuration(self): + """Configuration of the exploit object""" + configuration = super(TruncateExploit, self).configuration + configuration.update( + dict( + min_forking_population=self.min_forking_population, + truncation_quantile=self.truncation_quantile, + candidate_pool_ratio=self.candidate_pool_ratio, + ) + ) + + return configuration + + +class BacktrackExploit(TruncateExploit): + """ + Backtracking Exploit + + This exploit is inspired from PBT with backtracking proposed in [1]. + Instead of using all trials at the same level of fidelity as in + ``TruncateExploit``, it selects + the best trials from each lineage (worker), one per lineage. The objective of the + best trial is compared to the objective of the trial under analysis, and if the ratio + is higher than some treshold the current trial is not promoted. A trial from the pool + of best trials is selected randomly. + + The backtracking threshold + defined by [1] is unstable however and cause division error by 0 when the best candidate trial + has an objective of 0. Also, if we select trials at any fidelity levels, we would + likely drop any trial at a low fidelity in favor of best trials at high fidelity. + This class use a quantile threshold instead of the ratio in [1] to determine if a trial should + be continued at next fidelity level. The candidates for forking are select from + best trials from all running lineages (workers), like proposed in [1], but limited to trials + up to the fidelity level of the current trial under analysis. + + [1] Zhang, Baohe, Raghu Rajan, Luis Pineda, Nathan Lambert, André Biedenkapp, Kurtland Chua, + Frank Hutter, and Roberto Calandra. "On the importance of hyperparameter optimization for + model-based reinforcement learning." 
In International Conference on Artificial Intelligence and + Statistics, pp. 4015-4023. PMLR, 2021. + """ + + def __call__(self, rng, trial, lineages): + """Select other trial if current one not good enough + + If the given trial is under a ``self.truncation_quantile`` compared to all other best + trials with lower or equal fidelity level, + then a new candidate trial is selected for forking. + The new candidate is selected from a pool of best ``self.candidate_pool_ratio``\% of the + best trials with lower or equal fidelity level. See class description for more + explanation on the rationale. + + If there are less than ``self.min_forking_population`` trials that have reached the fidelity + level as the passed trial, then `None` is return to signal that we should reconsider this + trial later on when more trials are completed at this fidelity level. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExploit`` because the exploit + class must be stateless. + trial: Trial + The :py:class:`orion.core.worker.trial.Trial` that is currently under examination. + lineages: Lineages + All :py:class:`orion.algo.pbt.pbt.Lineages` created by the population based training + algorithm that is using this exploit class. + + Returns + ------- + ``None`` + The exploit class signals that there are not enough completed trials in lineages to make + a decision for current trial. + ``Trial`` + If the returned trial is the same as the one received as argument, it means that + population based training should continue with same parameters. + If another trial from the lineages is returned, it means that population based training + should try to explore new parameters. 
+ + """ + + elites = lineages.get_elites(max_depth=trial) + return self._truncate(rng, trial, elites + [trial]) + + +exploit_factory = GenericFactory(BaseExploit) diff --git a/src/orion/algo/pbt/explore.py b/src/orion/algo/pbt/explore.py new file mode 100644 index 000000000..59597073f --- /dev/null +++ b/src/orion/algo/pbt/explore.py @@ -0,0 +1,363 @@ +""" +Explore classes for Population Based Training +--------------------------------------------- + +Formulation of a general explore function for population based training. +Implementations must inherit from ``orion.algo.pbt.BaseExplore``. + +Explore objects can be created using `explore_factory.create()`. + +Examples +-------- +>>> explore_factory.create('PerturbExplore') +>>> explore_factory.create('PerturbExplore', factor=1.5) + +""" + +import numpy + +from orion.core.utils import GenericFactory +from orion.core.utils.flatten import flatten, unflatten + + +class BaseExplore: + """Abstract class for Explore in :py:class:`orion.algo.pbt.pbt.PBT` + + The explore class is responsible for proposing new parameters for a given trial and space. + + This class is expected to be stateless and serve as a configurable callable object. + """ + + def __init__(self): + pass + + def __call__(self, rng, space, params): + """Execute explore + + The method receives the space and the parameters of the current trial under examination. + It must then select new parameters for the trial. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExplore`` because the explore + class must be stateless. + space: Space + The search space optimized by the algorithm. + params: dict + Dictionary representing the parameters of the current trial under examination + (`trial.params`). + + Returns + ------- + ``dict`` + The new set of parameters for the trial to be branched. 
+ + """ + + pass + + @property + def configuration(self): + """Configuration of the explore object""" + return dict(of_type=self.__class__.__name__.lower()) + + +class PipelineExplore(BaseExplore): + """ + Pipeline of BaseExplore objects + + The pipeline executes the BaseExplore objects sequentially. If one object returns + the parameters that are different than the ones passed (``params``), then the pipeline + returns these parameter values. Otherwise, if all BaseExplore objects return the same + parameters as the one passed to the pipeline, then the pipeline returns it. + + Parameters + ---------- + explore_configs: list of dict + List of dictionaries representing the configurations of BaseExplore children. + + Examples + -------- + This pipeline is useful if for instance you want to sample from the space with a small + probability, but otherwise use a local perturbation. + + >>> PipelineExplore( + explore_configs=[ + {'of_type': 'ResampleExplore', 'probability': 0.05}, + {'of_type': 'PerturbExplore'} + ]) + + """ + + def __init__(self, explore_configs): + self.pipeline = [] + for explore_config in explore_configs: + self.pipeline.append(explore_factory.create(**explore_config)) + + def __call__(self, rng, space, params): + """Execute explore objects sequentially + + If one explore object returns the parameters that are different than the ones passed + (``params``), then the pipeline returns these parameter values. Otherwise, if all + BaseExplore objects return the same parameters as the one passed to the pipeline, then the + pipeline returns it. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExplore`` because the explore + class must be stateless. + space: Space + The search space optimized by the algorithm. + params: dict + Dictionary representing the parameters of the current trial under examination + (`trial.params`). 
+ + Returns + ------- + ``dict`` + The new set of parameters for the trial to be branched. + + """ + + for explore in self.pipeline: + new_params = explore(rng, space, params) + if new_params is not params: + return new_params + + return params + + @property + def configuration(self): + """Configuration of the explore object""" + configuration = super(PipelineExplore, self).configuration + configuration["explore_configs"] = [ + explore.configuration for explore in self.pipeline + ] + return configuration + + +class PerturbExplore(BaseExplore): + """ + Perturb parameters for exploration + + Given a set of parameter values, this exploration object randomly perturbs + them with a given ``factor``. It will multiply the value of a dimension + with probability 0.5, otherwise divide it. Values are clamped to limits of the + search space when exceeding it. For categorical dimensions, a new value is sampled + from categories with equal probability for each category. + + Parameters + ---------- + factor: float, optional + Factor used to multiply or divide with probability 0.5 the values of the dimensions. + Only applies to real or int dimensions. Integer dimensions are pushed to next integer + if ``new_value > value`` otherwise reduced to previous integer, where new_value is + the result of either ``value * factor`` or ``value / factor``. + Categorical dimensions are sampled from categories randomly. Default: 1.2 + volatility: float, optional + If the result of ``value * factor`` or ``value / factor`` exceeds the + limit of the search space, the new value is set to limit and then added + or subtracted ``abs(normal(0, volatility))`` (if at lower limit or upper limit). + Default: 0.0001 + + Notes + ----- + Categorical dimensions with special probabilities are not supported for now. A category + will be sampled with equal probability for each category. 
+ + """ + + def __init__(self, factor=1.2, volatility=0.0001): + self.factor = factor + self.volatility = volatility + + def perturb_real(self, rng, dim_value, interval): + """Perturb real value dimension + + Parameters + ---------- + rng: numpy.random.Generator + Random number generator + dim_value: float + Value of the dimension + interval: tuple of float + Limit of the dimension (lower, upper) + + """ + if rng.random() > 0.5: + dim_value *= self.factor + else: + dim_value *= 1.0 / self.factor + + if dim_value > interval[1]: + dim_value = max( + interval[1] - numpy.abs(rng.normal(0, self.volatility)), interval[0] + ) + elif dim_value < interval[0]: + dim_value = min( + interval[0] + numpy.abs(rng.normal(0, self.volatility)), interval[1] + ) + + return dim_value + + def perturb_int(self, rng, dim_value, interval): + """Perturb integer value dimension + + Parameters + ---------- + rng: numpy.random.Generator + Random number generator + dim_value: int + Value of the dimension + interval: tuple of int + Limit of the dimension (lower, upper) + + """ + + new_dim_value = self.perturb_real(rng, dim_value, interval) + + rounded_new_dim_value = int(numpy.round(new_dim_value)) + + if rounded_new_dim_value == dim_value and new_dim_value > dim_value: + new_dim_value = dim_value + 1 + elif rounded_new_dim_value == dim_value and new_dim_value < dim_value: + new_dim_value = dim_value - 1 + else: + new_dim_value = rounded_new_dim_value + + # Avoid out of dimension. + new_dim_value = min(max(new_dim_value, interval[0]), interval[1]) + + return new_dim_value + + def perturb_cat(self, rng, dim_value, dim): + """Perturb categorical dimension + + Parameters + ---------- + rng: numpy.random.Generator + Random number generator + dim_value: object + Value of the dimension, can be any type. + dim: orion.algo.space.CategoricalDimension + CategoricalDimension object defining the search space for this dimension. 
+ + """ + return rng.choice(dim.interval()) + + def __call__(self, rng, space, params): + """Execute perturbation + + Given a set of parameter values, this exploration object randomly perturb them with a given + ``factor``. It will multiply the value of a dimension with probability 0.5, otherwise divide + it. Values are clamped to limits of the search space when exceeding it. For categorical + dimensions, a new value is sampled from categories with equal probability for each + categories. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExplore`` because the explore + class must be stateless. + space: Space + The search space optimized by the algorithm. + params: dict + Dictionary representing the parameters of the current trial under examination + (`trial.params`). + + Returns + ------- + ``dict`` + The new set of parameters for the trial to be branched. + + """ + + new_params = {} + params = flatten(params) + for dim in space.values(): + dim_value = params[dim.name] + if dim.type == "real": + dim_value = self.perturb_real(rng, dim_value, dim.interval()) + elif dim.type == "integer": + dim_value = self.perturb_int(rng, dim_value, dim.interval()) + elif dim.type == "categorical": + dim_value = self.perturb_cat(rng, dim_value, dim) + elif dim.type == "fidelity": + # do nothing + pass + else: + raise ValueError(f"Unsupported dimension type {dim.type}") + + new_params[dim.name] = dim_value + + return unflatten(new_params) + + @property + def configuration(self): + """Configuration of the exploit object""" + configuration = super(PerturbExplore, self).configuration + configuration["factor"] = self.factor + configuration["volatility"] = self.volatility + return configuration + + +class ResampleExplore(BaseExplore): + """ + Sample parameters search space + + With given probability ``probability``, it will sample a new set of parameters + from the search space totally independently of the ``parameters`` 
passed to ``__call__``. + Otherwise, it will return the passed ``parameters``. + + Parameters + ---------- + probability: float, optional + Probability of sampling a new set of parameters. Default: 0.2 + + """ + + def __init__(self, probability=0.2): + self.probability = probability + + def __call__(self, rng, space, params): + """Execute resampling + + With given probability ``self.probability``, it will sample a new set of parameters from the + search space totally independently of the ``parameters`` passed to ``__call__``. Otherwise, + it will return the passed ``parameters``. + + Parameters + ---------- + rng: numpy.random.Generator + A random number generator. It is not contained in ``BaseExplore`` because the explore + class must be stateless. + space: Space + The search space optimized by the algorithm. + params: dict + Dictionary representing the parameters of the current trial under examination + (`trial.params`). + + Returns + ------- + ``dict`` + The new set of parameters for the trial to be branched. 
+ + """ + + if rng.random() < self.probability: + trial = space.sample(1, seed=tuple(rng.randint(0, 1000000, size=3)))[0] + params = trial.params + + return params + + @property + def configuration(self): + """Configuration of the explore object""" + configuration = super(ResampleExplore, self).configuration + configuration["probability"] = self.probability + return configuration + + +explore_factory = GenericFactory(BaseExplore) diff --git a/src/orion/algo/pbt/pbt.py b/src/orion/algo/pbt/pbt.py new file mode 100644 index 000000000..760b66be0 --- /dev/null +++ b/src/orion/algo/pbt/pbt.py @@ -0,0 +1,891 @@ +# -*- coding: utf-8 -*- +""" +Population Based Training +========================= + +""" +import copy +import logging +import shutil +import time + +import numpy + +from orion.algo.base import BaseAlgorithm +from orion.algo.pbt.exploit import exploit_factory +from orion.algo.pbt.explore import explore_factory +from orion.algo.random import Random +from orion.core.utils.flatten import flatten, unflatten +from orion.core.utils.tree import TreeNode + +logger = logging.getLogger(__name__) + + +SPACE_ERROR = """ +PBT cannot be used if space does not contain a fidelity dimension.
+For more information on the configuration and usage of PBT, see +https://orion.readthedocs.io/en/develop/user/algorithms.html#pbt +""" + + +def get_objective(trial): + if trial.objective and trial.objective.value is not None: + return trial.objective.value + + return float("inf") + + +def compute_fidelities(n_branching, low, high, base): + if base == 1: + return numpy.linspace(low, high, num=n_branching + 1, endpoint=True).tolist() + else: + budgets = numpy.logspace( + numpy.log(low) / numpy.log(base), + numpy.log(high) / numpy.log(base), + n_branching + 1, + base=base, + endpoint=True, + ) + + return budgets.tolist() + + +class PBT(BaseAlgorithm): + """Population Based Training algorithm + + Population based training is an evolutionary algorithm that evolves trials + from low fidelity levels to high fidelity levels (ex: number of epochs). + For a population of size `m`, it first samples `m` trials at lowest fidelity level. + When trials are completed, it decides based on the ``exploit`` configuration whether + the trial should be promoted to next fidelity level or whether another trial + should be selected instead and forked. When a trial is forked, new hyperparameters are + selected based on the trial's hyperparameters and the ``explore`` configuration. + The original trial's working_dir is then copied over to the new trial's working_dir + so that the user script can resume execution from model parameters of original trial. + + It is important that the weights of models trained for each trial are saved in the corresponding + directory at path ``trial.working_dir``. The file name does not matter. The entire directory is + copied to a new ``trial.working_dir`` when PBT selects a good model and explores new + hyperparameters. The new trial can be resumed by the user by loading the weights found in the + freshly copied ``new_trial.working_dir``, and saved back at the same path at end of trial + execution.
To access ``trial.working_dir`` from Oríon's commandline API, see documentation at + https://orion.readthedocs.io/en/stable/user/script.html#command-line-templating. To access + ``trial.working_dir`` from Oríon's Python API, set argument ``trial_arg="trial"`` when executing + method :py:meth:`orion.client.experiment.ExperimentClient.workon`. + + The number of fidelity levels is determined by the argument ``generations``. The lowest + and highest fidelity levels, and the distribution, are determined by the search space's + dimension that will have a prior ``fidelity(low, high, base)``, where ``base`` is the + logarithm base of the dimension. Original PBT algorithm uses a base of 1. + + PBT will try to return as many trials as possible when calling ``suggest(num)``, up to ``num``. + When ``population_size`` trials are sampled and more trials are requested, it will try to + generate new trials by promoting or forking existing trials in a queue. This queue will get + filled when calling ``observe(trials)`` on completed or broken trials. + + If trials are broken at lowest fidelity level, they are ignored and will not count + in population size so that PBT can sample additional trials to reach ``population_size`` + completed trials at lowest fidelity. If a trial is broken at higher fidelity, the + original trial leading to the broken trial is examined again for ``exploit`` and ``explore``. + If the broken trial was the result of a fork, then we backtrack to the trial that was dropped + during ``exploit`` in favor of the forked trial. If the broken trial was a promotion, then + we backtrack to the original trial that was promoted. + + For more information on the algorithm, + see original paper at https://arxiv.org/abs/1711.09846. + + Jaderberg, Max, et al. "Population based training of neural networks." + arXiv preprint, arXiv:1711.09846 (2017). + + Notes + ----- + It is important that the experiment using this algorithm has a working directory properly + set.
The experiment's working dir serves as the base for the trial's working directories. + + The trial's working directory is ``trial.working_dir``. This is where the weights of the model + should be saved. Using ``trial.hash_params`` to determine a unique working dir for the trial + will result in working on a different directory than the one copied by PBT, hence missing the + copied model parameters. + + Parameters + ---------- + space: `orion.algo.space.Space` + Optimisation space with priors for each dimension. + seed: None, int or sequence of int + Seed for the random number generator used to sample new trials. + Default: ``None`` + population_size: int, optional + Size of the population. No trial will be continued until there are `population_size` + trials executed until lowest fidelity. If a trial is broken during execution at lowest + fidelity, the algorithm will sample a new trial, keeping the population of *non-broken* + trials at `population_size`. For efficiency it is better to have fewer workers running than + population_size. Default: 50. + generations: int, optional + Number of generations, from lowest fidelity to highest one. This will determine how + many branchings occur during the execution of PBT. Default: 10 + exploit: dict or None, optional + Configuration for a ``pbt.exploit.BaseExploit`` object that determines + whether a trial should be exploited or not. If None, default configuration + is a ``PipelineExploit`` with ``BacktrackExploit`` and ``TruncateExploit``. + explore: dict or None, optional + Configuration for a ``pbt.explore.BaseExplore`` object that returns new parameter + values for exploited trials. If None, default configuration is a ``PipelineExplore`` with + ``ResampleExplore`` and ``PerturbExplore``. + fork_timeout: int, optional + Maximum amount of time in seconds that an attempt to mutate a trial should take, otherwise + algorithm.suggest() will raise ``SuggestionTimeout``.
Default: 60 + + """ + + requires_type = None + requires_dist = "linear" + requires_shape = "flattened" + + def __init__( + self, + space, + seed=None, + population_size=50, + generations=10, + exploit=None, + explore=None, + fork_timeout=60, + ): + if exploit is None: + exploit = { + "of_type": "PipelineExploit", + "exploit_configs": [ + { + "of_type": "BacktrackExploit", + "min_forking_population": 5, + "truncation_quantile": 0.9, + "candidate_pool_ratio": 0.2, + }, + { + "of_type": "TruncateExploit", + "min_forking_population": 5, + "truncation_quantile": 0.8, + "candidate_pool_ratio": 0.2, + }, + ], + } + + if explore is None: + explore = { + "of_type": "PipelineExplore", + "explore_configs": [ + {"of_type": "ResampleExplore", "probability": 0.2}, + {"of_type": "PerturbExplore", "factor": 1.2, "volatility": 0.0001}, + ], + } + + self.random_search = Random(space) + self._queue = [] + + fidelity_index = self.fidelity_index + if fidelity_index is None: + raise RuntimeError(SPACE_ERROR) + + self.fidelity_dim = space[fidelity_index] + + self.fidelities = compute_fidelities( + generations, + self.fidelity_dim.low, + self.fidelity_dim.high, + self.fidelity_dim.base, + ) + self.fidelity_upgrades = { + a: b for a, b in zip(self.fidelities, self.fidelities[1:]) + } + logger.info("Executing PBT with fidelities: %s", self.fidelities) + + self.exploit_func = exploit_factory.create(**exploit) + self.explore_func = explore_factory.create(**explore) + + self.lineages = Lineages() + self._lineage_dropped_head = {} + + super(PBT, self).__init__( + space, + seed=seed, + population_size=population_size, + generations=generations, + exploit=exploit, + explore=explore, + fork_timeout=fork_timeout, + ) + + @property + def space(self): + """Return transformed space of PBT""" + return self.random_search.space + + @space.setter + def space(self, space): + """Set the space of PBT and initialize it""" + self.random_search.space = space + + @property + def rng(self): + """Random Number 
Generator""" + return self.random_search.rng + + def seed_rng(self, seed): + """Seed the state of the random number generator. + + Parameters + ---------- + seed: int + Integer seed for the random number generator. + """ + self.random_search.seed_rng(seed) + + @property + def state_dict(self): + """Return a state dict that can be used to reset the state of the algorithm.""" + state_dict = super(PBT, self).state_dict + state_dict["random_search"] = self.random_search.state_dict + state_dict["lineages"] = copy.deepcopy(self.lineages) + state_dict["queue"] = copy.deepcopy(self._queue) + return state_dict + + def set_state(self, state_dict): + """Reset the state of the algorithm based on the given state_dict""" + super(PBT, self).set_state(state_dict) + self.random_search.set_state(state_dict["random_search"]) + self.lineages = state_dict["lineages"] + self._queue = state_dict["queue"] + + @property + def _num_root(self): + """Number of trials with lowest fidelity level that are not broken.""" + return sum( + int(lineage.root.item.status != "broken") for lineage in self.lineages + ) + + @property + def is_done(self): + """Is done if ``population_size`` trials at highest fidelity level are completed.""" + n_completed = 0 + final_depth = self._get_depth_of(self.fidelity_dim.high) + for trial in self.lineages.get_trials_at_depth(final_depth): + n_completed += int(trial.status == "completed") + + return n_completed >= self.population_size + + def register(self, trial): + """Save the trial as one suggested or observed by the algorithm + + The trial is additionally saved in the lineages object of PBT. + + Parameters + ---------- + trial: ``orion.core.worker.trial.Trial`` + Trial from a `orion.algo.space.Space`. + + """ + super(PBT, self).register(trial) + self.lineages.register(trial) + + def suggest(self, num): + """Suggest a ``num`` ber of new sets of parameters. + + PBT will try to sample up to ``population_size`` trials at lowest fidelity level. 
+ If more trials are required, it will try to promote or fork trials based on the queue + of available trials observed. + + Parameters + ---------- + num: int + Number of points to suggest. The algorithm may return less than the number of points + requested. + + Returns + ------- + list of trials + A list of trials representing values suggested by the algorithm. + + """ + + # Sample points until num is met, or population_size + num_random_samples = min(max(self.population_size - self._num_root, 0), num) + logger.debug( + "PBT has %s pending or completed trials at root, %s broken trials.", + self._num_root, + len(self.lineages) - self._num_root, + ) + logger.debug("Sampling %s new trials", num_random_samples) + trials = self._sample(num_random_samples) + logger.debug("Sampled %s new trials", len(trials)) + logger.debug( + "After sampling, PBT has %s pending or completed trials at root, %s broken trials.", + self._num_root, + len(self.lineages) - self._num_root, + ) + + # Then try branching based on observed_queue until num is met or queue is exhausted. 
+ num_fork_samples = max(num - len(trials), 0) + logger.debug( + "Attempting Forking %s trials, with %s trials queued available for forking", + num_fork_samples, + len(self._queue), + ) + forked_trials = self._fork_lineages(num_fork_samples) + logger.debug("Forked %s new trials", len(forked_trials)) + logger.debug( + "After forking, PBT has %s pending or completed trials at root, %s broken trials.", + self._num_root, + len(self.lineages) - self._num_root, + ) + + trials += forked_trials + + return trials + + def _sample(self, num): + """Sample trials based on random search""" + sampled_trials = self.random_search.suggest(num) + + trials = [] + for trial in sampled_trials: + branched_trial = trial.branch( + params={self.fidelity_dim.name: self.fidelity_dim.low} + ) + self.register(branched_trial) + trials.append(branched_trial) + + return trials + + def _get_depth_of(self, fidelity): + """Get the depth of a fidelity in the lineages""" + return self.fidelities.index(fidelity) + + def _fork_lineages(self, num): + """Try to promote or fork up to ``num`` trials from the queue.""" + + branched_trials = [] + skipped_trials = [] + + while len(branched_trials) < num and self._queue: + trial = self._queue.pop(0) + + trial_to_branch, new_trial = self._generate_offspring(trial) + + if trial_to_branch is None: + logger.debug("Skipping trial %s", trial) + skipped_trials.append(trial) + continue + + self.lineages.fork(trial_to_branch, new_trial) + + if trial is not trial_to_branch: + logger.debug("Dropped trial %s in favor of %s", trial, trial_to_branch) + self.lineages.set_jump(trial, new_trial) + + logger.debug("Forking trial %s to %s", trial_to_branch, new_trial) + + branched_trials.append(new_trial) + self.register(new_trial) + + self._queue = skipped_trials + self._queue + + return branched_trials + + def _generate_offspring(self, trial): + """Try to promote or fork a given trial.""" + + new_trial = trial + + if not self.has_suggested(new_trial): + raise RuntimeError( + 
"Trying to fork a trial that was not registered yet. This should never happen" + ) + + attempts = 0 + start = time.perf_counter() + while ( + self.has_suggested(new_trial) + and time.perf_counter() - start <= self.fork_timeout + ): + trial_to_explore = self.exploit_func( + self.rng, + trial, + self.lineages, + ) + + if trial_to_explore is None: + return None, None + elif trial_to_explore is trial: + new_params = {} + trial_to_branch = trial + logger.debug("Promoting trial %s, parameters stay the same.", trial) + else: + new_params = flatten( + self.explore_func(self.rng, self.space, trial_to_explore.params) + ) + trial_to_branch = trial_to_explore + logger.debug( + "Forking trial %s with new parameters %s", + trial_to_branch, + new_params, + ) + + # Set next level of fidelity + new_params[self.fidelity_index] = self.fidelity_upgrades[ + trial_to_branch.params[self.fidelity_index] + ] + + new_trial = trial_to_branch.branch(params=new_params) + new_trial = self.space.transform(self.space.reverse(new_trial)) + + logger.debug("Attempt %s - Creating new trial %s", attempts, new_trial) + + attempts += 1 + + if ( + self.has_suggested(new_trial) + and time.perf_counter() - start > self.fork_timeout + ): + # TODO: Replace with SuggestionTimeout or relevant Exception based on PR #684. + raise RuntimeError( + f"Could not generate unique new parameters for trial {trial.id} in " + f"less than {self.fork_timeout} seconds. Attempted {attempts} times." 
+ ) + + return trial_to_branch, new_trial + + def _triage(self, trials): + """Triage observed trials and return those that may be queued.""" + + trials_to_verify = [] + + # First try to resume from trials if necessary, then only push to queue leafs + for trial in trials: + if not self.has_suggested(trial): + logger.debug("Ignoring unknown trial %s", trial) + continue + + if not self.has_observed(trial) and trial.status in ["completed", "broken"]: + logger.debug("Will verify trial %s for queue", trial) + trials_to_verify.append(trial) + + self.register(trial) + + return trials_to_verify + + def _queue_trials_for_promotions(self, trials): + """Queue trials if they are completed or ancestor trials if they are broken.""" + for trial in trials: + # TODO: On resumption, broken trials will be observed and will lead + # to retry. This is because jumps are lost. + if trial.status == "broken": + # Branch again from trial that lead to this broken one. + lineage_to_retry = self.lineages.get_lineage(trial).get_true_ancestor() + if lineage_to_retry: + logger.debug( + "Trial %s is broken, queuing ancestor %s to re-attempt forking.", + trial, + lineage_to_retry.item, + ) + self._queue.append(lineage_to_retry.item) + else: + logger.debug( + ( + "Trial %s from initial generation is broken, " + "new trials can be sampled at next suggest() call." + ), + trial, + ) + + elif trial.status == "completed": + logger.debug( + "Trial %s is completed, queuing it to attempt forking.", trial + ) + self._queue.append(trial) + + def observe(self, trials): + """Observe the trials and queue those available for promotion or forking. + + Parameters + ---------- + trials: list of ``orion.core.worker.trial.Trial`` + Trials from a `orion.algo.space.Space`. + + """ + # TODO: Jumps are lost during resumption. Need to save algo state to conserve them. 
+ trials_to_verify = self._triage(trials) + self._queue_trials_for_promotions(trials_to_verify) + + +class Lineages: + """Lineages of trials for workers in PBT + + This class regroup all lineages of trials generated by PBT for a given experiment. + + Each lineage is a path from a leaf trial (highest fidelity level) up to the root + (lowest fidelity level). Multiple lineages can fork from the same root, forming a tree. + A Lineages object may reference multiple trees of lineages. Iterating a Lineages object will + iterate on the roots of these trees. + + """ + + def __init__(self): + self._lineage_roots = [] + self._trial_to_lineages = {} + + def __len__(self): + """Number of roots in the Lineages""" + return len(self._lineage_roots) + + def __iter__(self): + """Iterate over the roots of the Lineages""" + return iter(self._lineage_roots) + + def add(self, trial): + """Add a trial to the lineages + + If the trial is already in the lineages, this will only return the corresponding lineage + node. Otherwise, a new lineage node will be created and added as a root. + + Parameters + ---------- + trial: ``orion.core.worker.trial.Trial`` + Trial from a `orion.algo.space.Space`. + + Returns + ------- + orion.algo.pbt.pbt.LineageNode + The lineage node for the given trial. + + """ + if trial.id in self._trial_to_lineages: + return self._trial_to_lineages[trial.id] + + lineage = LineageNode(trial) + self._lineage_roots.append(lineage) + self._trial_to_lineages[trial.id] = lineage + return lineage + + def fork(self, base_trial, new_trial): + """Fork a base trial to a new one. + + The base trial should already be registered in the Lineages + + Parameters + ---------- + base_trial: ``orion.core.worker.trial.Trial`` + The base trial that will be the parent lineage node. + new_trial: ``orion.core.worker.trial.Trial`` + The new trial that will be the child lineage node. 
+ + Raises + ------ + KeyError + If the base trial is not already registered in the Lineages + + """ + + new_lineage = self._trial_to_lineages[base_trial.id].fork(new_trial) + self._trial_to_lineages[new_trial.id] = new_lineage + return new_lineage + + def get_lineage(self, trial): + """Get the lineage node corresponding to a given trial. + + Parameters + ---------- + trial: ``orion.core.worker.trial.Trial`` + The trial for which the function should return the corresponding lineage node. + + Raises + ------ + KeyError + If the base trial is not already registered in the Lineages + """ + return self._trial_to_lineages[trial.id] + + def set_jump(self, base_trial, new_trial): + """Set a jump between two trials + + This jump is set to represent the relation between the base trial and the new trial. + This means the base trial was dropped during exploit and the new trial is the result + of a fork from another trial selected during exploit. + + Both trials should already be registered in the Lineages. + + Parameters + ---------- + base_trial: ``orion.core.worker.trial.Trial`` + The base trial that was dropped. + new_trial: ``orion.core.worker.trial.Trial`` + The new trial that was forked. + + Raises + ------ + KeyError + If the base trial or the new trial are not already registered in the Lineages. + + """ + self.get_lineage(base_trial).set_jump(self.get_lineage(new_trial)) + + def register(self, trial): + """Add or save the trial in the Lineages + + If the trial is not already in the Lineages, it is added as root. Otherwise, + the corresponding lineage node is updated with given trial object. + + Parameters + ---------- + trial: ``orion.core.worker.trial.Trial`` + The trial to register. 
+ + """ + if trial.id not in self._trial_to_lineages: + lineage = self.add(trial) + else: + lineage = self.get_lineage(trial) + lineage.register(trial) + + return lineage + + def get_elites(self, max_depth=None): + """Get best trials of each lineage + + Each lineage is a path from a leaf to the root. When there is a forking, + the path followed is not from child (new trial) to parent (forked trial), but + rather to base trial (trial dropped). This is to represent the path taken + by the sequence of trial execution within a worker. This also avoids having + duplicate elite trials on different lineages. + + Best trials may be looked for up to a ``max_depth``. + + Parameters + ---------- + max_depth: int or ``orion.core.worker.trial.Trial``, optional + The maximum depth to look for best trials. It can be an int to represent the depth + directly, or a trial, from which the depth will be inferred. If a trial, this trial + should be in the Lineages. Default: None, that is, no max depth. + """ + if max_depth and not isinstance(max_depth, int): + max_depth = self.get_lineage(max_depth).node_depth + + def get_parent_at_depth(node, depth): + while node.node_depth > depth: + node = node.parent + + return node + + trials = [] + for lineage in self._lineage_roots: + nodes = lineage.leafs + + if max_depth is not None: + trimmed_nodes = set() + for node in nodes: + node = get_parent_at_depth(node, max_depth) + trimmed_nodes.add(node) + nodes = list(trimmed_nodes) + + for node in nodes: + if node.jumps and ( + (max_depth is None) or (node.node_depth < max_depth) + ): + continue + + best_trial = node.get_best_trial() + if best_trial is not None: + trials.append(best_trial) + + return trials + + def get_trials_at_depth(self, trial_or_depth): + """Return the trials of all lineages at a given depth + + Parameters + ---------- + trial_or_depth: int or ``orion.core.worker.trial.Trial`` + If an int, this represents the depth directly. If a trial, the depth will be inferred + from it.
This trial should be in the Lineages. + + Raises + ------ + KeyError + If depth is inferred from trial but trial is not already registered in the Lineages + + """ + if isinstance(trial_or_depth, int): + depth = trial_or_depth + else: + depth = self.get_lineage(trial_or_depth).node_depth + + trials = [] + for lineage in self._lineage_roots: + for trial_node in lineage.get_nodes_at_depth(depth): + trials.append(trial_node.item) + + return trials + + +class LineageNode(TreeNode): + """ + Lineage node + + The lineage node is based on :py:class:`orion.core.utils.tree.TreeNode`. It provides + additional methods to help represent lineages for PBT, in particular, ``fork``, + ``set_jump``, ``get_true_ancestor`` and ``get_best_trial``. + + A lineage node can be connected to a parent and children, like a typical TreeNode, but + also to ``jumps`` and a ``base``. The jumps and base represent the connection between nodes + when PBT drops a trial and forks another one instead. In such case, the dropped trial + will refer to the new trial (the forked one) with ``jumps`` (it can refer to many if + the new trials crashed and required rollback) and the forked trial will refer to the + dropped one with ``base`` (it can only refer to one). + + Parameters + ---------- + trial: ``orion.core.worker.trial.Trial`` + The trial to represent with the lineage node. + parent: LineageNode, optional + The parent node for this lineage node. Default: None, that is, no parent.
+ + """ + + def __init__(self, trial, parent=None): + super(LineageNode, self).__init__(copy.deepcopy(trial), parent=parent) + self._jump = TreeNode(self) + + @property + def tree_name(self): + """Name of the node for pretty printing.""" + return str(self.item) + + @property + def jumps(self): + """New trials generated from forks when dropping this node.""" + return [node.item for node in self._jump.children] + + @property + def base(self): + """Base trial that was dropped in favor of this forked trial, if this trial resulted from a + fork. + """ + return self._jump.parent.item if self._jump.parent else None + + def register(self, trial): + """Save the trial object. + + Register will copy the object so that any modifications on it externally will not + impact the internal representation of the Lineage node. + """ + self.item = copy.deepcopy(trial) + + def fork(self, new_trial): + """Fork the trial to the new one. + + A new lineage node referring to ``new_trial`` will be created and added as a child + to current node. + + The working directory of the current trial, ``trial.working_dir`` + will be copied to ``new_trial.working_dir``. + + Parameters + ---------- + new_trial: ``orion.core.worker.trial.Trial`` + A new trial that is a child of the current one. + + Returns + ------- + LineageNode + LineageNode referring to ``new_trial`` + + Raises + ------ + RuntimeError + The working directory of the trials is identical. This should never happen + since the working_dir is inferred from a hash on trial parameters, and therefore + identical working_dir would imply that different trials have identical parameters. + + """ + if self.item.working_dir == new_trial.working_dir: + raise RuntimeError( + f"The new trial {new_trial.id} has the same working directory as " + f"trial {self.item.id}, which would lead to corrupted checkpoints. " + "This should never happen.
Please " + "report at https://github.com/Epistimio/orion/issues" + ) + + try: + shutil.copytree(self.item.working_dir, new_trial.working_dir) + except FileExistsError as e: + raise FileExistsError( + f"Folder already exists for trial {new_trial.id}. This could be a folder " + "remaining from a previous experiment with same trial id." + ) from e + + return LineageNode(new_trial, parent=self) + + def set_jump(self, node): + """Set the jump to given node + + This will also have the effect of setting ``node.base = self``. + + Parameters + ---------- + node: LineageNode + Node to refer to as the jump target for the current node. + + Raises + ------ + RuntimeError + If the given node already has a base. + + """ + if node._jump.parent is not None: + raise RuntimeError( + "Trying to jump to an existing node. Jumps to another lineage should only " + "occur on new nodes." + ) + + node._jump.set_parent(self._jump) + + def get_true_ancestor(self): + """Return the base if current trial is the result of a fork, otherwise return parent if it + has one, otherwise return None.""" + if self.base is not None: + return self.base + + if self.parent is not None: + return self.parent + + return None + + def get_best_trial(self): + """Return best trial on the path from root up to this node. + + The path followed is through `true` ancestors, that is, looking at + base if the current node is the result of a fork, otherwise looking at the parent. + + Only leaf node trials may not be completed. If there is only one node in the tree + and the node's trial is not completed, ``None`` is returned instead of a trial object. + + Returns + ------- + ``None`` + Only one node in the tree and it is not completed. + + ``orion.core.worker.trial.Trial`` + Trial with best objective (lowest).
+ + """ + parent_node = self.get_true_ancestor() + + if parent_node: + parent_trial = parent_node.get_best_trial() + + if get_objective(parent_trial) <= get_objective(self.item): + return parent_trial + + if self.item.status != "completed": + return None + + return self.item diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py index 5eb0b7228..b08e16cc6 100644 --- a/src/orion/client/experiment.py +++ b/src/orion/client/experiment.py @@ -15,6 +15,7 @@ import orion.core import orion.core.utils.format_trials as format_trials from orion.core.io.database import DuplicateKeyError +from orion.core.utils import backward from orion.core.utils.exceptions import ( BrokenExperiment, CompletedExperiment, @@ -24,6 +25,7 @@ WaitingForTrials, ) from orion.core.utils.flatten import flatten, unflatten +from orion.core.utils.working_dir import SetupWorkingDir from orion.core.worker.trial import Trial, TrialCM from orion.core.worker.trial_pacemaker import TrialPacemaker from orion.executor.base import executor_factory @@ -222,6 +224,11 @@ def working_dir(self): """Working directory of the experiment.""" return self._experiment.working_dir + @working_dir.setter + def working_dir(self, value): + """Working directory of the experiment.""" + self._experiment.working_dir = value + @property def producer(self): """Return the producer configuration of the experiment.""" @@ -797,20 +804,22 @@ def workon( self._experiment.max_trials = max_trials self._experiment.algorithms.algorithm.max_trials = max_trials - trials = self.executor.wait( - self.executor.submit( - self._optimize, - fct, - pool_size, - reservation_timeout, - max_trials_per_worker, - max_broken, - trial_arg, - on_error, - **kwargs, + with SetupWorkingDir(self): + + trials = self.executor.wait( + self.executor.submit( + self._optimize, + fct, + pool_size, + reservation_timeout, + max_trials_per_worker, + max_broken, + trial_arg, + on_error, + **kwargs, + ) + for _ in range(n_workers) ) - for _ in 
range(n_workers) - ) return sum(trials) @@ -834,6 +843,7 @@ def _optimize( with self.suggest( pool_size=pool_size, timeout=reservation_timeout ) as trial: + backward.ensure_trial_working_dir(self, trial) kwargs.update(flatten(trial.params)) diff --git a/src/orion/core/evc/experiment.py b/src/orion/core/evc/experiment.py index c8d8b5ca3..42ea9d8d9 100644 --- a/src/orion/core/evc/experiment.py +++ b/src/orion/core/evc/experiment.py @@ -17,7 +17,7 @@ import functools import logging -from orion.core.evc.tree import TreeNode +from orion.core.utils.tree import TreeNode from orion.storage.base import get_storage log = logging.getLogger(__name__) @@ -39,7 +39,7 @@ class ExperimentNode(TreeNode): .. seealso:: - :py:class:`orion.core.evc.tree.TreeNode` for tree-specific attributes and methods. + :py:class:`orion.core.utils.tree.TreeNode` for tree-specific attributes and methods. """ @@ -54,7 +54,7 @@ def __init__(self, name, version, experiment=None, parent=None, children=tuple() """Initialize experiment node with item, experiment, parent and children .. 
seealso:: - :class:`orion.core.evc.tree.TreeNode` for information about the attributes + :class:`orion.core.utils.tree.TreeNode` for information about the attributes """ super(ExperimentNode, self).__init__(experiment, parent, children) self.name = name diff --git a/src/orion/core/utils/backward.py b/src/orion/core/utils/backward.py index 4f5bf31b2..2dc5d7ac9 100644 --- a/src/orion/core/utils/backward.py +++ b/src/orion/core/utils/backward.py @@ -178,6 +178,15 @@ def algo_observe(algo, trials, results): """Convert trials so that algo can observe with legacy format (trials, results).""" for trial, trial_results in zip(trials, results): for name, trial_result in trial_results.items(): + if trial.exp_working_dir is None: + trial.exp_working_dir = "/nothing" + trial.status = "completed" trial.results.append(Trial.Result(name=name, type=name, value=trial_result)) algo.observe(trials) + + +def ensure_trial_working_dir(experiment, trial): + """If the trial's exp working dir is not set, set it to current experiment's working dir.""" + if not trial.exp_working_dir: + trial.exp_working_dir = experiment.working_dir diff --git a/src/orion/core/evc/tree.py b/src/orion/core/utils/tree.py similarity index 87% rename from src/orion/core/evc/tree.py rename to src/orion/core/utils/tree.py index 8b15ebe9b..4ff722dda 100644 --- a/src/orion/core/evc/tree.py +++ b/src/orion/core/utils/tree.py @@ -1,15 +1,8 @@ # -*- coding: utf-8 -*- """ -Tree data structure for the experiment version control system -============================================================= +Tree data structure +=================== -Tree data structure for the experiment version control system - -Experiment version control requires building trees of the experiments so -that we can fetch trials from one experiment to another or navigate from -one experiment to another to visualise different statistics. - -TreeNode and tree iterators support the tree data structure of the experiment version control. 
TreeNode is a generic class which can carry arbitrary python objects. It comes with basic methods to set parent and children. A method `map` allows to apply functions recursively on the tree in a generic manner. @@ -23,7 +16,7 @@ class PreOrderTraversal(object): Attributes ---------- - stack: list of `orion.core.evc.tree.TreeNode` + stack: list of `orion.core.utils.tree.TreeNode` Nodes logged during iteration """ @@ -56,9 +49,9 @@ class DepthFirstTraversal(object): Attributes ---------- - stack: list of `orion.core.evc.tree.TreeNode` + stack: list of `orion.core.utils.tree.TreeNode` Nodes logged during iteration - seen: set of `orion.core.evc.tree.TreeNode` + seen: set of `orion.core.utils.tree.TreeNode` Nodes which have been returned during iteration """ @@ -113,18 +106,18 @@ class TreeNode(object): Tree of nodes are iterable, by default with preorder traversal. .. seealso:: - `orion.core.evc.tree.PreOrderTraversal` - `orion.core.evc.tree.DepthFirstTraversal` + `orion.core.utils.tree.PreOrderTraversal` + `orion.core.utils.tree.DepthFirstTraversal` Attributes ---------- item: object Can be anything - parent: None or instance of `orion.core.evc.tree.TreeNode` + parent: None or instance of `orion.core.utils.tree.TreeNode` The parent of the current node, None if the current node is the root. - children: None or list of instances of `orion.core.evc.tree.TreeNode` + children: None or list of instances of `orion.core.utils.tree.TreeNode` The children of the curent node. - root: instance of `orion.core.evc.tree.TreeNode` + root: instance of `orion.core.utils.tree.TreeNode` The top node of the current tree. The root node returns itself. Examples @@ -185,7 +178,7 @@ def __init__(self, item, parent=None, children=tuple()): """Initialize node with item, parent and children .. 
seealso:: - :class:`orion.core.evc.tree.TreeNode` for information about the attributes + :class:`orion.core.utils.tree.TreeNode` for information about the attributes """ self._item = item self._parent = None @@ -226,7 +219,7 @@ def set_parent(self, node): dropping this current node from the previous parent's children list. .. seealso:: - `orion.core.evc.tree.TreeNode.drop_parent` + `orion.core.utils.tree.TreeNode.drop_parent` """ if node is self.parent: return @@ -272,7 +265,7 @@ def add_children(self, *nodes): Note that added children will have their parent set to the current node as well. .. seealso:: - `orion.core.evc.tree.TreeNode.drop_children` + `orion.core.utils.tree.TreeNode.drop_children` """ for child in nodes: if child is not None and not isinstance(child, TreeNode): @@ -297,6 +290,43 @@ def root(self): return self.parent.root + @property + def leafs(self): + """Get the leafs of the tree""" + leafs = [] + for child in self.children: + leafs += child.leafs + + if not leafs: + return [self] + + return leafs + + @property + def node_depth(self): + """The depth of the node in the tree with respect to the root node.""" + if self.parent: + return self.parent.node_depth + 1 + + return 0 + + def get_nodes_at_depth(self, depth): + """Returns a list of nodes at the corresponding depth. + + Depth is relative to current node. To get nodes at a depth relative + to the root, use ``node.root.get_nodes_at_depth(depth)``. 
+ """ + + def has_depth(node, children): + if node.node_depth - self.node_depth == depth: + return [node], None + + return [], children + + nodes = self.map(has_depth, self.children) + + return sum([node.item for node in nodes], []) + def map(self, function, node): r"""Apply a function recursively on the tree diff --git a/src/orion/core/utils/working_dir.py b/src/orion/core/utils/working_dir.py index 6228cd8f7..be1d2a1a8 100644 --- a/src/orion/core/utils/working_dir.py +++ b/src/orion/core/utils/working_dir.py @@ -6,44 +6,51 @@ ContextManager class to create a permanent directory or a temporary one. """ +import logging import os import tempfile +log = logging.getLogger(__name__) + # pylint: disable=too-few-public-methods -class WorkingDir: - """ContextManager class for temporary or permanent directory.""" - - def __init__(self, working_dir, temp=True, suffix=None, prefix=None): - """Create the context manager with the given name. - - Parameters - ---------- - name : str, optional - Name of the directory. If empty, will create a temporary one. - - """ - self.working_dir = str(working_dir) - self._temp = temp - self._suffix = suffix - self._prefix = prefix +class SetupWorkingDir: + """ContextManager class for temporary or permanent directory. 
+ + Parameters + ---------- + experiment: ``orion.client.experiment.ExperimentClient`` + Experiment for which the working directory will be created + + """ + + def __init__(self, experiment): + self.experiment = experiment + self.tmp = None self._tmpdir = None def __enter__(self): """Create the a permanent directory or a temporary one.""" - os.makedirs(self.working_dir, exist_ok=True) - if not self._temp: - path = os.path.join(self.working_dir, self._prefix + self._suffix) - os.makedirs(path, exist_ok=True) - return path + self.tmp = bool(not self.experiment.working_dir) + + if self.tmp: + base_path = os.path.join(tempfile.gettempdir(), "orion") + os.makedirs(base_path, exist_ok=True) + self._tmpdir = tempfile.TemporaryDirectory( + prefix=f"{self.experiment.name}-v{self.experiment.version}", + dir=self.experiment.working_dir, + ) + self.experiment.working_dir = self._tmpdir.name + else: + os.makedirs(self.experiment.working_dir, exist_ok=True) + + log.debug("Working directory at '%s':", self.experiment.working_dir) - self._tmpdir = tempfile.TemporaryDirectory( - suffix=self._suffix, prefix=self._prefix, dir=self.working_dir - ) - return self._tmpdir.name + return self.experiment.working_dir def __exit__(self, exc_type, exc_value, traceback): """Cleanup temporary directory.""" - if self._temp: + if self.tmp: self._tmpdir.cleanup() + self.experiment.working_dir = None diff --git a/src/orion/core/worker/consumer.py b/src/orion/core/worker/consumer.py index 9485b26e8..2514e3a8b 100644 --- a/src/orion/core/worker/consumer.py +++ b/src/orion/core/worker/consumer.py @@ -22,7 +22,6 @@ InexecutableUserScript, MissingResultFile, ) -from orion.core.utils.working_dir import WorkingDir log = logging.getLogger(__name__) @@ -99,11 +98,6 @@ def __init__( # Fetch space builder self.template_builder = OrionCmdlineParser(user_script_config) self.template_builder.set_state_dict(experiment.metadata["parser"]) - # Get path to user's script and infer trial configuration directory - if 
experiment.working_dir: - self.working_dir = os.path.abspath(experiment.working_dir) - else: - self.working_dir = os.path.join(tempfile.gettempdir(), "orion") self.pacemaker = None @@ -122,21 +116,13 @@ def __call__(self, trial, **kwargs): True if the trial was successfully executed. False if the trial is broken. """ - log.debug("Creating new directory at '%s':", self.working_dir) - temp_dir = not bool(self.experiment.working_dir) - prefix = self.experiment.name + "_" - suffix = trial.id + log.debug("Consumer context: %s", trial.working_dir) + os.makedirs(trial.working_dir, exist_ok=True) - with WorkingDir( - self.working_dir, temp_dir, prefix=prefix, suffix=suffix - ) as workdirname: - log.debug("New consumer context: %s", workdirname) - trial.working_dir = workdirname + results_file = self._consume(trial, trial.working_dir) - results_file = self._consume(trial, workdirname) - - log.debug("Parsing results from file and fill corresponding Trial object.") - results = self.retrieve_results(results_file) + log.debug("Parsing results from file and fill corresponding Trial object.") + results = self.retrieve_results(results_file) return results diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py index 90a54a290..336bcc318 100644 --- a/src/orion/core/worker/experiment.py +++ b/src/orion/core/worker/experiment.py @@ -354,6 +354,7 @@ def register_trial(self, trial, status="new"): trial.experiment = self._id trial.status = status trial.submit_time = stamp + trial.exp_working_dir = self.working_dir self._storage.register_trial(trial) diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py index 55a5fa384..c5ed65f03 100644 --- a/src/orion/core/worker/producer.py +++ b/src/orion/core/worker/producer.py @@ -112,7 +112,9 @@ def _update_params_hashes(self, trials): """Register locally all param hashes of trials""" for trial in trials: self.params_hashes.add( - Trial.compute_trial_hash(trial, ignore_experiment=True, 
ignore_lie=True) + Trial.compute_trial_hash( + trial, ignore_experiment=True, ignore_lie=True, ignore_parent=True + ) ) def update(self): diff --git a/src/orion/core/worker/trial.py b/src/orion/core/worker/trial.py index 7fc130a86..2a7131d6a 100644 --- a/src/orion/core/worker/trial.py +++ b/src/orion/core/worker/trial.py @@ -10,6 +10,7 @@ import copy import hashlib import logging +import os from orion.core.utils.exceptions import InvalidResult from orion.core.utils.flatten import unflatten @@ -178,14 +179,14 @@ class Param(Value): "_id", "_status", "worker", - "_working_dir", + "_exp_working_dir", "heartbeat", "submit_time", "start_time", "end_time", "_results", "_params", - "parents", + "parent", "id_override", ) allowed_stati = ( @@ -200,7 +201,7 @@ class Param(Value): def __init__(self, **kwargs): """See attributes of `Trial` for meaning and possible arguments for `kwargs`.""" for attrname in self.__slots__: - if attrname in ("_results", "_params", "parents"): + if attrname in ("_results", "_params"): setattr(self, attrname, list()) else: setattr(self, attrname, None) @@ -211,7 +212,9 @@ def __init__(self, **kwargs): self.id_override = kwargs.pop("_id", None) for attrname, value in kwargs.items(): - if attrname == "results": + if attrname == "parents": + log.info("Trial.parents attribute is deprecated. 
Value is ignored.") + elif attrname == "results": attr = getattr(self, attrname) for item in value: attr.append(self.Result(**item)) @@ -257,16 +260,18 @@ def branch(self, status="new", params=None): if params: raise ValueError(f"Some parameters are not part of base trial: {params}") - return Trial(status=status, params=config_params) + return Trial( + status=status, + params=config_params, + parent=self.id, + exp_working_dir=self.exp_working_dir, + ) def to_dict(self): """Needed to be able to convert `Trial` to `dict` form.""" trial_dictionary = dict() for attrname in self.__slots__: - if attrname == "_working_dir": - continue - attrname = attrname.lstrip("_") trial_dictionary[attrname] = getattr(self, attrname) @@ -313,15 +318,40 @@ def results(self, results): self._results = results + def get_working_dir( + self, + ignore_fidelity=False, + ignore_experiment=False, + ignore_lie=False, + ignore_parent=False, + ): + if not self.exp_working_dir: + raise RuntimeError( + "Cannot infer trial's working_dir because trial.exp_working_dir is not set." + ) + trial_hash = self.compute_trial_hash( + self, + ignore_fidelity=ignore_fidelity, + ignore_experiment=ignore_experiment, + ignore_lie=ignore_lie, + ignore_parent=ignore_parent, + ) + return os.path.join(self.exp_working_dir, trial_hash) + @property def working_dir(self): """Return the current working directory of the trial.""" - return self._working_dir + return self.get_working_dir() + + @property + def exp_working_dir(self): + """Return the current working directory of the experiment.""" + return self._exp_working_dir - @working_dir.setter - def working_dir(self, value): - """Change the current working directory of the trial.""" - self._working_dir = value + @exp_working_dir.setter + def exp_working_dir(self, value): + """Change the current base working directory of the trial.""" + self._exp_working_dir = value @property def status(self): @@ -400,7 +430,17 @@ def hash_params(self): .. 
note:: The params contributing to the hash do not include the fidelity. """ - return self.compute_trial_hash(self, ignore_fidelity=True, ignore_lie=True) + return self.compute_trial_hash( + self, ignore_fidelity=True, ignore_lie=True, ignore_parent=True + ) + + def __eq__(self, other): + """Whether two trials are equal is based on id alone. + + This includes params, experiment, parent and lie. All other attributes of the + trials are ignored when comparing them. + """ + return self.id == other.id def __hash__(self): """Return the hashname for this trial""" @@ -416,29 +456,6 @@ def full_name(self): ) return self.format_values(self._params, sep="-").replace("/", ".") - def _fetch_results(self, type, results): - """Fetch results for the given type""" - return [result for result in results if result.type == type] - - def _fetch_one_result_of_type(self, result_type, results=None): - if results is None: - results = self.results - - value = self._fetch_results(result_type, results) - - if not value: - return None - - if len(value) > 1: - log.warning("Found multiple results of '%s' type:\n%s", result_type, value) - log.warning( - "Multi-objective optimization is not currently supported.\n" - "Optimizing according to the first one only: %s", - value[0], - ) - - return value[0] - def _repr_values(self, values, sep=","): """Represent with a string the given values.""" return Trial.format_values(values, sep) @@ -463,7 +480,11 @@ def format_params(params, sep=",", ignore_fidelity=False): @staticmethod def compute_trial_hash( - trial, ignore_fidelity=False, ignore_experiment=False, ignore_lie=False + trial, + ignore_fidelity=False, + ignore_experiment=False, + ignore_lie=False, + ignore_parent=False, ): """Generate a unique param md5sum hash for a given `Trial`""" if not trial._params and not trial.experiment: @@ -482,10 +503,39 @@ def compute_trial_hash( if not ignore_lie and trial.lie: lie_repr = Trial.format_values([trial.lie]) + # TODO: When implementing TrialClient, we 
should compute the hash of the parent + # based on the same ignore_ attributes. For now we use the full id of the parent. + parent_repr = "" + if not ignore_parent and trial.parent is not None: + parent_repr = str(trial.parent) + return hashlib.md5( - (params + experiment_repr + lie_repr).encode("utf-8") + (params + experiment_repr + lie_repr + parent_repr).encode("utf-8") ).hexdigest() + def _fetch_results(self, type, results): + """Fetch results for the given type""" + return [result for result in results if result.type == type] + + def _fetch_one_result_of_type(self, result_type, results=None): + if results is None: + results = self.results + + value = self._fetch_results(result_type, results) + + if not value: + return None + + if len(value) > 1: + log.warning("Found multiple results of '%s' type:\n%s", result_type, value) + log.warning( + "Multi-objective optimization is not currently supported.\n" + "Optimizing according to the first one only: %s", + value[0], + ) + + return value[0] + class TrialCM: __slots__ = ("_cm_experiment", "_cm_trial") diff --git a/src/orion/testing/algo.py b/src/orion/testing/algo.py index 4d6f8237d..a1b6979d0 100644 --- a/src/orion/testing/algo.py +++ b/src/orion/testing/algo.py @@ -338,7 +338,7 @@ def assert_dim_type_supported(self, mocker, num, attr, test_space): assert trials[0] in space spy.call_count == 1 self.observe_trials(trials, algo, 1) - self.assert_callbacks(spy, num, algo) + self.assert_callbacks(spy, num + 1, algo) def test_configuration(self): """Test that configuration property attribute contains all class arguments.""" @@ -409,7 +409,7 @@ def test_seed_rng(self, mocker, num, attr): self.force_observe(algo.n_observed, new_algo) assert trials[0].id == new_algo.suggest(1)[0].id - self.assert_callbacks(spy, num, new_algo) + self.assert_callbacks(spy, num + 1, new_algo) @phase def test_seed_rng_init(self, mocker, num, attr): @@ -428,7 +428,7 @@ def test_seed_rng_init(self, mocker, num, attr): 
self.force_observe(algo.n_observed, new_algo) assert new_algo.suggest(1)[0].id == trials[0].id - self.assert_callbacks(spy, num, new_algo) + self.assert_callbacks(spy, num + 1, new_algo) @phase def test_state_dict(self, mocker, num, attr): @@ -447,7 +447,7 @@ def test_state_dict(self, mocker, num, attr): new_algo.set_state(state) assert a.id == new_algo.suggest(1)[0].id - self.assert_callbacks(spy, num, algo) + self.assert_callbacks(spy, num + 1, algo) @phase def test_suggest_n(self, mocker, num, attr): @@ -551,6 +551,7 @@ def test_n_observed(self, mocker, num, attr): assert algo.n_observed == num trials = algo.suggest(1) assert algo.n_observed == num + assert len(trials) == 1 self.observe_trials(trials, algo) assert algo.n_observed == num + 1 diff --git a/tests/unittests/algo/pbt/base.py b/tests/unittests/algo/pbt/base.py new file mode 100644 index 000000000..1085ebbd9 --- /dev/null +++ b/tests/unittests/algo/pbt/base.py @@ -0,0 +1,286 @@ +# -*- coding: utf-8 -*- +"""Stubs, fixtures and helpers shared by the tests for :mod:`orion.algo.pbt`.""" +import os +import random +import shutil + +import numpy +import pytest + +from orion.algo.pbt.exploit import BaseExploit +from orion.algo.pbt.explore import BaseExplore +from orion.algo.pbt.pbt import PBT, LineageNode, Lineages, compute_fidelities +from orion.core.io.space_builder import SpaceBuilder +from orion.core.utils.flatten import flatten +from orion.core.utils.pptree import print_tree +from orion.core.worker.transformer import build_required_space +from orion.core.worker.trial import Trial + + +def build_full_tree(depth, child_per_parent=2, starting_objective=1): + """Build a full tree + + Parameters + ---------- + depth: int + Depth of the tree + + child_per_parent: int, optional + Number of children per node.
Default: 2 + """ + + def create_node_item(node_index): + return TrialStub(id=f"id-{node_index}", objective=node_index) + + node_index = starting_objective + root = LineageNode(create_node_item(node_index)) + node_index += 1 + node_buffer = [root] + next_nodes = [] + for i in range(depth - 1): + for node in node_buffer: + for k in range(child_per_parent): + next_nodes.append( + LineageNode(create_node_item(node_index), parent=node) + ) + node_index += 1 + node_buffer = next_nodes + next_nodes = [] + + print_tree(root, nameattr="tree_name") + + return root + + +def build_population(objectives): + depth = len(objectives) + size = len(objectives[0]) + lineages = Lineages() + + for lineage_index in range(size): + lineages.add( + TrialStub( + id=f"lineage-{lineage_index}-0", + objective=objectives[0][lineage_index], + ) + ) + + for generation in range(1, depth): + for lineage_index in range(size): + new_trial = TrialStub( + id=f"lineage-{lineage_index}-{generation}", + objective=objectives[generation][lineage_index], + ) + parent_trial = TrialStub(id=f"lineage-{lineage_index}-{generation-1}") + if lineage_index == ((generation - 1) % size): + next_index = (lineage_index + 1) % len(lineages) + base_trial = parent_trial + parent_trial = TrialStub(id=f"lineage-{next_index}-{generation-1}") + lineages.fork(parent_trial, new_trial) + lineages.set_jump(base_trial, new_trial) + else: + lineages.fork(parent_trial, new_trial) + + return lineages + + +def compare_generations(trials, population_size, depth): + trial_ids = set(trial.id for trial in trials) + expected_ids = set(f"lineage-{i}-{depth}" for i in range(population_size)) + assert trial_ids == expected_ids + + +class RNGStub: + pass + + +@pytest.fixture +def no_shutil_copytree(monkeypatch): + monkeypatch.setattr("shutil.copytree", lambda dir_a, dir_b: None) + yield + + +@pytest.fixture +def space(): + return SpaceBuilder().build( + { + "x": "uniform(0, 100)", + "y": "uniform(0, 10, discrete=True)", + "z": 'choices(["a", 
"b", 0, True])', + "f": "fidelity(1, 100, base=1)", + } + ) + + +@pytest.fixture +def hspace(): + return SpaceBuilder().build( + { + "numerical": { + "x": "uniform(0, 100)", + "y": "uniform(0, 10, discrete=True)", + "f": "fidelity(1, 100, base=1)", + }, + "z": 'choices(["a", "b", 0, True])', + } + ) + + +def sample_trials( + space, + num, + seed=1, + status=None, + objective=None, + params=None, + exp_working_dir="/nothing", +): + if params is None: + params = {"f": space["f"].original_dimension.original_dimension.low} + + trials = space.sample(num, seed=seed) + new_trials = [] + for trial in trials: + if params: + trial = trial.branch(params=params) + + trial = space.transform(space.reverse(trial)) + + trial.exp_working_dir = exp_working_dir + + if status: + trial.status = status + if status == "completed" and objective is not None: + trial._results.append( + Trial.Result(name="objective", type="objective", value=1) + ) + + new_trials.append(trial) + + return new_trials + + +def build_lineages_for_exploit( + space, monkeypatch, trials=None, elites=None, additional_trials=None, seed=1, num=10 +): + if trials is None: + trials = space.sample(num, seed=seed) + for i, trial in enumerate(trials): + trial.status = "completed" + trial._results.append( + trial.Result(name="objective", type="objective", value=i) + ) + if elites is None: + elites = space.sample(num, seed=seed + 1) + for i, trial in enumerate(elites): + trial.status = "completed" + trial._results.append( + trial.Result(name="objective", type="objective", value=i * 2) + ) + + if additional_trials: + trials += additional_trials + + def return_trials(*args, **kwargs): + return trials + + def return_elites(*args, **kwargs): + return elites + + lineages = Lineages() + monkeypatch.setattr(lineages, "get_trials_at_depth", return_trials) + monkeypatch.setattr(lineages, "get_elites", return_elites) + + return lineages + + +class ObjectiveStub: + def __init__(self, value): + self.value = value + + +class TrialStub: + 
def __init__( + self, + working_dir=None, + objective=None, + id=None, + status=None, + params=None, + parent=None, + ): + self.id = id + if working_dir is None: + working_dir = id + + self.working_dir = working_dir + if objective and (status is None or status == "completed"): + self.objective = ObjectiveStub(objective) + else: + self.objective = None + + if status is None and objective is not None: + self.status = "completed" + elif status is None: + self.status = "new" + else: + self.status = status + + self.params = params + self.parent = parent + + def __repr__(self): + return self.id + + +class ExploitStub(BaseExploit): + def __init__(self, rval=None, skip=False, should_receive=None, **kwargs): + self.rval = rval + self.skip = skip + self.should_receive = should_receive + self.kwargs = kwargs + + def __call__(self, rng, trial, lineages): + if self.should_receive: + assert trial is self.should_receive + + if self.skip: + return None + + if self.rval is not None: + return self.rval + + return trial + + @property + def configuration(self): + configuration = super(ExploitStub, self).configuration + configuration["rval"] = self.rval + configuration["skip"] = self.skip + configuration["should_receive"] = self.should_receive + configuration.update(self.kwargs) + return configuration + + +class ExploreStub(BaseExplore): + def __init__(self, rval=None, no_call=False, **kwargs): + self.rval = rval + self.no_call = no_call + self.kwargs = kwargs + + def __call__(self, rng, space, params): + if self.no_call: + raise RuntimeError("Should not have been called!") + + if self.rval is not None: + return self.rval + + return params + + @property + def configuration(self): + configuration = super(ExploreStub, self).configuration + configuration["rval"] = self.rval + configuration["no_call"] = self.no_call + configuration.update(self.kwargs) + return configuration diff --git a/tests/unittests/algo/pbt/test_exploit.py b/tests/unittests/algo/pbt/test_exploit.py new file mode 100644 
index 000000000..624b0622b --- /dev/null +++ b/tests/unittests/algo/pbt/test_exploit.py @@ -0,0 +1,261 @@ +import numpy +import pytest +from base import ExploitStub, RNGStub, TrialStub, space + +from orion.algo.pbt.exploit import BacktrackExploit, PipelineExploit, TruncateExploit +from orion.algo.pbt.pbt import Lineages + + +def build_lineages_for_exploit( + space, monkeypatch, trials=None, elites=None, additional_trials=None, seed=1, num=10 +): + if trials is None: + trials = space.sample(num, seed=seed) + for i, trial in enumerate(trials): + trial.status = "completed" + trial._results.append( + trial.Result(name="objective", type="objective", value=i) + ) + if elites is None: + elites = space.sample(num, seed=seed + 1) + for i, trial in enumerate(elites): + trial.status = "completed" + trial._results.append( + trial.Result(name="objective", type="objective", value=i * 2) + ) + + if additional_trials: + trials += additional_trials + + def return_trials(*args, **kwargs): + return trials + + def return_elites(*args, **kwargs): + return elites + + lineages = Lineages() + monkeypatch.setattr(lineages, "get_trials_at_depth", return_trials) + monkeypatch.setattr(lineages, "get_elites", return_elites) + + return lineages + + +class TestPipelineExploit: + def test_no_exploit(self): + trial = TrialStub() + assert PipelineExploit([])(RNGStub(), trial, None) is trial + + def test_exploit_otherwise_next(self): + for i in range(4): + exploit = PipelineExploit( + [ + dict(of_type="exploitstub", rval=None if j < i else i, some="args") + for j in range(4) + ] + ) + assert exploit(RNGStub(), TrialStub(), None) == i + + def test_configuration(self): + + exploit_configs = [ + dict( + of_type="exploitstub", + some="args", + rval=1, + should_receive=None, + skip=True, + ), + dict( + of_type="exploitstub", + other="args", + rval=None, + should_receive="something", + skip=False, + ), + ] + exploit = PipelineExploit(exploit_configs) + + assert exploit.configuration == dict( + 
of_type="pipelineexploit", exploit_configs=exploit_configs + ) + + +class TruncateGenericTests: + constructor = None + + def test_configuration(self): + configuration = dict( + min_forking_population=5, candidate_pool_ratio=0.5, truncation_quantile=0.75 + ) + exploit = self.constructor(**configuration) + configuration["of_type"] = exploit.__class__.__name__.lower() + assert exploit.configuration == configuration + + def test_truncate_not_enough_trials(self, space, monkeypatch): + lineages = build_lineages_for_exploit(space, monkeypatch, num=4) + + exploit = self.constructor(min_forking_population=5) + + assert exploit(RNGStub(), TrialStub(), lineages) is None + + def test_truncate_trial_not_in_trials(self, space, monkeypatch): + trial = space.sample(1, seed=2)[0] + + lineages = build_lineages_for_exploit(space, monkeypatch) + + exploit = self.constructor() + + with pytest.raises( + ValueError, + match=f"Trial {trial.id} not included in list of completed trials.", + ): + exploit(numpy.random.RandomState(1), trial, lineages) + + def test_truncate_non_completed_trials(self, space, monkeypatch): + trial = space.sample(1, seed=2)[0] + + lineages = build_lineages_for_exploit( + space, monkeypatch, additional_trials=[trial] + ) + + assert trial in lineages.get_trials_at_depth(trial) + + exploit = self.constructor() + + with pytest.raises( + ValueError, + match=f"Trial {trial.id} not included in list of completed trials.", + ): + exploit(numpy.random.RandomState(1), trial, lineages) + + def test_truncate_empty_pool(self, space, monkeypatch): + lineages = build_lineages_for_exploit(space, monkeypatch) + + exploit = self.constructor(candidate_pool_ratio=0.0001) + + selected_trial = exploit._truncate( + numpy.random.RandomState(1), + lineages.get_trials_at_depth(1)[-1], + lineages.get_trials_at_depth(1), + ) + + assert selected_trial is None + + def get_trials(self, lineages, trial): + return lineages.get_trials_at_depth(trial) + + def test_fetch_trials_properly(self, space, 
monkeypatch): + + lineages = build_lineages_for_exploit(space, monkeypatch) + exploit = self.constructor() + + def test_truncate_args(rng, trial, trials): + assert trials == self.get_trials(lineages, trial) + + monkeypatch.setattr(exploit, "_truncate", test_truncate_args) + + exploit(RNGStub(), TrialStub(id="selected-trial"), lineages) + + @pytest.mark.parametrize("candidate_pool_ratio", [0.2, 0.4, 0.8]) + def test_truncate_valid_choice(self, candidate_pool_ratio, space, monkeypatch): + """Test the pool of available trials based on candidate_pool_ratio""" + lineages = build_lineages_for_exploit(space, monkeypatch) + trials = self.get_trials(lineages, TrialStub(objective=50)) + trials = sorted(trials, key=lambda trial: trial.objective.value) + + num_completed_trials = len(trials) + valid_choices = numpy.arange( + int(candidate_pool_ratio * num_completed_trials) + ).tolist() + selected_trial = trials[valid_choices[-1]] + + def mocked_choice(choices, *args, **kwargs): + assert choices.tolist() == valid_choices + return valid_choices[-1] + + rng = RNGStub() + rng.choice = mocked_choice + + completed_trial_index = numpy.random.choice(range(len(trials))) + completed_trial = trials[completed_trial_index] + + # Add non completed trials and shuffle the list to test it is filtered and sorted properly + trials += space.sample(20, seed=2) + numpy.random.shuffle(trials) + + exploit = self.constructor( + truncation_quantile=0, candidate_pool_ratio=candidate_pool_ratio + ) + + trial = exploit._truncate( + rng, + completed_trial, + trials, + ) + + assert trial is selected_trial + + @pytest.mark.parametrize("truncation_quantile", [0.0, 0.2, 0.4, 0.8, 1.0]) + def test_truncate(self, truncation_quantile, space, monkeypatch): + """Test the threshold at which a trial is replaced based on truncation_quantile""" + # Test that trial within threshold is not replaced + lineages = build_lineages_for_exploit(space, monkeypatch) + trials = self.get_trials(lineages, TrialStub(objective=50)) + trials =
sorted(trials, key=lambda trial: trial.objective.value) + + threshold_index = int(truncation_quantile * len(trials)) + + good_trial = trials[threshold_index - 1] + selected_trial = trials[-1] + + # Add non completed trials and shuffle the list to test it is filtered and sorted properly + lots_of_trials = trials + space.sample(20, seed=2) + numpy.random.shuffle(lots_of_trials) + + exploit = self.constructor( + truncation_quantile=truncation_quantile, candidate_pool_ratio=0.2 + ) + + if truncation_quantile > 0.0: + + def mocked_choice(choices, *args, **kwargs): + raise RuntimeError("Should not be called") + + rng = RNGStub() + rng.choice = mocked_choice + + trial = exploit._truncate( + rng, + good_trial, + lots_of_trials, + ) + + assert trial is good_trial + + if truncation_quantile < 1.0: + bad_trial = trials[threshold_index] + + def mocked_choice(choices, *args, **kwargs): + return -1 + + rng = RNGStub() + rng.choice = mocked_choice + + trial = exploit._truncate( + rng, + bad_trial, + lots_of_trials, + ) + + assert trial is selected_trial + + +class TestTruncate(TruncateGenericTests): + constructor = TruncateExploit + + +class TestTruncateWithBacktracking(TruncateGenericTests): + constructor = BacktrackExploit + + def get_trials(self, lineages, trial): + return lineages.get_elites(max_depth=trial) + [trial] diff --git a/tests/unittests/algo/pbt/test_explore.py b/tests/unittests/algo/pbt/test_explore.py new file mode 100644 index 000000000..3b0389c30 --- /dev/null +++ b/tests/unittests/algo/pbt/test_explore.py @@ -0,0 +1,225 @@ +import numpy +import pytest +from base import ExploreStub, RNGStub, TrialStub, hspace, space + +from orion.algo.pbt.explore import PerturbExplore, PipelineExplore, ResampleExplore +from orion.algo.space import Categorical, Dimension +from orion.core.utils.flatten import flatten + + +class TestPipelineExplore: + def test_no_explore(self): + params = object() + assert PipelineExplore([])(RNGStub(), None, params) is params + + def 
test_explore_otherwise_next(self): + for i in range(4): + explore = PipelineExplore( + [ + dict(of_type="explorestub", rval=None if j < i else i, some="args") + for j in range(4) + ] + ) + assert explore(RNGStub(), TrialStub(), None) == i + + def test_configuration(self): + + explore_configs = [ + dict(of_type="explorestub", some="args", rval=1, no_call=False), + dict(of_type="explorestub", other="args", rval=None, no_call=True), + ] + explore = PipelineExplore(explore_configs) + + assert explore.configuration == dict( + of_type="pipelineexplore", explore_configs=explore_configs + ) + + +class TestPerturb: + @pytest.mark.parametrize("factor", [0.5, 1, 1.5]) + def test_perturb_real_factor(self, factor): + explore = PerturbExplore(factor=factor) + + rng = RNGStub() + rng.random = lambda: 1.0 + + assert explore.perturb_real(rng, 1.0, (0.1, 2.0)) == factor + + rng.random = lambda: 0.0 + + assert explore.perturb_real(rng, 1.0, (0.1, 2.0)) == 1.0 / factor + + def test_perturb_real_below_interval_cap(self): + explore = PerturbExplore(factor=0.0, volatility=0) + + rng = RNGStub() + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_real(rng, 0.0, (1.0, 2.0)) == 1.0 + + explore.volatility = 1000 + + assert explore.perturb_real(rng, 0.0, (1.0, 2.0)) == 2.0 + + def test_perturb_real_above_interval_cap(self): + explore = PerturbExplore(factor=1.0, volatility=0) + + rng = RNGStub() + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_real(rng, 3.0, (1.0, 2.0)) == 2.0 + + explore.volatility = 1000 + + assert explore.perturb_real(rng, 3.0, (1.0, 2.0)) == 1.0 + + @pytest.mark.parametrize("volatility", [0.0, 0.05, 1.0]) + def test_perturb_real_volatility_below(self, volatility): + explore = PerturbExplore(factor=1.0, volatility=volatility) + + rng = RNGStub() + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_real(rng, 0.0, (1.0, 2.0)) == 
1.0 + volatility + + @pytest.mark.parametrize("volatility", [0.0, 0.05, 1.0]) + def test_perturb_real_volatility_above(self, volatility): + explore = PerturbExplore(factor=1.0, volatility=volatility) + + rng = RNGStub() + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_real(rng, 3.0, (1.0, 2.0)) == 2.0 - volatility + + @pytest.mark.parametrize("factor", [0.5, 0.75, 1, 1.5]) + def test_perturb_int_factor(self, factor): + explore = PerturbExplore(factor=factor) + + rng = RNGStub() + rng.random = lambda: 1.0 + + assert explore.perturb_int(rng, 5, (0, 10)) == int(numpy.round(5 * factor)) + + rng.random = lambda: 0.0 + + assert explore.perturb_int(rng, 5, (0, 10)) == int(numpy.round(5 / factor)) + + def test_perturb_int_duplicate_equal(self): + explore = PerturbExplore(factor=1.0) + + rng = RNGStub() + rng.random = lambda: 1.0 + + assert explore.perturb_int(rng, 1, (0, 10)) == 1 + + def test_perturb_int_no_duplicate_below(self): + explore = PerturbExplore(factor=0.75) + + rng = RNGStub() + rng.random = lambda: 1.0 + + assert explore.perturb_int(rng, 1, (0, 10)) == 0 + + def test_perturb_int_no_duplicate_above(self): + explore = PerturbExplore(factor=0.75) + + rng = RNGStub() + + rng.random = lambda: 0.0 + + assert explore.perturb_int(rng, 1, (0, 10)) == 2 + + def test_perturb_int_no_out_of_bounds(self): + explore = PerturbExplore(factor=0.75, volatility=0) + + rng = RNGStub() + + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_int(rng, 0, (0, 10)) == 0 + + rng.random = lambda: 0.0 + rng.normal = lambda mean, variance: variance + + assert explore.perturb_int(rng, 10, (0, 10)) == 10 + + def test_perturb_cat(self): + explore = PerturbExplore() + rng = RNGStub() + rng.randint = lambda low, high, size: [1] + rng.choice = lambda choices: choices[0] + + dim = Categorical("name", ["one", "two", 3, 4.0]) + assert explore.perturb_cat(rng, "whatever", dim) in dim + + def 
test_perturb(self, space): + explore = PerturbExplore() + rng = RNGStub() + rng.randint = lambda low, high, size: [1] + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: 0.0 + rng.choice = lambda choices: choices[0] + + params = {"x": 1.0, "y": 2, "z": 0, "f": 10} + new_params = explore(rng, space, params) + for key in space.keys(): + assert new_params[key] in space[key] + + def test_perturb_hierarchical_params(self, hspace): + explore = PerturbExplore() + rng = RNGStub() + rng.randint = lambda low, high, size: [1] + rng.random = lambda: 1.0 + rng.normal = lambda mean, variance: 0.0 + rng.choice = lambda choices: choices[0] + + params = {"numerical": {"x": 1.0, "y": 2, "f": 10}, "z": 0} + new_params = explore(rng, hspace, params) + assert "numerical" in new_params + assert "x" in new_params["numerical"] + for key in hspace.keys(): + assert flatten(new_params)[key] in hspace[key] + + def test_perturb_with_invalid_dim(self, space, monkeypatch): + explore = PerturbExplore() + + monkeypatch.setattr(Dimension, "type", "type_that_dont_exist") + + with pytest.raises( + ValueError, match="Unsupported dimension type type_that_dont_exist" + ): + explore(RNGStub(), space, {"x": 1.0, "y": 2, "z": 0, "f": 10}) + + def test_configuration(self): + + explore = PerturbExplore(factor=2.0, volatility=10.0) + + assert explore.configuration == dict( + of_type="perturbexplore", factor=2.0, volatility=10.0 + ) + + +class TestResample: + def test_resample_probability(self, space): + explore = ResampleExplore(probability=0.5) + + rng = RNGStub() + rng.randint = lambda low, high, size: [1] + rng.random = lambda: 0.5 + + params = {"x": 1.0, "y": 2, "z": 0, "f": 10} + + assert explore(rng, space, params) is params + + rng.random = lambda: 0.4 + + assert explore(rng, space, params) is not params + + def test_configuration(self): + explore = ResampleExplore(probability=0.5) + assert explore.configuration == dict(of_type="resampleexplore", probability=0.5) diff --git 
a/tests/unittests/algo/pbt/test_lineages.py b/tests/unittests/algo/pbt/test_lineages.py new file mode 100644 index 000000000..83eb94174 --- /dev/null +++ b/tests/unittests/algo/pbt/test_lineages.py @@ -0,0 +1,470 @@ +import os +import random +import shutil + +import pytest +from base import ( + ObjectiveStub, + TrialStub, + build_full_tree, + build_population, + compare_generations, + no_shutil_copytree, +) + +from orion.algo.pbt.pbt import LineageNode, Lineages + + +class TestLineageNode: + def test_register(self): + item = [0] + lineage = LineageNode(item) + assert lineage.item == item + assert lineage.item is not item + + item = [1] + lineage.register(item) + assert lineage.item == item + assert lineage.item is not item + + def test_fork(self, mocker): + path = "/some_path" + trial = TrialStub(path) + lineage = LineageNode(trial) + + new_path = "/another_path" + new_trial = TrialStub(new_path) + + mocker.patch("shutil.copytree") + new_lineage = lineage.fork(new_trial) + shutil.copytree.assert_called_once_with(path, new_path) + + assert new_lineage.item.working_dir == new_trial.working_dir + assert new_lineage.parent is lineage + assert lineage.children[0] is new_lineage + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_identical_new_trial(self): + lineage = LineageNode(TrialStub(id="my-id", working_dir="same_folder")) + with pytest.raises( + RuntimeError, match="The new trial new-id has the same working directory" + ): + lineage.fork(TrialStub(id="new-id", working_dir="same_folder")) + + assert lineage.children == [] + + def test_fork_to_existing_path(self, tmp_path): + trial = TrialStub(id="stub", working_dir=os.path.join(tmp_path, "stub")) + os.makedirs(trial.working_dir) + lineage = LineageNode(trial) + new_trial = TrialStub(id="fork", working_dir=os.path.join(tmp_path, "fork")) + os.makedirs(new_trial.working_dir) + + with pytest.raises( + FileExistsError, match="Folder already exists for trial fork." 
+ ): + lineage.fork(new_trial) + + assert lineage.children == [] + + def test_set_jump(self): + parent_lineage = LineageNode(1) + child_lineage = LineageNode(2) + parent_lineage.set_jump(child_lineage) + + assert child_lineage.parent is None + assert child_lineage.jumps == [] + assert child_lineage.base is parent_lineage + + assert parent_lineage.children == [] + assert parent_lineage.jumps == [child_lineage] + assert parent_lineage.base is None + + def test_set_jump_twice(self): + parent_lineage = LineageNode(1) + child_lineage = LineageNode(2) + parent_lineage.set_jump(child_lineage) + + another_child_lineage = LineageNode(3) + parent_lineage.set_jump(another_child_lineage) + + assert child_lineage.parent is None + assert child_lineage.jumps == [] + assert child_lineage.base is parent_lineage + + assert another_child_lineage.parent is None + assert another_child_lineage.jumps == [] + assert another_child_lineage.base is parent_lineage + + assert parent_lineage.children == [] + assert parent_lineage.jumps == [child_lineage, another_child_lineage] + assert parent_lineage.base is None + + def test_set_jump_to_old_node(self): + parent_lineage = LineageNode(1) + child_lineage = LineageNode(2) + parent_lineage.set_jump(child_lineage) + + another_child_lineage = LineageNode(3) + + with pytest.raises(RuntimeError, match="Trying to jump to an existing node"): + another_child_lineage.set_jump(child_lineage) + + assert child_lineage.parent is None + assert child_lineage.jumps == [] + assert child_lineage.base is parent_lineage + + assert another_child_lineage.parent is None + assert another_child_lineage.jumps == [] + assert another_child_lineage.base is None + + assert parent_lineage.children == [] + assert parent_lineage.jumps == [child_lineage] + assert parent_lineage.base is None + + def test_get_true_ancestor_no_parent(self): + lineage = LineageNode(1) + assert lineage.get_true_ancestor() is None + + def test_get_true_ancestor_parent_no_jump(self): + lineage = 
LineageNode(1) + child_lineage = LineageNode(2, parent=lineage) + assert child_lineage.get_true_ancestor() is lineage + + def test_get_true_ancestor_with_jump(self): + lineage = LineageNode(1) + child_lineage = LineageNode(2, parent=lineage) + true_lineage = LineageNode(3) + true_lineage.set_jump(child_lineage) + assert child_lineage.parent is lineage + assert child_lineage.base is true_lineage + assert child_lineage.get_true_ancestor() is true_lineage + + def test_get_best_trial_empty(self): + trial = TrialStub(id="id-1", objective=1) + lineage = LineageNode(trial) + assert lineage.get_best_trial().id == "id-1" + + def test_get_best_trial_straigth_lineage(self): + root = build_full_tree(4) + leafs = root.get_nodes_at_depth(3) + assert leafs[0].item.id == "id-8" + assert leafs[0].get_best_trial() == root.item + assert leafs[1].get_best_trial() == root.item + leafs[0].item.objective.value = -1 + # Now best trial is leaf on first branch + assert leafs[0].get_best_trial() == leafs[0].item + # But still root for second branch + assert leafs[1].get_best_trial() == root.item + + third_row = root.get_nodes_at_depth(2) + assert third_row[0].item.id == "id-4" + assert third_row[0].get_best_trial() == root.item + assert third_row[1].get_best_trial() == root.item + + third_row[0].item.objective.value = -2 + # Now best trial is third node on first branch + assert third_row[0].get_best_trial() == third_row[0].item + # But still root for second branch + assert third_row[1].get_best_trial() == root.item + # And third node on full first and second branches + assert leafs[0].get_best_trial() == third_row[0].item + assert leafs[1].get_best_trial() == third_row[0].item + # But not for third branch + assert leafs[2].get_best_trial() == root.item + + second_row = root.get_nodes_at_depth(1) + assert second_row[0].item.id == "id-2" + assert second_row[0].get_best_trial() == root.item + assert second_row[1].get_best_trial() == root.item + + second_row[0].item.objective.value = -3 + # Now 
best trial is second node on first branch + assert second_row[0].get_best_trial() == second_row[0].item + # But still root for second branch + assert second_row[1].get_best_trial() == root.item + # And second node on full 4 first branches + assert leafs[0].get_best_trial() == second_row[0].item + assert leafs[1].get_best_trial() == second_row[0].item + assert leafs[2].get_best_trial() == second_row[0].item + assert leafs[3].get_best_trial() == second_row[0].item + # But not for fifth branch + assert leafs[4].get_best_trial() == root.item + + def test_get_best_trial_equality(self): + root = build_full_tree(4) + + leafs = root.get_nodes_at_depth(3) + assert leafs[0].item.id == "id-8" + assert leafs[0].get_best_trial() == root.item + + # Return parent in case of equality, if they are all as good, we want the earliest one. + root.children[0].item.objective.value = root.item.objective.value + assert leafs[0].get_best_trial() == root.item + + # Make sure the second one is returned if root is not as good.
+ root.item.objective.value += 1 + assert leafs[0].get_best_trial() == root.children[0].item + + def test_get_best_trial_across_jumps(self): + root_a = build_full_tree(4, starting_objective=1) + root_b = build_full_tree(4, starting_objective=10) + + a_leafs = root_a.get_nodes_at_depth(3) + b_leafs = root_b.get_nodes_at_depth(3) + assert b_leafs[0].get_best_trial() == root_b.item + a_leafs[0].set_jump(b_leafs[0].parent) + + # Should look past jump of parent + assert b_leafs[0].get_best_trial() == root_a.item + # Should look past jump directly + assert b_leafs[0].parent.get_best_trial() == root_a.item + # Should look towards root, there is no jump between root and this node + assert b_leafs[0].parent.parent.get_best_trial() == root_b.item + + def test_get_best_trial_broken_leaf(self): + root = build_full_tree(4, starting_objective=1) + + leafs = root.get_nodes_at_depth(3) + leafs[0].item.objective = None + assert leafs[0].get_best_trial() == root.item + + def test_get_best_trial_non_completed_root(self): + lineage = LineageNode(TrialStub(id="my-id")) + assert lineage.get_best_trial() is None + + +class TestLineages: + def test_add_new_trial(self): + lineages = Lineages() + assert len(lineages) == 0 + lineage = lineages.add(TrialStub(id="stub")) + assert len(lineages) == 1 + assert lineages._lineage_roots[0] is lineage + assert lineages._trial_to_lineages["stub"] is lineage + + def test_add_duplicate(self): + lineages = Lineages() + assert len(lineages) == 0 + lineage = lineages.add(TrialStub(id="stub")) + assert len(lineages) == 1 + + new_lineage = lineages.add(TrialStub(id="stub")) + assert new_lineage is lineage + assert len(lineages) == 1 + + def test_fork_existing_trial(self, tmp_path): + lineages = Lineages() + trial = TrialStub(id="stub", working_dir=os.path.join(tmp_path, "stub")) + os.makedirs(trial.working_dir) + lineage = lineages.add(trial) + assert len(lineages) == 1 + new_trial = TrialStub(id="fork", working_dir=os.path.join(tmp_path, "fork")) + 
new_lineage = lineages.fork(trial, new_trial) + assert len(lineages) == 1 + assert lineages._lineage_roots[0].children[0] is new_lineage + assert lineages._trial_to_lineages["fork"] is new_lineage + + def test_fork_non_existing_trial(self): + lineages = Lineages() + trial = TrialStub(id="stub") + new_trial = TrialStub(id="fork") + + with pytest.raises(KeyError): + new_lineage = lineages.fork(trial, new_trial) + + def test_get_lineage_existing_root_trial(self): + lineages = Lineages() + trial = TrialStub(id="stub") + lineage = lineages.add(trial) + assert lineages.get_lineage(trial) is lineage + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_get_lineage_existing_node_trial(self): + lineages = Lineages() + for root_index in range(2): + + trial = TrialStub(id=f"lineage-{root_index}-0") + lineage = lineages.add(trial) + for depth in range(1, 10): + new_trial = TrialStub(id=f"lineage-{root_index}-{depth}") + lineage = lineages.fork(trial, new_trial) + trial = new_trial + + lineage = lineages.get_lineage(TrialStub(id="lineage-0-2")) + assert lineage.root is lineages._lineage_roots[0] + assert lineage.node_depth == 2 + + lineage = lineages.get_lineage(TrialStub(id="lineage-1-5")) + assert lineage.root is lineages._lineage_roots[1] + assert lineage.node_depth == 5 + + def test_get_lineage_non_existing_trial(self): + lineages = Lineages() + + with pytest.raises(KeyError): + lineages.get_lineage(TrialStub(id="id")) + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_set_jump_existing_trial(self): + lineages = Lineages() + root_1 = TrialStub(id="root-1") + lineage_1 = lineages.add(root_1) + root_2 = TrialStub(id="root-2") + lineage_2 = lineages.add(root_2) + child_trial = TrialStub(id="child") + child_lineage = lineages.fork(root_1, child_trial) + lineages.set_jump(root_2, child_trial) + + assert child_lineage.base is lineage_2 + assert lineage_2.jumps == [child_lineage] + assert child_lineage.jumps == [] + assert lineage_2.base is None + assert 
lineage_1.jumps == [] + assert lineage_1.base is None + + def test_set_jump_non_existing_base_trial(self): + lineages = Lineages() + with pytest.raises(KeyError, match="'dontexist'"): + lineages.set_jump( + TrialStub(id="dontexist"), TrialStub(id="dontexistbutdoesntmatter") + ) + + def test_set_jump_non_existing_new_trial(self): + lineages = Lineages() + trial = TrialStub(id="exists") + lineages.add(trial) + with pytest.raises(KeyError, match="'newtrialdontexist'"): + lineages.set_jump(trial, TrialStub(id="newtrialdontexist")) + + def test_register_new_trial(self): + lineages = Lineages() + new_trial = TrialStub(id="new") + lineage = lineages.register(new_trial) + assert lineages._lineage_roots == [lineage] + + def test_register_existing_trial(self): + lineages = Lineages() + trial = TrialStub(id="my-id") + lineage = lineages.add(trial) + assert lineages._lineage_roots == [lineage] + assert lineage.item.objective is None + + trial.objective = ObjectiveStub(1) + assert lineages.register(trial) is lineage + assert lineages._lineage_roots == [lineage] + assert lineage.item.objective.value == 1 + + def test_get_elites_empty(self): + lineages = Lineages() + assert lineages.get_elites() == [] + + def test_get_elites_none_completed(self): + lineages = Lineages() + lineages.add(TrialStub(id="1")) + lineages.add(TrialStub(id="2")) + lineages.add(TrialStub(id="3")) + assert lineages.get_elites() == [] + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_get_elites_various_depths(self): + + lineages = build_population( + [ + [2, 8, 9, 9, 8], + [9, 3, 8, 9, 7], + [8, 8, 8, 4, 6], + [7, 8, 9, 8, 5], + [7, 6, 8, 7, 5], + [6, 5, 7, 7, 4], + [5, 5, 6, 7, 5], + [4, 4, 5, 8, 5], + [4, 4, 9, 8, 5], + [4, 4, 8, 8, 5], + [4, 4, 7, 8, 5], + [4, 4, 6, 8, 5], + [4, 4, 8, 8, 5], + [4, 4, 9, 8, 5], + ] + ) + + elites = sorted(lineages.get_elites(), key=lambda trial: trial.id) + assert len(elites) == 5 + assert elites[0].id == "lineage-0-0" + assert elites[0].objective.value == 2 + 
+ assert elites[1].id == "lineage-1-1" + assert elites[1].objective.value == 3 + + assert elites[2].id == "lineage-2-7" + assert elites[2].objective.value == 5 + + assert elites[3].id == "lineage-3-2" + assert elites[3].objective.value == 4 + + assert elites[4].id == "lineage-4-5" + assert elites[4].objective.value == 4 + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_get_elites_max_depth(self): + lineages = build_population( + [ + [2, 8, 9, 9, 8], + [9, 3, 8, 9, 7], + [8, 8, 8, 4, 6], + [7, 8, 9, 8, 5], + [7, 6, 8, 7, 5], + [6, 5, 7, 7, 4], + [5, 5, 6, 7, 5], + [4, 4, 5, 8, 5], + [4, 4, 9, 8, 5], + [4, 4, 8, 8, 5], + [4, 4, 7, 8, 5], + [4, 4, 6, 8, 5], + [4, 4, 8, 8, 5], + [4, 4, 9, 8, 5], + ] + ) + + elites = sorted(lineages.get_elites(0), key=lambda trial: trial.id) + assert [trial.objective.value for trial in elites] == [2, 8, 9, 9, 8] + + elites = sorted(lineages.get_elites(2), key=lambda trial: trial.id) + assert [trial.objective.value for trial in elites] == [2, 3, 8, 4, 6] + + elites = sorted(lineages.get_elites(5), key=lambda trial: trial.id) + assert [trial.objective.value for trial in elites] == [2, 3, 7, 4, 4] + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_get_trials_at_depth_given_depth(self): + population_size = 5 + generations = 10 + lineages = build_population( + [list(range(population_size)) for generation in range(generations)] + ) + for depth in [0, 1, 5, 9]: + compare_generations( + lineages.get_trials_at_depth(depth), population_size, depth + ) + + assert lineages.get_trials_at_depth(10) == [] + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_get_trials_at_depth_given_existing_trial(self): + population_size = 5 + generations = 10 + lineages = build_population( + [list(range(population_size)) for generation in range(generations)] + ) + for depth in [0, 1, 5, 9]: + lineage_index = random.choice(range(population_size)) + trial = TrialStub(id=f"lineage-{lineage_index}-{depth}") + compare_generations( + 
lineages.get_trials_at_depth(trial), population_size, depth + ) + + def test_get_trials_at_depth_given_non_existing_trial(self): + lineages = Lineages() + + with pytest.raises(KeyError, match="idontexist"): + lineages.get_trials_at_depth(TrialStub(id="idontexist")) diff --git a/tests/unittests/algo/pbt/test_pbt.py b/tests/unittests/algo/pbt/test_pbt.py new file mode 100644 index 000000000..4011fbc3d --- /dev/null +++ b/tests/unittests/algo/pbt/test_pbt.py @@ -0,0 +1,679 @@ +# -*- coding: utf-8 -*- +"""Example usage and tests for :mod:`orion.algo.pbt.pbt`.""" + +import pytest +from base import ( + ExploitStub, + ExploreStub, + ObjectiveStub, + TrialStub, + no_shutil_copytree, + sample_trials, + space, +) + +from orion.algo.pbt.pbt import PBT, compute_fidelities +from orion.core.worker.primary_algo import SpaceTransformAlgoWrapper +from orion.core.worker.trial import Trial +from orion.testing.algo import BaseAlgoTests + + +class TestComputeFidelities: + def test_base_1(self): + assert compute_fidelities(10, 10, 20, 1) == list(map(float, range(10, 21))) + + def test_other_bases(self): + assert compute_fidelities(9, 2, 2 ** 10, 2) == [2 ** i for i in range(1, 11)] + + +class TestPBTObserve: + def test_triage_unknown_trial(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + trial = pbt.space.sample(1, seed=1)[0] + trials_to_verify = pbt._triage([trial]) + + assert trials_to_verify == [] + assert len(pbt.lineages) == 0 + + @pytest.mark.parametrize("status", ["new", "reserved", "interrupted"]) + def test_triage_root_not_ready(self, status, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status=status)[0] + + pbt.register(trial) + + trials_to_verify = pbt._triage([trial]) + + assert trials_to_verify == [] + assert pbt.has_suggested(trial) + assert not pbt.has_observed(trial) + assert len(pbt.lineages) == 1 + + @pytest.mark.parametrize("status", ["broken", "completed"]) + def
test_triage_root_ready(self, status, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status="new")[0] + + pbt.register(trial) + + trial.status = status + trial._results.append(Trial.Result(name="objective", type="objective", value=1)) + + trials_to_verify = pbt._triage([trial]) + + assert trials_to_verify == [trial] + + assert pbt.has_suggested(trial) + assert pbt.has_observed(trial) + assert len(pbt.lineages) == 1 + + @pytest.mark.parametrize("status", ["broken", "completed"]) + def test_triage_root_observed(self, status, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status="completed", objective=1)[0] + + pbt.register(trial) + + trials_to_verify = pbt._triage([trial]) + + assert trials_to_verify == [] + + assert pbt.has_suggested(trial) + assert pbt.has_observed(trial) + assert len(pbt.lineages) == 1 + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_dont_queue_broken_root_for_promotions(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status="broken")[0] + pbt.register(trial) + + # Should not queue anything + pbt._queue_trials_for_promotions([trial]) + assert len(pbt._queue) == 0 + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_queue_broken_trials_for_promotions(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + trial = sample_trials(pbt.space, num=1, status="completed", objective=1)[0] + pbt.register(trial) + + new_trial = trial.branch(params={"f": pbt.fidelities[trial.params["f"]]}) + pbt.lineages.fork(trial, new_trial) + + new_trial.status = "broken" + pbt.register(new_trial) + + # Should queue the parent of the broken trial + pbt._queue_trials_for_promotions([new_trial]) + assert len(pbt._queue) == 1 + assert pbt._queue[0].id == trial.id + + @pytest.mark.usefixtures("no_shutil_copytree") + def 
test_queue_broken_trials_from_jump_for_promotions(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + parent_trial = sample_trials(pbt.space, num=1, status="completed", objective=1)[ + 0 + ] + base_trial = sample_trials( + pbt.space, num=1, seed=2, status="completed", objective=1 + )[0] + + pbt.register(parent_trial) + pbt.register(base_trial) + + new_trial = parent_trial.branch( + params={"f": pbt.fidelities[parent_trial.params["f"]]} + ) + pbt.lineages.fork(parent_trial, new_trial) + pbt.lineages.set_jump(base_trial, new_trial) + + new_trial.status = "broken" + pbt.register(new_trial) + + # Should queue the base trial of the jump, not the fork parent of the broken trial + pbt._queue_trials_for_promotions([new_trial]) + assert len(pbt._queue) == 1 + assert pbt._queue[0].id == base_trial.id + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_queue_completed_trials_for_promotions(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status="completed", objective=1)[0] + pbt.register(trial) + + # Should queue the trial itself + pbt._queue_trials_for_promotions([trial]) + assert len(pbt._queue) == 1 + assert pbt._queue[0].id == trial.id + + new_trial = trial.branch(params={"f": pbt.fidelities[trial.params["f"]]}) + pbt.lineages.fork(trial, new_trial) + + new_trial.status = "completed" + new_trial._results.append( + Trial.Result(name="objective", type="objective", value=1) + ) + pbt.register(new_trial) + + # Should queue the completed child trial itself + pbt._queue_trials_for_promotions([new_trial]) + assert len(pbt._queue) == 2 + assert pbt._queue[1].id == new_trial.id + + @pytest.mark.parametrize("status", ["new", "reserved", "interrupted"]) + def test_dont_queue_pending_trials_for_promotions(self, space, status): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + + trial = sample_trials(pbt.space, num=1, status=status)[0] + pbt.register(trial) + + # Should not queue anything +
pbt._queue_trials_for_promotions([trial]) + assert len(pbt._queue) == 0 + + +class TestPBTSuggest: + def test_generate_offspring_unknown_trial(self, space): + + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + trial = sample_trials(pbt.space, 1)[0] + with pytest.raises(RuntimeError, match="Trying to fork a trial that"): + pbt._generate_offspring(trial) + + def test_generate_offspring_exploit_skip(self, space): + + pbt = SpaceTransformAlgoWrapper( + PBT, space, exploit=ExploitStub(skip=True).configuration + ).algorithm + trial = sample_trials(pbt.space, 1, status="completed", objective=1)[0] + pbt.register(trial) + + trial_to_branch, new_trial = pbt._generate_offspring(trial) + assert trial_to_branch is None + assert new_trial is None + + def test_generate_offspring_exploit_promote(self, space): + + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub().configuration, + explore=ExploreStub(no_call=True).configuration, + ).algorithm + trial = sample_trials(pbt.space, 1, status="completed", objective=1)[0] + + # Apply the transformation and revert it to have lossy effect (like small precision) + trial = pbt.space.transform(pbt.space.reverse(pbt.space.transform(trial))) + + pbt.register(trial) + + new_params_expected = trial.params + new_params_expected["f"] = 10.9 + + trial_to_branch, new_trial = pbt._generate_offspring(trial) + assert trial_to_branch is trial + assert new_trial.params == new_params_expected + + def test_generate_offspring_exploit_branch(self, space): + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval="toset").configuration, + explore=ExploreStub(rval="toset").configuration, + ).algorithm + + trials = sample_trials(pbt.space, 3, status="completed", objective=1) + + trial_to_promote = trials[0] + exploited_trial = trials[1] + new_params_expected = trials[2].params + + pbt.exploit_func.rval = exploited_trial + pbt.explore_func.rval = new_params_expected + + # Make sure they are different + assert 
new_params_expected != trial_to_promote.params + assert new_params_expected != exploited_trial.params + + pbt.register(trials[0]) + pbt.register(trials[1]) + + trial_to_branch, new_trial = pbt._generate_offspring(trial_to_promote) + + new_params_expected["f"] = 10.9 + + assert trial_to_branch is exploited_trial + assert new_trial.params["f"] == new_params_expected["f"] + assert new_trial.params == new_params_expected + + def test_generate_offspring_timeout(self, space): + + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval=None).configuration, + explore=ExploreStub(rval="toset").configuration, + fork_timeout=0.05, + ).algorithm + trial = sample_trials(pbt.space, 1, status="completed", objective=1)[0] + pbt.explore_func.rval = trial.params + + pbt.register(trial) + parent = trial.branch(params={"f": pbt.fidelities[space["f"].low]}) + pbt.register(parent) + + with pytest.raises(RuntimeError): + pbt._generate_offspring(trial) + + def test_generate_offspring_retry_using_same_trial(self, space, monkeypatch): + """Test that when exploit returns another trial, the base one is reused and case of + duplicate samples + """ + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval="toset", should_receive="toset").configuration, + explore=ExploreStub(rval="toset").configuration, + fork_timeout=0.0001, + ).algorithm + + trials = sample_trials(pbt.space, 3, status="completed", objective=1) + parent_trial = trials[0] + base_trial = trials[1] + sample_params = trials[2].params + + pbt.exploit_func.rval = parent_trial + pbt.exploit_func.should_receive = base_trial + pbt.explore_func.rval = sample_params + + pbt.register(parent_trial) + pbt.register(base_trial) + + # The trial sampled will already be registered + sample_params["f"] = pbt.fidelities[space["f"].low] + child = parent_trial.branch(params=sample_params) + pbt.register(child) + + # Exploit will return parent_trial, but Explore will return params of child, sampling + # a 
duplicate, since child is already registered. ExploitStub.should_receive will + # test that base_trial is passed as expected to exploit when making more attempts + # of exploit and explore. + with pytest.raises(RuntimeError): + pbt._generate_offspring(base_trial) + + def test_fork_lineages_empty_queue(self, space): + pbt = SpaceTransformAlgoWrapper(PBT, space).algorithm + assert pbt._fork_lineages(10) == [] + + def test_fork_lineages_skip_and_requeue_trials(self, space): + num = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(skip=True).configuration, + ).algorithm + + trials = sample_trials(pbt.space, num, status="completed", objective=1) + + for trial in trials: + pbt.register(trial) + + pbt._queue = trials[:] + + assert pbt._fork_lineages(num) == [] + assert len(pbt._queue) == num + assert pbt._queue == trials + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_lineages_promote_trial(self, space): + num = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(skip=None).configuration, + ).algorithm + + trials = sample_trials(pbt.space, num, status="completed", objective=1) + + for trial in trials: + pbt.register(trial) + + pbt._queue = trials[:] + + branched_trials = pbt._fork_lineages(num) + assert len(trials) == num + assert len(branched_trials) == num + assert pbt._queue == [] + + for trial, branched_trial in zip(trials, branched_trials): + expected_params = trial.params + expected_params["f"] = 10.9 + assert branched_trial.params == expected_params + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_lineages_branch_trials(self, space): + num = 10 + + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval="toset").configuration, + fork_timeout=0.05, + ).algorithm + + trials = sample_trials(pbt.space, num + 1, status="completed", objective=1) + trial_to_branch = trials[-1] + pbt.exploit_func.rval = trial_to_branch + for trial in trials: + pbt.register(trial) +
+ pbt._queue = trials[:-1] + + branched_trials = pbt._fork_lineages(num) + + assert len(trials) == num + 1 + assert len(branched_trials) == num + assert pbt._queue == [] + + for trial, branched_trial in zip(trials, branched_trials): + # Check if parent is correct + assert branched_trial.parent == trial_to_branch.id + # Check in lineage if jump is set from correct base trial + assert pbt.lineages.get_lineage(branched_trial).base.item.id == trial.id + # Check if params are not duplicated + should_not_be_params = trial_to_branch.params + should_not_be_params["f"] = 10.9 + assert branched_trial.params["f"] == should_not_be_params["f"] + assert branched_trial.params != should_not_be_params + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_lineages_branch_duplicates(self, space): + num = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval="toset").configuration, + explore=ExploreStub(rval="toset").configuration, + fork_timeout=0.05, + ).algorithm + + trials = sample_trials(pbt.space, num + 1, status="completed", objective=1) + new_params_expected = trials[-1].params + pbt.exploit_func.rval = trials[-1] + pbt.explore_func.rval = new_params_expected + for trial in trials: + pbt.register(trial) + + pbt._queue = trials[:-1] + + with pytest.raises(RuntimeError): + pbt._fork_lineages(num) + + # First queue.pop is fine, fails on second queue.pop. 
+ assert len(pbt._queue) == num - 2 + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_lineages_num_larger_than_queue(self, space): + num = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval=None).configuration, + ).algorithm + + trials = sample_trials(pbt.space, num, status="completed", objective=1) + for trial in trials: + pbt.register(trial) + + pbt._queue = trials[:] + + num_fork = 4 + branched_trials = pbt._fork_lineages(num_fork) + + assert len(branched_trials) == num_fork + assert len(pbt._queue) == num - num_fork + + trial_ids = [trial.id for trial in trials] + + assert [trial.parent for trial in branched_trials] == trial_ids[:num_fork] + assert [trial.id for trial in pbt._queue] == trial_ids[num_fork:] + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_fork_lineages_num_smaller_than_queue(self, space): + num = 4 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + exploit=ExploitStub(rval=None).configuration, + ).algorithm + + trials = sample_trials(pbt.space, num, status="completed", objective=1) + for trial in trials: + pbt.register(trial) + + pbt._queue = trials[:] + + num_fork = 10 + branched_trials = pbt._fork_lineages(num_fork) + + assert len(branched_trials) == num + assert len(pbt._queue) == 0 + + trial_ids = [trial.id for trial in trials] + + assert [trial.parent for trial in branched_trials] == trial_ids + + def test_suggest_num_population_size_sample(self, space, mocker): + population_size = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, space, population_size=population_size + ).algorithm + + pbt_sample_mock = mocker.spy(pbt, "_sample") + pbt_fork_mock = mocker.spy(pbt, "_fork_lineages") + + num = 6 + assert len(pbt.suggest(num)) == num + + pbt_sample_mock.assert_called_with(num) + pbt_fork_mock.assert_called_with(0) + + assert len(pbt.suggest(num)) == 4 + + pbt_sample_mock.assert_called_with(4) + pbt_fork_mock.assert_called_with(2) + + def 
test_suggest_num_population_size_sample_broken(self, space, mocker): + population_size = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, space, population_size=population_size + ).algorithm + + pbt_sample_mock = mocker.spy(pbt, "_sample") + pbt_fork_mock = mocker.spy(pbt, "_fork_lineages") + + num = 10 + trials = pbt.suggest(num) + assert len(trials) == num + + pbt_sample_mock.assert_called_with(num) + pbt_fork_mock.assert_called_with(0) + + n_broken = 3 + for trial in trials[:n_broken]: + trial.status = "broken" + + pbt.observe(trials) + + assert len(pbt.suggest(num)) == n_broken + + # 3 trials are broken, need to resample 3 trials, and can try to fork 7 trials + pbt_sample_mock.assert_called_with(n_broken) + pbt_fork_mock.assert_called_with(7) + + @pytest.mark.usefixtures("no_shutil_copytree") + def test_suggest_num_population_size_fork_completed(self, space, mocker): + population_size = 10 + pbt = SpaceTransformAlgoWrapper( + PBT, + space, + population_size=population_size, + exploit=ExploitStub(rval=None).configuration, + ).algorithm + + pbt_sample_mock = mocker.spy(pbt, "_sample") + pbt_fork_mock = mocker.spy(pbt, "_fork_lineages") + + num = 4 + trials = pbt.suggest(num) + assert len(trials) == num + + pbt_sample_mock.assert_called_with(num) + pbt_fork_mock.assert_called_with(0) + + n_completed = 3 + for trial in trials[:n_completed]: + trial.exp_working_dir = "/nothing" + trial.status = "completed" + trial._results.append( + Trial.Result(name="objective", type="objective", value=1) + ) + + pbt.observe(trials) + assert len(pbt._queue) == n_completed + + # There are 4 trials sampled, out of which 3 are completed. Still missing 6 trials + # for base population. + assert len(pbt.suggest(num)) == num + pbt_sample_mock.assert_called_with(num) + pbt_fork_mock.assert_called_with(0) + + # There are 8 trials sampled, out of which 3 are completed. Still missing 2 trials + # for base population. 
+ assert len(pbt.suggest(num)) == num + pbt_sample_mock.assert_called_with(2) + pbt_fork_mock.assert_called_with(2) + + +population_size = 10 +generations = 5 + + +@pytest.mark.usefixtures("no_shutil_copytree") +class TestGenericPBT(BaseAlgoTests): + algo_name = "pbt" + max_trials = population_size * generations + config = { + "seed": 123456, + "population_size": population_size, + "generations": generations, + "exploit": { + "of_type": "PipelineExploit", + "exploit_configs": [ + { + "of_type": "BacktrackExploit", + "min_forking_population": population_size / 2, + "candidate_pool_ratio": 0.0, + "truncation_quantile": 1.0, + }, + { + "of_type": "TruncateExploit", + "min_forking_population": population_size / 2, + "candidate_pool_ratio": 0.3, + "truncation_quantile": 0.9, + }, + ], + }, + "explore": { + "of_type": "PipelineExplore", + "explore_configs": [ + { + "of_type": "ResampleExplore", + "probability": 0.3, + }, + { + "of_type": "PerturbExplore", + "factor": 1.5, + "volatility": 0.005, + }, + ], + }, + "fork_timeout": 5, + } + space = {"x": "uniform(0, 1)", "y": "uniform(0, 1)", "f": "fidelity(1, 10, base=1)"} + + def test_no_fidelity(self): + space = self.create_space({"x": "uniform(0, 1)", "y": "uniform(0, 1)"}) + + with pytest.raises( + RuntimeError, match="PBT cannot be used if space does not contain" + ): + self.create_algo(space=space) + + @pytest.mark.skip( + reason="There are no good reasons to use PBT if search space is so small" + ) + def test_is_done_cardinality(self): + pass + + @pytest.mark.parametrize("num", [100000, 1]) + def test_is_done_max_trials(self, num): + space = self.create_space() + + MAX_TRIALS = 10 + algo = self.create_algo(space=space) + algo.algorithm.max_trials = MAX_TRIALS + + objective = 0 + while not algo.is_done: + trials = algo.suggest(num) + assert trials is not None + if trials: + self.observe_trials(trials, algo, objective) + objective += len(trials) + + # PBT should ignore max trials.
+ assert algo.n_observed > MAX_TRIALS + # PBT should stop when all trials of last generation are completed. + assert algo.n_observed == population_size * (generations + 1) + assert algo.is_done + + @pytest.mark.skip(reason="See https://github.com/Epistimio/orion/issues/599") + def test_optimize_branin(self): + pass + + def assert_callbacks(self, spy, num, algo): + def check_population_size(gen_population_size, depth, expected): + assert ( + gen_population_size == expected + ), f"population of {gen_population_size} at depth {depth}, should be {expected}" + + pbt = algo.algorithm + remaining_num = num + + for depth in range(generations): + gen_population_size = len(pbt.lineages.get_trials_at_depth(depth)) + if remaining_num > population_size: + expected_population_size = population_size + else: + expected_population_size = remaining_num + + check_population_size(gen_population_size, depth, expected_population_size) + + remaining_num = max(remaining_num - expected_population_size, 0) + + +TestGenericPBT.set_phases( + [ + ("random", 5, "space.sample"), + ("generation_2", 2 * population_size, "_generate_offspring"), + ("generation_3", 3 * population_size, "_generate_offspring"), + ] +) diff --git a/tests/unittests/algo/test_asha.py b/tests/unittests/algo/test_asha.py index f86dec829..ba4d12094 100644 --- a/tests/unittests/algo/test_asha.py +++ b/tests/unittests/algo/test_asha.py @@ -705,7 +705,7 @@ def assert_callbacks(self, spy, num, algo): if num == 0: return - repetition_id, rung_id = self.infer_repetition_and_rung(num) + repetition_id, rung_id = self.infer_repetition_and_rung(num - 1) brackets = algo.algorithm.brackets diff --git a/tests/unittests/algo/test_evolution_es.py b/tests/unittests/algo/test_evolution_es.py index 0823a14ee..ee315f198 100644 --- a/tests/unittests/algo/test_evolution_es.py +++ b/tests/unittests/algo/test_evolution_es.py @@ -473,7 +473,7 @@ def assert_callbacks(self, spy, num, algo): if num == 0: return - repetition_id, rung_id =
self.infer_repetition_and_rung(num) + repetition_id, rung_id = self.infer_repetition_and_rung(num - 1) brackets = algo.algorithm.brackets diff --git a/tests/unittests/algo/test_hyperband.py b/tests/unittests/algo/test_hyperband.py index 58da7da18..fcb34d061 100644 --- a/tests/unittests/algo/test_hyperband.py +++ b/tests/unittests/algo/test_hyperband.py @@ -11,6 +11,7 @@ from orion.algo.hyperband import Hyperband, HyperbandBracket, compute_budgets from orion.algo.space import Fidelity, Integer, Real, Space from orion.core.utils.flatten import flatten +from orion.core.worker.trial import Trial from orion.testing.algo import BaseAlgoTests, phase from orion.testing.trial import compare_trials, create_trial @@ -749,12 +750,22 @@ def test_full_process(self, monkeypatch, hyperband): mock_samples(hyperband, copy.deepcopy(sample_trials)) # Fill all brackets' first rung + first_rung = hyperband.suggest(100) + first_bracket_first_rung = first_rung[6:] + second_bracket_first_rung = first_rung[3:6] + third_bracket_first_rung = first_rung[:3] - trials = hyperband.suggest(100) - - compare_trials(trials[:3], [create_trial_for_hb((9, i)) for i in range(3)]) - compare_trials(trials[3:6], [create_trial_for_hb((3, i)) for i in range(3, 6)]) - compare_trials(trials[6:], [create_trial_for_hb((1, i)) for i in range(6, 15)]) + compare_trials( + first_bracket_first_rung, + [create_trial_for_hb((1, i)) for i in range(6, 15)], + ) + compare_trials( + second_bracket_first_rung, + [create_trial_for_hb((3, i)) for i in range(3, 6)], + ) + compare_trials( + third_bracket_first_rung, [create_trial_for_hb((9, i)) for i in range(3)] + ) assert hyperband.brackets[0].has_rung_filled(0) assert not hyperband.brackets[0].is_ready() @@ -762,18 +773,22 @@ def test_full_process(self, monkeypatch, hyperband): assert hyperband.suggest(100) == [] # Observe first bracket first rung - - for i in range(9): - hyperband.observe([create_trial_for_hb((1, i + 3 + 3), objective=16 - i)]) + for i, trial in 
enumerate(first_bracket_first_rung): + trial.status = "completed" + trial._results.append( + Trial.Result(name="objective", type="objective", value=16 - i) + ) + hyperband.observe(first_bracket_first_rung) assert hyperband.brackets[0].is_ready() assert not hyperband.brackets[1].is_ready() assert not hyperband.brackets[2].is_ready() # Promote first bracket first rung - trials = hyperband.suggest(100) + first_bracket_second_rung = hyperband.suggest(100) compare_trials( - trials, [create_trial_for_hb((3, 3 + 3 + 9 - 1 - i)) for i in range(3)] + first_bracket_second_rung, + [create_trial_for_hb((3, 3 + 3 + 9 - 1 - i)) for i in range(3)], ) assert hyperband.brackets[0].has_rung_filled(1) @@ -782,18 +797,20 @@ def test_full_process(self, monkeypatch, hyperband): assert not hyperband.brackets[2].is_ready() # Observe first bracket second rung - for i in range(3): - hyperband.observe( - [create_trial_for_hb((3, 3 + 3 + 9 - 1 - i), objective=8 - i)] + for i, trial in enumerate(first_bracket_second_rung): + trial.status = "completed" + trial._results.append( + Trial.Result(name="objective", type="objective", value=8 - i) ) + hyperband.observe(first_bracket_second_rung) assert hyperband.brackets[0].is_ready() assert not hyperband.brackets[1].is_ready() assert not hyperband.brackets[2].is_ready() # Promote first bracket second rung - trials = hyperband.suggest(100) - compare_trials(trials, [create_trial_for_hb((9, 12))]) + first_bracket_third_rung = hyperband.suggest(100) + compare_trials(first_bracket_third_rung, [create_trial_for_hb((9, 12))]) assert hyperband.brackets[0].has_rung_filled(2) assert not hyperband.brackets[0].is_ready() @@ -801,16 +818,20 @@ def test_full_process(self, monkeypatch, hyperband): assert not hyperband.brackets[2].is_ready() # Observe second bracket first rung - for i in range(3): - hyperband.observe([create_trial_for_hb((3, i + 3), objective=8 - i)]) + for i, trial in enumerate(second_bracket_first_rung): + trial.status = "completed" + 
trial._results.append( + Trial.Result(name="objective", type="objective", value=8 - i) + ) + hyperband.observe(second_bracket_first_rung) assert not hyperband.brackets[0].is_ready() assert hyperband.brackets[1].is_ready() assert not hyperband.brackets[2].is_ready() # Promote second bracket first rung - trials = hyperband.suggest(100) - compare_trials(trials, [create_trial_for_hb((9, 5))]) + second_bracket_second_rung = hyperband.suggest(100) + compare_trials(second_bracket_second_rung, [create_trial_for_hb((9, 5))]) assert not hyperband.brackets[0].is_ready() assert hyperband.brackets[1].has_rung_filled(1) @@ -818,8 +839,12 @@ def test_full_process(self, monkeypatch, hyperband): assert not hyperband.brackets[2].is_ready() # Observe third bracket first rung - for i in range(3): - hyperband.observe([create_trial_for_hb((9, i), objective=3 - i)]) + for i, trial in enumerate(third_bracket_first_rung): + trial.status = "completed" + trial._results.append( + Trial.Result(name="objective", type="objective", value=3 - i) + ) + hyperband.observe(third_bracket_first_rung) assert not hyperband.brackets[0].is_ready(2) assert not hyperband.brackets[1].is_ready(1) @@ -827,17 +852,18 @@ def test_full_process(self, monkeypatch, hyperband): assert hyperband.brackets[2].is_done # Observe second bracket second rung - for i in range(1): - hyperband.observe( - [create_trial_for_hb((9, 3 + 3 - 1 - i), objective=5 - i)] + for i, trial in enumerate(second_bracket_second_rung): + trial.status = "completed" + trial._results.append( + Trial.Result(name="objective", type="objective", value=5 - i) ) + hyperband.observe(second_bracket_second_rung) assert not hyperband.brackets[0].is_ready(2) assert hyperband.brackets[1].is_ready(1) assert hyperband.brackets[1].is_done - # Observe first bracket third rung - hyperband.observe(trials) + hyperband.observe(first_bracket_third_rung) assert hyperband.is_done assert hyperband.brackets[0].is_done @@ -965,7 +991,7 @@ def assert_callbacks(self, spy, num, 
algo): if num == 0: return - repetition_id, rung_id = self.infer_repetition_and_rung(num) + repetition_id, rung_id = self.infer_repetition_and_rung(num - 1) brackets = algo.algorithm.brackets diff --git a/tests/unittests/core/conftest.py b/tests/unittests/core/conftest.py index 64d9f4988..18176ad36 100644 --- a/tests/unittests/core/conftest.py +++ b/tests/unittests/core/conftest.py @@ -189,7 +189,7 @@ def with_user_dendi(monkeypatch): {"name": "/decoding_layer", "type": "categorical", "value": "rnn"}, {"name": "/encoding_layer", "type": "categorical", "value": "lstm"}, ], - "parents": [], + "parent": None, }, { "status": "completed", @@ -210,7 +210,7 @@ def with_user_dendi(monkeypatch): }, {"name": "/encoding_layer", "type": "categorical", "value": "gru"}, ], - "parents": [], + "parent": None, }, { "status": "completed", @@ -226,7 +226,7 @@ def with_user_dendi(monkeypatch): {"name": "/decoding_layer", "type": "categorical", "value": "rnn"}, {"name": "/encoding_layer", "type": "categorical", "value": "rnn"}, ], - "parents": [], + "parent": None, }, { "status": "new", @@ -239,7 +239,7 @@ def with_user_dendi(monkeypatch): {"name": "/decoding_layer", "type": "categorical", "value": "rnn"}, {"name": "/encoding_layer", "type": "categorical", "value": "gru"}, ], - "parents": [], + "parent": None, }, { "status": "new", @@ -256,7 +256,7 @@ def with_user_dendi(monkeypatch): }, {"name": "/encoding_layer", "type": "categorical", "value": "rnn"}, ], - "parents": [], + "parent": None, }, { "status": "interrupted", @@ -273,7 +273,7 @@ def with_user_dendi(monkeypatch): }, {"name": "/encoding_layer", "type": "categorical", "value": "lstm"}, ], - "parents": [], + "parent": None, }, { "status": "suspended", @@ -286,7 +286,7 @@ def with_user_dendi(monkeypatch): {"name": "/decoding_layer", "type": "categorical", "value": "gru"}, {"name": "/encoding_layer", "type": "categorical", "value": "lstm"}, ], - "parents": [], + "parent": None, }, ] diff --git 
a/tests/unittests/core/experiment.yaml b/tests/unittests/core/experiment.yaml index d74dfba5d..df7073944 100644 --- a/tests/unittests/core/experiment.yaml +++ b/tests/unittests/core/experiment.yaml @@ -510,7 +510,7 @@ - name: /encoding_layer type: categorical value: lstm - parents: [] + parent: None - experiment: supernaedo2-dendi status: completed @@ -535,7 +535,7 @@ - name: /encoding_layer type: categorical value: gru - parents: [] + parent: None - experiment: supernaedo2-dendi @@ -558,7 +558,7 @@ - name: /encoding_layer type: categorical value: rnn - parents: [] + parent: None - experiment: supernaedo2-dendi @@ -579,7 +579,7 @@ - name: /encoding_layer type: categorical value: gru - parents: [] + parent: None - experiment: supernaedo2-dendi @@ -599,7 +599,7 @@ - name: /encoding_layer type: categorical value: rnn - parents: [] + parent: None - experiment: supernaedo2-dendi status: interrupted @@ -618,7 +618,7 @@ - name: /encoding_layer type: categorical value: lstm - parents: [] + parent: None - experiment: supernaedo2-dendi status: suspended @@ -637,7 +637,7 @@ - name: /encoding_layer type: categorical value: lstm - parents: [] + parent: None - experiment: supernaedo4 @@ -660,7 +660,7 @@ - name: /decoding_layer type: categorical value: rnn - parents: [] + parent: None --- diff --git a/tests/unittests/core/test_transformer.py b/tests/unittests/core/test_transformer.py index 76448134d..44a73d741 100644 --- a/tests/unittests/core/test_transformer.py +++ b/tests/unittests/core/test_transformer.py @@ -1417,13 +1417,13 @@ def test_change_trial_params(space, rspace): trial = space.sample()[0] point = format_trials.trial_to_tuple(trial, space) - rtrial.working_dir = working_dir + rtrial.exp_working_dir = working_dir rtrial.status = status restored_trial = change_trial_params(rtrial, point, space) # Test that attributes are conserved - assert restored_trial.working_dir == working_dir + assert restored_trial.exp_working_dir == working_dir assert restored_trial.status == 
status # Test params are updated diff --git a/tests/unittests/core/evc/test_tree.py b/tests/unittests/core/utils/test_tree.py similarity index 84% rename from tests/unittests/core/evc/test_tree.py rename to tests/unittests/core/utils/test_tree.py index bb2364581..01bcd8fce 100644 --- a/tests/unittests/core/evc/test_tree.py +++ b/tests/unittests/core/utils/test_tree.py @@ -1,6 +1,6 @@ -"""Test for generic :class:`orion.core.evc.tree`""" +"""Test for generic :class:`orion.core.utils.tree`""" -from orion.core.evc.tree import ( +from orion.core.utils.tree import ( DepthFirstTraversal, PreOrderTraversal, TreeNode, @@ -8,6 +8,33 @@ ) +def build_full_tree(depth, child_per_parent=2): + """Build a full tree + + Parameters + ---------- + depth: int + Depth of the tree + + child_per_parent: int, optional + Number of child per node. Default: 2 + """ + + root = TreeNode(0) + node_buffer = [root] + next_nodes = [] + node_item = 1 + for i in range(depth - 1): + for node in node_buffer: + for k in range(child_per_parent): + next_nodes.append(TreeNode(node_item, parent=node)) + node_item += 1 + node_buffer = next_nodes + next_nodes = [] + + return root + + def test_node_creation(): """Test empty initialization of tree node""" TreeNode("test") @@ -427,6 +454,51 @@ def increment_parent(node, parent): assert [node.item for node in rval.root] == [4, 3, 2] +def test_leafs(): + root = build_full_tree(4) + + assert [node.item for node in root.leafs] == list(range(7, 15)) + + root.children[0].children[0].children[0].drop_parent() + assert [node.item for node in root.leafs] == list(range(8, 15)) + + root.children[0].children[1].drop_parent() + assert [node.item for node in root.leafs] == [8, 11, 12, 13, 14] + + root.children[1].children[0].drop_children() + assert [node.item for node in root.leafs] == [8, 5, 13, 14] + + root.children[1].drop_children() + assert [node.item for node in root.leafs] == [8, 2] + + root.drop_children() + assert [node.item for node in root.leafs] == [0] + + +def 
test_node_depth(): + root = build_full_tree(3) + assert root.node_depth == 0 + assert root.children[0].node_depth == 1 + assert root.children[0].children[0].node_depth == 2 + + +def test_get_nodes_at_depth(): + root = build_full_tree(5) + + def test_for_node(node): + + assert node.get_nodes_at_depth(0) == [node] + assert node.get_nodes_at_depth(1) == node.children + assert ( + node.get_nodes_at_depth(2) + == node.children[0].children + node.children[1].children + ) + + test_for_node(root) + test_for_node(root.children[0]) + test_for_node(root.children[1]) + + def test_flattened(): """Test flattened tree into a list, retrieving items""" # a diff --git a/tests/unittests/core/utils/test_working_dir.py b/tests/unittests/core/utils/test_working_dir.py index d0cd292f5..c0ec260a3 100644 --- a/tests/unittests/core/utils/test_working_dir.py +++ b/tests/unittests/core/utils/test_working_dir.py @@ -3,41 +3,70 @@ """Collection of tests for :mod:`orion.core.utils.working_dir`.""" import os import shutil +from pathlib import Path import pytest -from orion.core.utils.working_dir import WorkingDir +from orion.core.utils.working_dir import SetupWorkingDir -@pytest.fixture -def path(tmp_path): - """Return a path as a string.""" - return str(tmp_path) + "/hi_hello" +class ExperimentStub: + def __init__(self, working_dir=None): + self.name = "exp-name" + self.version = 1 + self.working_dir = working_dir -def test_create_permanent_dir(tmp_path, path): +def test_exp_with_new_working_dir(tmp_path): """Check if a permanent directory is created.""" - with WorkingDir(tmp_path, temp=False, prefix="hi", suffix="_hello"): - assert os.path.exists(path) + tmp_path = os.path.join(tmp_path, "orion") - assert os.path.exists(path) + experiment = ExperimentStub(tmp_path) + assert not os.path.exists(tmp_path) -def test_temp_dir_when_exists(tmp_path, path): - """Check if a permanent directory is deleted.""" - os.mkdir(path) + with SetupWorkingDir(experiment): + assert os.path.exists(tmp_path) + + 
assert experiment.working_dir == tmp_path + assert os.path.exists(tmp_path) + + shutil.rmtree(tmp_path) + + +def test_exp_with_existing_working_dir(tmp_path): + """Check if an existing permanent directory is not overwritten.""" + tmp_path = os.path.join(tmp_path, "orion") + + experiment = ExperimentStub(tmp_path) - with WorkingDir(tmp_path, temp=True, prefix="hi", suffix="_hello"): - assert os.path.exists(path) + os.makedirs(tmp_path) - assert os.path.exists(path) + assert os.path.exists(tmp_path) - shutil.rmtree(path) + file_path = os.path.join(tmp_path, "some_file") + Path(file_path).touch() + assert os.path.exists(file_path) + + with SetupWorkingDir(experiment): + assert os.path.exists(tmp_path) + + assert experiment.working_dir == tmp_path + assert os.path.exists(tmp_path) + assert os.path.exists(file_path) + + shutil.rmtree(tmp_path) + + +def test_exp_with_no_working_dir(): + """Check that a temporary working dir is created, then deleted on exit.""" + experiment = ExperimentStub(None) -def test_create_temp_dir(tmp_path): - """Check if a temporary directory is created.""" - with WorkingDir(tmp_path, prefix="hi", suffix="_hello") as w: - assert os.path.exists(w) + with SetupWorkingDir(experiment): + assert experiment.working_dir is not None + assert os.path.exists(experiment.working_dir) + tmp_path = experiment.working_dir - assert not os.path.exists(w) + assert experiment.working_dir is None + assert not os.path.exists(tmp_path) diff --git a/tests/unittests/core/worker/test_consumer.py b/tests/unittests/core/worker/test_consumer.py index b95718271..a1a706250 100644 --- a/tests/unittests/core/worker/test_consumer.py +++ b/tests/unittests/core/worker/test_consumer.py @@ -3,6 +3,7 @@ """Collection of tests for :mod:`orion.core.worker.consumer`.""" import logging import os +import shutil import signal import subprocess import tempfile @@ -47,27 +48,33 @@ def mock_popen(self, *args, **kwargs): monkeypatch.setattr(subprocess.Popen, "wait", mock_popen) trial = tuple_to_trial((1.0,), exp.space)
+ exp.register_trial(trial) con = Consumer(exp) with pytest.raises(KeyboardInterrupt): con(trial) + shutil.rmtree(trial.working_dir) + @pytest.mark.usefixtures("storage") -def test_trial_working_dir_is_changed(config): - """Check that trial has its working_dir attribute changed.""" +def test_trial_working_dir_is_created(config): + """Check that trial working dir is created.""" exp = experiment_builder.build(**config) trial = tuple_to_trial((1.0,), exp.space) exp.register_trial(trial, status="reserved") + assert not os.path.exists(trial.working_dir) + con = Consumer(exp) con(trial) - assert trial.working_dir is not None - assert trial.working_dir == con.working_dir + "/exp_" + trial.id + assert os.path.exists(trial.working_dir) + + shutil.rmtree(trial.working_dir) def setup_code_change_mock(config, monkeypatch, ignore_code_changes): @@ -104,6 +111,8 @@ def test_code_changed_evc_disabled(config, monkeypatch, caplog): con(trial) assert "Code changed between execution of 2 trials" in caplog.text + shutil.rmtree(trial.working_dir) + @pytest.mark.usefixtures("storage") def test_code_changed_evc_enabled(config, monkeypatch): @@ -116,6 +125,8 @@ def test_code_changed_evc_enabled(config, monkeypatch): assert exc.match("Code changed between execution of 2 trials") + shutil.rmtree(trial.working_dir) + @pytest.mark.usefixtures("storage") def test_retrieve_result_nofile(config): diff --git a/tests/unittests/core/worker/test_experiment.py b/tests/unittests/core/worker/test_experiment.py index 47b3aec2c..ab0463e4a 100644 --- a/tests/unittests/core/worker/test_experiment.py +++ b/tests/unittests/core/worker/test_experiment.py @@ -347,11 +347,12 @@ def test_update_completed_trial(random_dt): @pytest.mark.usefixtures("with_user_tsirif") -def test_register_trials(random_dt): +def test_register_trials(tmp_path, random_dt): """Register a list of newly proposed trials/parameters.""" with OrionState(): exp = Experiment("supernaekei", mode="x") exp._id = 0 + exp.working_dir = tmp_path 
trials = [ Trial(params=[{"name": "a", "type": "integer", "value": 5}]), @@ -368,6 +369,8 @@ def test_register_trials(random_dt): assert yo[1]["status"] == "new" assert yo[0]["submit_time"] == random_dt assert yo[1]["submit_time"] == random_dt + assert yo[0]["exp_working_dir"] == tmp_path + assert yo[1]["exp_working_dir"] == tmp_path class TestToPandas: diff --git a/tests/unittests/core/worker/test_trial.py b/tests/unittests/core/worker/test_trial.py index 6ff9f8a2a..386c7c9ae 100644 --- a/tests/unittests/core/worker/test_trial.py +++ b/tests/unittests/core/worker/test_trial.py @@ -1,6 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Collection of tests for :mod:`orion.core.worker.trial`.""" +import copy +import os + import bson import numpy import pytest @@ -14,7 +17,55 @@ def base_trial(): y = {"name": "/y", "value": [1, 2], "type": "integer"} objective = {"name": "objective", "value": 10, "type": "objective"} - return Trial(experiment=1, status="completed", params=[x, y], results=[objective]) + return Trial( + experiment=1, + status="completed", + params=[x, y], + results=[objective], + exp_working_dir="/some/path", + ) + + +@pytest.fixture +def params(): + return [ + dict( + name="/decoding_layer", + type="categorical", + value="lstm_with_attention", + ), + dict(name="/encoding_layer", type="categorical", value="gru"), + ] + + +@pytest.fixture +def trial_config(params): + return dict( + _id="ebcf6c6c8604f96444af1c3e519aea7f", + id_override=None, + experiment="supernaedo2-dendi", + exp_working_dir=None, + status="completed", + worker="23415151", + submit_time="2017-11-22T23:00:00", + start_time=150, + end_time="2017-11-23T00:00:00", + heartbeat=None, + results=[ + dict( + name="objective-name", + type="objective", + value=2, + ), + dict( + name="gradient-name", + type="gradient", + value=[-0.1, 2], + ), + ], + params=params, + parent=None, + ) class TestTrial(object): @@ -31,25 +82,23 @@ def test_init_empty(self): assert t.end_time is None assert 
t.results == [] assert t.params == {} - assert t.working_dir is None + assert t.exp_working_dir is None - def test_init_full(self, exp_config): + def test_init_full(self, trial_config): """Initialize with a dictionary with complete specification.""" - t = Trial(**exp_config[1][1]) - assert t.experiment == exp_config[1][1]["experiment"] - assert t.status == exp_config[1][1]["status"] - assert t.worker == exp_config[1][1]["worker"] - assert t.submit_time == exp_config[1][1]["submit_time"] - assert t.start_time == exp_config[1][1]["start_time"] - assert t.end_time == exp_config[1][1]["end_time"] - assert ( - list(map(lambda x: x.to_dict(), t.results)) == exp_config[1][1]["results"] - ) - assert t.results[0].name == exp_config[1][1]["results"][0]["name"] - assert t.results[0].type == exp_config[1][1]["results"][0]["type"] - assert t.results[0].value == exp_config[1][1]["results"][0]["value"] - assert list(map(lambda x: x.to_dict(), t._params)) == exp_config[1][1]["params"] - assert t.working_dir is None + t = Trial(**trial_config) + assert t.experiment == trial_config["experiment"] + assert t.status == trial_config["status"] + assert t.worker == trial_config["worker"] + assert t.submit_time == trial_config["submit_time"] + assert t.start_time == trial_config["start_time"] + assert t.end_time == trial_config["end_time"] + assert list(map(lambda x: x.to_dict(), t.results)) == trial_config["results"] + assert t.results[0].name == trial_config["results"][0]["name"] + assert t.results[0].type == trial_config["results"][0]["type"] + assert t.results[0].value == trial_config["results"][0]["value"] + assert list(map(lambda x: x.to_dict(), t._params)) == trial_config["params"] + assert t.exp_working_dir is None def test_higher_shapes_not_ndarray(self): """Test that `numpy.ndarray` values are converted to list.""" @@ -111,10 +160,10 @@ def test_value_not_allowed_type(self): with pytest.raises(ValueError): v.type = "asfda" - def test_conversion_to_dict(self, exp_config): + def 
test_conversion_to_dict(self, trial_config): """Convert to dictionary form for database using ``dict``.""" - t = Trial(**exp_config[1][1]) - assert t.to_dict() == exp_config[1][1] + t = Trial(**trial_config) + assert t.to_dict() == trial_config def test_build_trials(self, exp_config): """Convert to objects form using `Trial.build`.""" @@ -128,25 +177,25 @@ def test_value_equal(self, exp_config): assert trials[0]._params[0] == Trial.Param(**exp_config[1][0]["params"][0]) assert trials[0]._params[1] != Trial.Param(**exp_config[1][0]["params"][0]) - def test_str_trial(self, exp_config): + def test_str_trial(self, trial_config): """Test representation of `Trial`.""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) assert ( str(t) == "Trial(experiment='supernaedo2-dendi', status='completed', " "params=/decoding_layer:lstm_with_attention,/encoding_layer:gru)" ) - def test_str_value(self, exp_config): + def test_str_value(self, trial_config): """Test representation of `Trial.Value`.""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) assert ( str(t._params[1]) == "Param(name='/encoding_layer', type='categorical', value='gru')" ) - def test_invalid_result(self, exp_config): + def test_invalid_result(self, trial_config): """Test that invalid objectives cannot be set""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) # Make sure valid ones pass t.results = [ @@ -292,9 +341,9 @@ def test_statistics_property(self): assert expected == trial.statistics - def test_params_repr_property(self, exp_config): + def test_params_repr_property(self, trial_config): """Check property `Trial.params_repr`.""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) assert ( Trial.format_params(t._params) == "/decoding_layer:lstm_with_attention,/encoding_layer:gru" @@ -307,9 +356,9 @@ def test_params_repr_property(self, exp_config): t = Trial() assert Trial.format_params(t._params) == "" - def test_hash_name_property(self, exp_config): + def 
test_hash_name_property(self, trial_config): """Check property `Trial.hash_name`.""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) assert t.hash_name == "ebcf6c6c8604f96444af1c3e519aea7f" t = Trial() @@ -317,49 +366,63 @@ def test_hash_name_property(self, exp_config): t.hash_name assert "params" in str(exc.value) - def test_param_name_property(self, exp_config): + def test_param_name_property(self, trial_config): """Check property `Trial.hash_params`.""" - exp_config[1][1]["params"].append( + trial_config["params"].append( {"name": "/max_epoch", "type": "fidelity", "value": "1"} ) - t1 = Trial(**exp_config[1][1]) - exp_config[1][1]["params"][-1]["value"] = "2" # changing the fidelity - t2 = Trial(**exp_config[1][1]) + t1 = Trial(**trial_config) + trial_config["params"][-1]["value"] = "2" # changing the fidelity + t2 = Trial(**trial_config) assert t1.hash_name != t2.hash_name assert t1.hash_params == t2.hash_params - def test_hash_ignore_experiment(self, exp_config): + def test_hash_ignore_experiment(self, trial_config): """Check property `Trial.compute_trial_hash(ignore_experiment=True)`.""" - exp_config[1][1]["params"].append( + trial_config["params"].append( {"name": "/max_epoch", "type": "fidelity", "value": "1"} ) - t1 = Trial(**exp_config[1][1]) - exp_config[1][1]["experiment"] = "test" # changing the experiment name - t2 = Trial(**exp_config[1][1]) + t1 = Trial(**trial_config) + trial_config["experiment"] = "test" # changing the experiment name + t2 = Trial(**trial_config) assert t1.hash_name != t2.hash_name assert t1.hash_params != t2.hash_params assert Trial.compute_trial_hash( t1, ignore_experiment=True ) == Trial.compute_trial_hash(t2, ignore_experiment=True) - def test_hash_ignore_lie(self, exp_config): + def test_hash_ignore_lie(self, trial_config): """Check property `Trial.compute_trial_hash(ignore_lie=True)`.""" - exp_config[1][1]["params"].append( + trial_config["params"].append( {"name": "/max_epoch", "type": "fidelity", "value": "1"} 
) - t1 = Trial(**exp_config[1][1]) + t1 = Trial(**trial_config) # Add a lie - exp_config[1][1]["results"].append({"name": "lie", "type": "lie", "value": 1}) - t2 = Trial(**exp_config[1][1]) + trial_config["results"].append({"name": "lie", "type": "lie", "value": 1}) + t2 = Trial(**trial_config) assert t1.hash_name != t2.hash_name assert t1.hash_params == t2.hash_params assert Trial.compute_trial_hash( t1, ignore_lie=True ) == Trial.compute_trial_hash(t2, ignore_lie=True) - def test_full_name_property(self, exp_config): + def test_hash_ignore_parent(self, trial_config): + """Check property `Trial.compute_trial_hash(ignore_parent=True)`.""" + trial_config["params"].append( + {"name": "/max_epoch", "type": "fidelity", "value": "1"} + ) + t1 = Trial(**trial_config) + trial_config["parent"] = 0 + t2 = Trial(**trial_config) + assert t1.hash_name != t2.hash_name + assert t1.hash_params == t2.hash_params + assert Trial.compute_trial_hash( + t1, ignore_parent=True + ) == Trial.compute_trial_hash(t2, ignore_parent=True) + + def test_full_name_property(self, trial_config): """Check property `Trial.full_name`.""" - t = Trial(**exp_config[1][1]) + t = Trial(**trial_config) assert t.full_name == ".decoding_layer:lstm_with_attention-.encoding_layer:gru" t = Trial() @@ -375,6 +438,54 @@ def test_higher_shape_id_is_same(self): trial.id == Trial(**bson.BSON.decode(bson.BSON.encode(trial.to_dict()))).id ) + def test_equal(self, trial_config): + """Check that two trials are equal based on id""" + + trial_config["params"].append( + {"name": "/max_epoch", "type": "fidelity", "value": "1"} + ) + t1 = Trial(**trial_config) + + def change_attr(attrname, attrvalue): + t2 = Trial(**trial_config) + assert t1 == t2 + setattr(t2, attrname, attrvalue) + return t2 + + t2 = change_attr("parent", 0) + assert t1 != t2 + + params = copy.deepcopy(t1._params) + params[-1].value = "2" + t2 = change_attr("_params", params) + assert t1 != t2 + + t2 = change_attr("exp_working_dir", "whatever") + assert t1 
== t2 + + t2 = change_attr("status", "broken") + assert t1 == t2 + + def test_no_exp_working_dir(self): + trial = Trial() + + with pytest.raises(RuntimeError, match="Cannot infer trial's working_dir"): + trial.working_dir + + def test_working_dir(self, tmp_path, params): + trial = Trial(experiment=0, exp_working_dir=tmp_path, params=params, parent=1) + assert trial.working_dir == os.path.join(tmp_path, trial.id) + assert trial.get_working_dir() == os.path.join(tmp_path, trial.id) + + trial._params.append(Trial.Param(name="/epoch", type="fidelity", value=1)) + + assert trial.id != trial.hash_params + assert trial.get_working_dir( + ignore_fidelity=True, ignore_lie=True, ignore_parent=True + ) == os.path.join(tmp_path, trial.hash_params) + + assert trial.get_working_dir(ignore_parent=True) != trial.working_dir + def test_branch_empty(self, base_trial): """Test that branching with no args is only copying""" branched_trial = base_trial.branch() @@ -386,6 +497,9 @@ def test_branch_empty(self, base_trial): assert branched_trial.heartbeat is None assert branched_trial.params == base_trial.params assert branched_trial.objective is None + assert branched_trial.parent == base_trial.id + assert branched_trial.exp_working_dir == base_trial.exp_working_dir + assert branched_trial.id != base_trial.id def test_branch_base_attr(self, base_trial): """Test branching with base attributes (not params)""" @@ -393,6 +507,9 @@ def test_branch_base_attr(self, base_trial): assert branched_trial.status != base_trial.status assert branched_trial.status == "interrupted" assert branched_trial.params == base_trial.params + assert branched_trial.parent == base_trial.id + assert branched_trial.exp_working_dir == base_trial.exp_working_dir + assert branched_trial.id != base_trial.id def test_branch_params(self, base_trial): """Test branching with params""" @@ -401,6 +518,9 @@ def test_branch_params(self, base_trial): assert branched_trial.status == "interrupted" assert branched_trial.params != 
base_trial.params assert branched_trial.params == {"/x": [1, 2], "/y": [3, 0]} + assert branched_trial.parent == base_trial.id + assert branched_trial.exp_working_dir == base_trial.exp_working_dir + assert branched_trial.id != base_trial.id def test_branch_new_params(self, base_trial): """Test branching with params that are not in base trial"""