Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use EVC tree trials in producer #347

Merged
merged 2 commits into from
Mar 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/orion/core/worker/producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import orion.core
from orion.core.io.database import DuplicateKeyError
from orion.core.utils import format_trials
from orion.core.worker.trial import Trial
from orion.core.worker.trials_history import TrialsHistory

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -51,6 +52,7 @@ def __init__(self, experiment, max_idle_time=None):
# TODO: Move trials_history into PrimaryAlgo during the refactoring of Algorithm with
# Strategist and Scheduler.
self.trials_history = TrialsHistory()
self.params_hashes = set()
self.naive_trials_history = None

@property
Expand Down Expand Up @@ -78,7 +80,6 @@ def produce(self):
self.max_idle_time))

log.debug("### Algorithm suggests new points.")

new_points = self.naive_algorithm.suggest(self.pool_size)
# Sync state of original algo so that state continues evolving.
self.algorithm.set_state(self.naive_algorithm.state_dict)
Expand All @@ -91,20 +92,33 @@ def produce(self):
log.debug("#### Convert point to `Trial` object.")
new_trial = format_trials.tuple_to_trial(new_point, self.space)
try:
self._prevalidate_trial(new_trial)
new_trial.parents = self.naive_trials_history.children
log.debug("#### Register new trial to database: %s", new_trial)
self.experiment.register_trial(new_trial)
self._update_params_hashes([new_trial])
sampled_points += 1
except DuplicateKeyError:
log.debug("#### Duplicate sample.")
self.backoff()
break

def _prevalidate_trial(self, new_trial):
    """Verify if trial is not in parent history

    The candidate is hashed with the same options that are used when
    ``self.params_hashes`` is filled (experiment and lie both ignored), so
    the membership test compares hashes built the same way.

    :param new_trial: candidate `Trial` about to be registered.
    :raises DuplicateKeyError: if the hash of `new_trial` is already present
        in ``self.params_hashes``.
    """
    trial_hash = Trial.compute_trial_hash(
        new_trial, ignore_experiment=True, ignore_lie=True)
    if trial_hash in self.params_hashes:
        raise DuplicateKeyError

def _update_params_hashes(self, trials):
    """Add the param hash of every trial in `trials` to ``self.params_hashes``.

    Hashes are computed ignoring both the experiment and the lie of a trial.
    """
    self.params_hashes.update(
        Trial.compute_trial_hash(seen, ignore_experiment=True, ignore_lie=True)
        for seen in trials)

def update(self):
    """Pull all trials to update model with completed ones and naive model with non completed
    ones.

    Trials are fetched a single time, with ``with_evc_tree=True``, and then
    split on their ``status``: those equal to ``'completed'`` are passed to
    ``_update_algorithm`` and all the others to ``_update_naive_algorithm``.
    """
    # One fetch only: a previous plain ``fetch_trials()`` call was immediately
    # overwritten by this one and only cost an extra round trip.
    trials = self.experiment.fetch_trials(with_evc_tree=True)

    self._update_algorithm([trial for trial in trials if trial.status == 'completed'])
    self._update_naive_algorithm([trial for trial in trials if trial.status != 'completed'])
Expand All @@ -130,6 +144,7 @@ def _update_algorithm(self, completed_trials):
self.trials_history.update(new_completed_trials)
self.algorithm.observe(points, results)
self.strategy.observe(points, results)
self._update_params_hashes(new_completed_trials)

def _produce_lies(self, incomplete_trials):
"""Add fake objective results to incomplete trials
Expand Down Expand Up @@ -172,3 +187,4 @@ def _update_naive_algorithm(self, incomplete_trials):
log.debug("### Observe them.")
self.naive_trials_history.update(lying_trials)
self.naive_algorithm.observe(points, results)
self._update_params_hashes(lying_trials)
12 changes: 8 additions & 4 deletions src/orion/core/worker/trial.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def hash_params(self):

.. note:: The params contributing to the hash do not include the fidelity.
"""
return self.compute_trial_hash(self, ignore_fidelity=True)
return self.compute_trial_hash(self, ignore_fidelity=True, ignore_lie=True)

def __hash__(self):
"""Return the hashname for this trial"""
Expand Down Expand Up @@ -362,17 +362,21 @@ def format_params(params, sep=',', ignore_fidelity=False):
return Trial.format_values(params, sep)

@staticmethod
def compute_trial_hash(trial, ignore_fidelity=False, ignore_experiment=False,
                       ignore_lie=False):
    """Generate a unique param md5sum hash for a given `Trial`

    The hash is the md5 of the concatenation of the formatted params, the
    string form of the experiment and the formatted lie, each of the last
    two being left out when the corresponding flag is set.

    :param trial: `Trial` to hash.
    :param ignore_fidelity: forwarded to `Trial.format_params` when the
        params are formatted.
    :param ignore_experiment: if True, ``trial.experiment`` does not
        contribute to the hash.
    :param ignore_lie: if True, ``trial.lie`` does not contribute to the
        hash.
    :raises ValueError: if the trial has neither params nor experiment set.
    :returns: hexadecimal md5 digest as a string.
    """
    if not trial._params and not trial.experiment:
        raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' "
                         "have not been set.")

    params = Trial.format_params(trial._params, ignore_fidelity=ignore_fidelity)

    experiment_repr = ""
    if not ignore_experiment:
        experiment_repr = str(trial.experiment)

    # The lie is controlled by its own flag, independently of the fidelity.
    lie_repr = ""
    if not ignore_lie and trial.lie:
        lie_repr = Trial.format_values([trial.lie])

    return hashlib.md5((params + experiment_repr + lie_repr).encode('utf-8')).hexdigest()
23 changes: 23 additions & 0 deletions tests/unittests/core/test_trial.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,29 @@ def test_param_name_property(self, exp_config):
assert t1.hash_name != t2.hash_name
assert t1.hash_params == t2.hash_params

def test_hash_ignore_experiment(self, exp_config):
    """Check that `Trial.compute_trial_hash(ignore_experiment=True)` drops the experiment."""
    trial_config = exp_config[1][1]
    trial_config['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'})
    original = Trial(**trial_config)

    # Same params, different experiment name.
    trial_config['experiment'] = 'test'
    renamed = Trial(**trial_config)

    # With the experiment included, both hashes differ...
    assert original.hash_name != renamed.hash_name
    assert original.hash_params != renamed.hash_params

    # ...and they agree once the experiment is ignored.
    original_hash = Trial.compute_trial_hash(original, ignore_experiment=True)
    renamed_hash = Trial.compute_trial_hash(renamed, ignore_experiment=True)
    assert original_hash == renamed_hash

def test_hash_ignore_lie(self, exp_config):
    """Check that `Trial.compute_trial_hash(ignore_lie=True)` drops the lie."""
    trial_config = exp_config[1][1]
    trial_config['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'})
    without_lie = Trial(**trial_config)

    # Same trial, with a lie appended to its results.
    trial_config['results'].append({'name': 'lie', 'type': 'lie', 'value': 1})
    with_lie = Trial(**trial_config)

    assert without_lie.hash_name != with_lie.hash_name
    assert without_lie.hash_params == with_lie.hash_params

    plain_hash = Trial.compute_trial_hash(without_lie, ignore_lie=True)
    lying_hash = Trial.compute_trial_hash(with_lie, ignore_lie=True)
    assert plain_hash == lying_hash

def test_full_name_property(self, exp_config):
"""Check property `Trial.full_name`."""
t = Trial(**exp_config[1][1])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import pytest

from orion.core.io.experiment_builder import build
from orion.core.utils.format_trials import trial_to_tuple
from orion.core.worker.producer import Producer
from orion.core.worker.trial import Trial

Expand Down Expand Up @@ -619,3 +621,57 @@ def test_original_seeding(producer, database):

assert prev_suggested != producer.algorithm.algorithm._suggested
assert prev_index < producer.algorithm.algorithm._index


def test_evc(monkeypatch, producer):
    """Check that `Producer.update` hands the trials fetched through the EVC tree to its updaters"""
    parent_exp = producer.experiment
    child_exp = build(parent_exp.name, algorithms='random')

    # Swap the parent node's item for the fixture experiment; otherwise the parent ID
    # does not match the trials in DB and fetch_trials() returns nothing.
    child_exp._node.parent._item = parent_exp

    n_with_tree = len(child_exp.fetch_trials(with_evc_tree=True))
    n_parent = len(parent_exp.fetch_trials())
    assert n_with_tree == n_parent

    producer.experiment = child_exp

    def expect_three_completed(trials):
        assert len(trials) == 3

    def expect_four_incomplete(trials):
        assert len(trials) == 4

    # Replace both updaters with checks on how many trials each one receives.
    monkeypatch.setattr(producer, '_update_algorithm', expect_three_completed)
    monkeypatch.setattr(producer, '_update_naive_algorithm', expect_four_incomplete)

    producer.update()


def test_evc_duplicates(monkeypatch, producer):
    """Verify that producer won't register samples that are available in parent experiment"""
    parent_exp = producer.experiment
    child_exp = build(parent_exp.name, algorithms='random')

    # Swap the parent node's item for the fixture experiment; otherwise the parent ID
    # does not match the trials in DB and fetch_trials() returns nothing.
    child_exp._node.parent._item = parent_exp

    n_with_tree = len(child_exp.fetch_trials(with_evc_tree=True))
    n_parent = len(parent_exp.fetch_trials())
    assert n_with_tree == n_parent

    def suggest(pool_size):
        # Always propose the last trial of the parent experiment.
        last_trial = parent_exp.fetch_trials()[-1]
        return [trial_to_tuple(last_trial, parent_exp.space)]

    producer.experiment = child_exp
    producer.algorithm = child_exp.algorithms
    producer.max_idle_time = 1

    monkeypatch.setattr(child_exp.algorithms, 'suggest', suggest)

    producer.update()
    with pytest.raises(RuntimeError) as exc:
        producer.produce()

    assert exc.match('Algorithm could not sample new points in less')

    # Nothing was registered in the child experiment itself.
    assert len(child_exp.fetch_trials(with_evc_tree=False)) == 0