Epistimio · bouthilx · Mar 3, 2020 · Mar 3, 2020 · Mar 3, 2020
diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py
@@ -16,6 +16,7 @@
 import orion.core
 from orion.core.io.database import DuplicateKeyError
 from orion.core.utils import format_trials
+from orion.core.worker.trial import Trial
 from orion.core.worker.trials_history import TrialsHistory
 
 log = logging.getLogger(__name__)
@@ -51,6 +52,7 @@ def __init__(self, experiment, max_idle_time=None):
         # TODO: Move trials_history into PrimaryAlgo during the refactoring of Algorithm with
         #       Strategist and Scheduler.
         self.trials_history = TrialsHistory()
+        self.params_hashes = set()
         self.naive_trials_history = None
 
     @property
@@ -78,7 +80,6 @@ def produce(self):
                         self.max_idle_time))
 
             log.debug("### Algorithm suggests new points.")
-
             new_points = self.naive_algorithm.suggest(self.pool_size)
             # Sync state of original algo so that state continues evolving.
             self.algorithm.set_state(self.naive_algorithm.state_dict)
@@ -91,20 +92,33 @@ def produce(self):
                 log.debug("#### Convert point to `Trial` object.")
                 new_trial = format_trials.tuple_to_trial(new_point, self.space)
                 try:
+                    self._prevalidate_trial(new_trial)
                     new_trial.parents = self.naive_trials_history.children
                     log.debug("#### Register new trial to database: %s", new_trial)
                     self.experiment.register_trial(new_trial)
+                    self._update_params_hashes([new_trial])
                     sampled_points += 1
                 except DuplicateKeyError:
                     log.debug("#### Duplicate sample.")
                     self.backoff()
                     break
 
+    def _prevalidate_trial(self, new_trial):
+        """Raise DuplicateKeyError if the hash of `new_trial` is already in `params_hashes`"""
+        if Trial.compute_trial_hash(new_trial, ignore_experiment=True) in self.params_hashes:
+            raise DuplicateKeyError
+
+    def _update_params_hashes(self, trials):
+        """Add each trial's hash, computed ignoring experiment and lie, to `params_hashes`"""
+        for trial in trials:
+            self.params_hashes.add(
+                Trial.compute_trial_hash(trial, ignore_experiment=True, ignore_lie=True))
+
     def update(self):
         """Pull all trials to update model with completed ones and naive model with non completed
         ones.
         """
-        trials = self.experiment.fetch_trials()
+        trials = self.experiment.fetch_trials(with_evc_tree=True)
 
         self._update_algorithm([trial for trial in trials if trial.status == 'completed'])
         self._update_naive_algorithm([trial for trial in trials if trial.status != 'completed'])
@@ -130,6 +144,7 @@ def _update_algorithm(self, completed_trials):
             self.trials_history.update(new_completed_trials)
             self.algorithm.observe(points, results)
             self.strategy.observe(points, results)
+            self._update_params_hashes(new_completed_trials)
 
     def _produce_lies(self, incomplete_trials):
         """Add fake objective results to incomplete trials
@@ -172,3 +187,4 @@ def _update_naive_algorithm(self, incomplete_trials):
             log.debug("### Observe them.")
             self.naive_trials_history.update(lying_trials)
             self.naive_algorithm.observe(points, results)
+            self._update_params_hashes(lying_trials)
diff --git a/src/orion/core/worker/trial.py b/src/orion/core/worker/trial.py
@@ -308,7 +308,7 @@ def hash_params(self):
 
         .. note:: The params contributing to the hash do not include the fidelity.
         """
-        return self.compute_trial_hash(self, ignore_fidelity=True)
+        return self.compute_trial_hash(self, ignore_fidelity=True, ignore_lie=True)
 
     def __hash__(self):
         """Return the hashname for this trial"""
@@ -362,17 +362,21 @@ def format_params(params, sep=',', ignore_fidelity=False):
         return Trial.format_values(params, sep)
 
     @staticmethod
-    def compute_trial_hash(trial, ignore_fidelity=False):
+    def compute_trial_hash(trial, ignore_fidelity=False, ignore_experiment=False,
+                           ignore_lie=False):
         """Generate a unique param md5sum hash for a given `Trial`"""
         if not trial._params and not trial.experiment:
             raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' "
                              "have not been set.")
 
         params = Trial.format_params(trial._params, ignore_fidelity=ignore_fidelity)
-        experiment_repr = str(trial.experiment)
+
+        experiment_repr = ""
+        if not ignore_experiment:
+            experiment_repr = str(trial.experiment)
 
         lie_repr = ""
-        if not ignore_fidelity and trial.lie:
+        if not ignore_lie and trial.lie:
             lie_repr = Trial.format_values([trial.lie])
 
         return hashlib.md5((params + experiment_repr + lie_repr).encode('utf-8')).hexdigest()
diff --git a/tests/unittests/core/test_trial.py b/tests/unittests/core/test_trial.py
@@ -238,6 +238,29 @@ def test_param_name_property(self, exp_config):
         assert t1.hash_name != t2.hash_name
         assert t1.hash_params == t2.hash_params
 
+    def test_hash_ignore_experiment(self, exp_config):
+        """Check that `compute_trial_hash(ignore_experiment=True)` ignores the experiment."""
+        exp_config[1][1]['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'})
+        t1 = Trial(**exp_config[1][1])
+        exp_config[1][1]['experiment'] = 'test'  # `t2` differs from `t1` only by its experiment
+        t2 = Trial(**exp_config[1][1])
+        assert t1.hash_name != t2.hash_name
+        assert t1.hash_params != t2.hash_params
+        assert (Trial.compute_trial_hash(t1, ignore_experiment=True) ==
+                Trial.compute_trial_hash(t2, ignore_experiment=True))
+
+    def test_hash_ignore_lie(self, exp_config):
+        """Check that `compute_trial_hash(ignore_lie=True)` ignores the lie result."""
+        exp_config[1][1]['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'})
+        t1 = Trial(**exp_config[1][1])
+        # Add a lie to the results so that `t2` differs from `t1` only by the lie
+        exp_config[1][1]['results'].append({'name': 'lie', 'type': 'lie', 'value': 1})
+        t2 = Trial(**exp_config[1][1])
+        assert t1.hash_name != t2.hash_name
+        assert t1.hash_params == t2.hash_params
+        assert (Trial.compute_trial_hash(t1, ignore_lie=True) ==
+                Trial.compute_trial_hash(t2, ignore_lie=True))
+
     def test_full_name_property(self, exp_config):
         """Check property `Trial.full_name`."""
         t = Trial(**exp_config[1][1])

diff --git a/tests/unittests/core/test_producer.py → tests/unittests/core/worker/test_producer.py b/tests/unittests/core/test_producer.py → tests/unittests/core/worker/test_producer.py
@@ -7,6 +7,8 @@
 
 import pytest
 
+from orion.core.io.experiment_builder import build
+from orion.core.utils.format_trials import trial_to_tuple
 from orion.core.worker.producer import Producer
 from orion.core.worker.trial import Trial
 
@@ -619,3 +621,57 @@ def test_original_seeding(producer, database):
 
     assert prev_suggested != producer.algorithm.algorithm._suggested
     assert prev_index < producer.algorithm.algorithm._index
+
+
+def test_evc(monkeypatch, producer):
+    """Verify that `producer.update()` uses the trials available through the EVC tree"""
+    experiment = producer.experiment
+    new_experiment = build(experiment.name, algorithms='random')
+
+    # Replace parent with hacked exp, otherwise parent ID does not match trials in DB
+    # and fetch_trials() won't return anything.
+    new_experiment._node.parent._item = experiment
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=True)) == len(experiment.fetch_trials())
+
+    producer.experiment = new_experiment
+
+    def update_algo(trials):
+        assert len(trials) == 3  # `update()` passes the completed trials to this hook
+
+    def update_naive_algo(trials):
+        assert len(trials) == 4  # `update()` passes the non-completed trials to this hook
+
+    monkeypatch.setattr(producer, '_update_algorithm', update_algo)
+    monkeypatch.setattr(producer, '_update_naive_algorithm', update_naive_algo)
+
+    producer.update()
+
+
+def test_evc_duplicates(monkeypatch, producer):
+    """Verify that producer won't register samples that are available in the parent experiment"""
+    experiment = producer.experiment
+    new_experiment = build(experiment.name, algorithms='random')
+
+    # Replace parent with hacked exp, otherwise parent ID does not match trials in DB
+    # and fetch_trials() won't return anything.
+    new_experiment._node.parent._item = experiment
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=True)) == len(experiment.fetch_trials())
+
+    def suggest(pool_size):  # always returns the last trial of `experiment` as the only point
+        return [trial_to_tuple(experiment.fetch_trials()[-1], experiment.space)]
+
+    producer.experiment = new_experiment
+    producer.algorithm = new_experiment.algorithms
+    producer.max_idle_time = 1
+
+    monkeypatch.setattr(new_experiment.algorithms, 'suggest', suggest)
+
+    producer.update()
+    with pytest.raises(RuntimeError) as exc:
+        producer.produce()
+
+    assert exc.match('Algorithm could not sample new points in less')
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=False)) == 0