From dce2c9dab25804b5d36e61a612b6ed5abbdfbf40 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Fri, 8 May 2020 21:41:02 +0800 Subject: [PATCH 1/6] tpe discrete categorical space support --- docs/src/user/algorithms.rst | 4 + src/orion/algo/tpe.py | 173 +++++++++++++++++---- tests/functional/algos/test_algos.py | 6 +- tests/unittests/algo/test_tpe.py | 216 ++++++++++++++++++++++++--- 4 files changed, 345 insertions(+), 54 deletions(-) diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index 399edcc1e..e276e5671 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -210,6 +210,10 @@ the most promising point among the candidates. .. _Tree-structured Parzen Estimator: https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf +.. note:: + + Current implementation only supports uniform, uniform discrete and choices as prior. + As for choices prior, the probabilities if any given will be ignored. Configuration ~~~~~~~~~~~~~ diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index d3db72022..a931e5f38 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -36,6 +36,23 @@ def compute_max_ei_point(points, below_likelis, above_likelis): return points[point_index] +def ramp_up_weights(total_num, flat_num, equal_weight): + """Adjust weights of observed trials. + + :param total_num: total number of observed trials. + :param flat_num: the number of the most recent trials which + get the full weight where the others will be applied with a linear ramp + from 0 to 1.0. It will only take effect if equal_weight is False. + :param equal_weight: whether all the observed trails share the same weights. + """ + if total_num < flat_num or equal_weight: + return numpy.ones(total_num) + + ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num) + flat_weights = numpy.ones(flat_num) + return numpy.concatenate([ramp_weights, flat_weights]) + + # pylint:disable=assignment-from-no-return def adaptive_parzen_estimator(mus, low, high, prior_weight=1.0, @@ -56,15 +73,6 @@ def adaptive_parzen_estimator(mus, low, high, get the full weight where the others will be applied with a linear ramp from 0 to 1.0. It will only take effect if equal_weight is False. 
""" - def update_weights(total_num): - """Generate weights for all components""" - if total_num < flat_num or equal_weight: - return numpy.ones(total_num) - - ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num) - flat_weights = numpy.ones(flat_num) - return numpy.concatenate([ramp_weights, flat_weights]) - mus = numpy.asarray(mus) prior_mu = (low + high) * 0.5 @@ -76,7 +84,7 @@ def update_weights(total_num): sorted_mus = mus[order] prior_mu_pos = numpy.searchsorted(sorted_mus, prior_mu) - weights = update_weights(size) + weights = ramp_up_weights(size, flat_num, equal_weight) mixture_mus = numpy.zeros(size + 1) mixture_mus[:prior_mu_pos] = sorted_mus[:prior_mu_pos] @@ -163,7 +171,7 @@ class TPE(BaseAlgorithm): # pylint:disable=too-many-arguments def __init__(self, space, seed=None, - n_initial_points=20, n_ei_candidates=24, + n_initial_points=5, n_ei_candidates=24, gamma=0.25, equal_weight=False, prior_weight=1.0, full_weight_num=25): @@ -177,12 +185,13 @@ def __init__(self, space, seed=None, full_weight_num=full_weight_num) for dimension in self.space.values(): + if dimension.type not in ['real', 'integer', 'categorical']: + raise ValueError("TPE now only supports Real, Integer " + "and Categorical Dimension.") - if dimension.type not in ['real']: - raise ValueError("TPE now only supports Real Dimension.") - - if dimension.prior_name not in ['uniform']: - raise ValueError("TPE now only supports uniform as prior.") + if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']: + raise ValueError("TPE now only supports uniform, uniform discrete " + "and choices as prior.") shape = dimension.shape if shape and len(shape) != 1: @@ -245,8 +254,11 @@ def suggest(self, num=1): else: point = [] below_points, above_points = self.split_trials() - below_points = numpy.array([flatten_dims(point, self.space) for point in below_points]) - above_points = numpy.array([flatten_dims(point, self.space) for point in above_points]) + + below_points = [flatten_dims(point, self.space) for point in below_points] + above_points = [flatten_dims(point, self.space) for point in above_points] + below_points = list(map(list, zip(*below_points))) + above_points = list(map(list, zip(*above_points))) idx = 0 for dimension in self.space.values(): @@ -255,12 +267,23 @@ def suggest(self, num=1): if not shape: shape = (1,) - if dimension.type == 'real': - points = self.sample_real_dimension(dimension, shape[0], - below_points[:, idx: idx + shape[0]], - above_points[:, idx: idx + shape[0]]) + if dimension.type == 'real' and dimension.prior_name == 'uniform': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_real_point) + elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_int_point) + elif dimension.type == 'categorical' and dimension.prior_name == 'choices': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_categorical_point) else: - raise ValueError("TPE now only support Real Dimension.") + raise NotImplementedError() if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s', @@ -275,25 +298,27 @@ def suggest(self, num=1): return samples - def sample_real_dimension(self, dimension, shape_size, 
below_points, above_points): - """Sample values for a real dimension + # pylint:disable=no-self-use + def sample_one_dimension(self, dimension, shape_size, below_points, above_points, sampler): + """Sample values for a dimension - :param dimension: Real Dimension. + :param dimension: Dimension. :param shape_size: 1D Shape Size of the Real Dimension. :param below_points: good points with shape (m, n), n=shape_size. :param above_points: bad points with shape (m, n), n=shape_size. + :param sampler: method to sample one value for upon the dimension. """ points = [] for j in range(shape_size): - new_point = self._sample_real_point(dimension, below_points[:, j], above_points[:, j]) - if new_point: + new_point = sampler(dimension, below_points[j], above_points[j]) + if new_point is not None: points.append(new_point) return points def _sample_real_point(self, dimension, below_points, above_points): - """Sample one value for a real dimension based on the observed good and bad points""" + """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, @@ -314,6 +339,51 @@ def _sample_real_point(self, dimension, below_points, above_points): return None + def _sample_int_point(self, dimension, below_points, above_points): + """Sample one value for integer dimension based on the observed good and bad points""" + low, high = dimension.interval() + choices = range(low, high) + + below_points = numpy.array(below_points).astype(int) - low + above_points = numpy.array(above_points).astype(int) - low + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = CategoricalSampler(self, above_points, choices) + + lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = new_point + low + return new_point + + return None + + def _sample_categorical_point(self, dimension, below_points, above_points): + """Sample one value for categorical dimension based on the observed good and bad points""" + choices = dimension.categories + + below_points = [choices.index(point) for point in below_points] + above_points = [choices.index(point) for point in above_points] + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = CategoricalSampler(self, above_points, choices) + + lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point_index = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = choices[new_point_index] + + return new_point + return None + def split_trials(self): """Split the observed trials into good and bad ones based on the ratio `gamma``""" sorted_trials = sorted(self._trials_info.values(), key=lambda x: x[1]['objective']) @@ -402,3 +472,48 @@ def get_loglikelis(self, points): axis=1)) return point_likeli + + +class CategoricalSampler(): + """Categorical Sampler for discrete integer and categorical choices + + Parameters + ---------- + tpe: `TPE` algorithm + The tpe algorithm object which this sampler will be part of. 
+ observations: list + Observed values in the dimension + choices: list + Candidate values for the dimension + + """ + + def __init__(self, tpe, observations, choices): + self.tpe = tpe + self.obs = observations + self.choices = choices + + self._build_multinomial_weights() + + def _build_multinomial_weights(self): + """Build weights for categorical distribution based on observations""" + weights_obs = ramp_up_weights(len(self.obs), + self.tpe.full_weight_num, self.tpe.equal_weight) + counts_obs = numpy.bincount(self.obs, minlength=len(self.choices), weights=weights_obs) + counts_obs = counts_obs + self.tpe.prior_weight + self.weights = counts_obs / counts_obs.sum() + + def sample(self, num=1): + """Sample required number of points""" + samples = self.tpe.rng.multinomial(n=1, pvals=self.weights, size=num) + + assert samples.shape == (num,) + (len(self.weights),) + + samples_index = samples.argmax(-1) + assert samples_index.shape == (num,) + + return samples_index + + def get_loglikelis(self, points): + """Return the log likelihood for the points""" + return numpy.log(numpy.asarray(self.weights)[points]) diff --git a/tests/functional/algos/test_algos.py b/tests/functional/algos/test_algos.py index b30197a86..8cbdaaf6d 100644 --- a/tests/functional/algos/test_algos.py +++ b/tests/functional/algos/test_algos.py @@ -71,11 +71,9 @@ def test_simple(monkeypatch, config_file): @pytest.mark.usefixtures("clean_db") @pytest.mark.usefixtures("null_db_instances") -def test_random_stop(monkeypatch): +@pytest.mark.parametrize('config_file', config_files) +def test_random_stop(monkeypatch, config_file): """Test a simple usage scenario.""" - # TODO: TPE should support this case once discrete is added, - # then parametrized config_file should be used - config_file = 'random_config.yaml' monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) orion.core.cli.main(["hunt", "--config", config_file, "./black_box.py", "-x~uniform(-10, 5, discrete=True)"]) diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 87f5e3788..cfbaac570 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -6,19 +6,26 @@ import pytest from scipy.stats import norm -from orion.algo.space import Integer, Real, Space -from orion.algo.tpe import adaptive_parzen_estimator, compute_max_ei_point, GMMSampler, TPE +from orion.algo.space import Categorical, Fidelity, Integer, Real, Space +from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ + compute_max_ei_point, GMMSampler, ramp_up_weights, TPE @pytest.fixture() def space(): """Return an optimization space""" space = Space() + dim1 = Real('yolo1', 'uniform', -10, 20) space.register(dim1) - dim2 = Real('yolo2', 'uniform', -5, 10) + + dim2 = Integer('yolo2', 'uniform', -5, 10) space.register(dim2) + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + return space @@ -42,6 +49,26 @@ def test_compute_max_ei_point(): assert max_ei_point == points[max_ei_index] +def test_ramp_up_weights(): + """Test TPE adjust observed points correctly""" + weights = ramp_up_weights(25, 15, True) + assert len(weights) == 25 + assert numpy.all(weights == 1.0) + + weights = ramp_up_weights(25, 15, False) + assert len(weights) == 25 + assert numpy.all(weights[:10] == (numpy.linspace(1.0 / 25, 1.0, num=10))) + assert numpy.all(weights[10:] == 1.0) + + weights = ramp_up_weights(10, 15, False) + assert len(weights) == 10 + assert numpy.all(weights == 1.0) + + weights = 
ramp_up_weights(25, 0, False) + assert len(weights) == 25 + assert numpy.all(weights == (numpy.linspace(1.0 / 25, 1.0, num=25))) + + def test_adaptive_parzen_normal_estimator(): """Test adaptive parzen estimator""" low = -1 @@ -157,6 +184,87 @@ def test_adaptive_parzen_normal_estimator_sigma_clip(): assert numpy.all(sigmas <= 6) and numpy.all(sigmas >= 6 / 100) +class TestCategoricalSampler(): + """Tests for TPE Categorical Sampler""" + + def test_cat_sampler_creation(self, tpe): + """Test CategoricalSampler creation""" + obs = [0, 3, 9] + choices = list(range(-5, 5)) + cat_sampler = CategoricalSampler(tpe, obs, choices) + assert len(cat_sampler.weights) == len(choices) + + obs = [0, 3, 9] + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + assert len(cat_sampler.weights) == len(choices) + + tpe.equal_weight = True + tpe.prior_weight = 1.0 + obs = numpy.random.randint(0, 10, 100) + cat_sampler = CategoricalSampler(tpe, obs, choices) + counts_obs = numpy.bincount(obs) + 1.0 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + tpe.equal_weight = False + tpe.prior_weight = 0.5 + tpe.full_weight_num = 30 + obs = numpy.random.randint(0, 10, 100) + + cat_sampler = CategoricalSampler(tpe, obs, choices) + + ramp = numpy.linspace(1.0 / 100, 1.0, num=100 - 30) + full = numpy.ones(30) + ramp_weights = (numpy.concatenate([ramp, full])) + + counts_obs = numpy.bincount(obs, weights=ramp_weights) + 0.5 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + def test_sample(self, tpe): + """Test CategoricalSampler sample function""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + assert len(points) == 25 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + weights = numpy.linspace(1, 10, num=10) ** 3 + numpy.random.shuffle(weights) + weights = weights / weights.sum() + cat_sampler = CategoricalSampler(tpe, obs, choices) + cat_sampler.weights = weights + + points = cat_sampler.sample(10000) + points = numpy.array(points) + hist = numpy.bincount(points) + + assert numpy.all(hist.argsort() == weights.argsort()) + assert len(points) == 10000 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + def test_get_loglikelis(self, tpe): + """Test to get log likelis of points""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + likelis = cat_sampler.get_loglikelis(points) + + assert numpy.all(likelis == numpy.log(numpy.asarray(cat_sampler.weights)[points])) + + class TestGMMSampler(): """Tests for TPE GMM Sampler""" @@ -262,13 +370,13 @@ def test_set_state(self, tpe): def test_unsupported_space(self): """Test tpe only work for supported search space""" space = Space() - dim = Integer('yolo1', 'uniform', -2, 4) + dim = Fidelity('epoch', 1, 9, 3) space.register(dim) with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports Real Dimension' in str(ex.value) + assert 'TPE now only supports Real, Integer and Categorical Dimension' in str(ex.value) space = Space() dim = Real('yolo1', 'norm', 0.9) @@ -277,7 +385,7 @@ def test_unsupported_space(self): with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports uniform as prior' 
in str(ex.value) + assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value) space = Space() dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1)) @@ -316,6 +424,69 @@ def test_split_trials(self, tpe): assert below_points == [[-3.0], [-2.4]] assert len(above_points) == 8 + def test_sample_int_dimension(self): + """Test sample values for a integer dimension""" + space = Space() + dim1 = Integer('yolo1', 'uniform', -10, 20) + space.register(dim1) + + dim2 = Integer('yolo2', 'uniform', -5, 10, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(-10, 10, 100) + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 1 + + obs_points = numpy.random.randint(-5, 5, 100) + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 2 + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 0 + + def test_sample_categorical_dimension(self): + """Test sample values for a categorical dimension""" + space = Space() + categories = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + dim1 = Categorical('yolo1', categories) + space.register(dim1) + dim2 = Categorical('yolo2', categories, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 2 + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 0 + def test_sample_real_dimension(self): """Test sample values for a real dimension""" space = Space() @@ -325,20 +496,23 @@ def test_sample_real_dimension(self): space.register(dim2) tpe = TPE(space) - points = numpy.random.uniform(-10, 10, 20).reshape(20, 1) - below_points = points[:6, :] - above_points = points[6:, :] - points = tpe.sample_real_dimension(dim1, 1, below_points, above_points) + points = numpy.random.uniform(-10, 10, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_real_point) assert len(points) == 1 - points = numpy.random.uniform(-5, 5, 32).reshape(16, 2) - below_points = points[:4, :] - above_points = points[4:, :] - points = tpe.sample_real_dimension(dim2, 2, below_points, above_points) + points = numpy.random.uniform(-5, 5, 32) + below_points = [points[:8], points[8:16]] + above_points = [points[16:24], points[24:]] + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_real_point) assert len(points) == 2 tpe.n_ei_candidates = 0 - points = 
tpe.sample_real_dimension(dim2, 2, below_points, above_points) + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_real_point) assert len(points) == 0 def test_suggest(self, tpe): @@ -348,13 +522,13 @@ def test_suggest(self, tpe): for i in range(10): point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) tpe.observe(point, [{'objective': results[i]}]) point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) def test_1d_shape(self, tpe): @@ -383,7 +557,7 @@ def test_1d_shape(self, tpe): def test_suggest_initial_points(self, tpe, monkeypatch): """Test that initial points can be sampled correctly""" - points = [(i, i**2) for i in range(1, 12)] + points = [(i, i - 6, 'c') for i in range(1, 12)] global index index = 0 @@ -400,11 +574,11 @@ def sample(num=1, seed=None): results = numpy.random.random(10) for i in range(1, 11): point = tpe.suggest(1)[0] - assert point == (i, i**2) + assert point == (i, i - 6, 'c') tpe.observe([point], [{'objective': results[i - 1]}]) point = tpe.suggest(1)[0] - assert point != (11, 11 * 2) + assert point != (11, 5, 'c') def test_suggest_ei_candidates(self, tpe): """Test suggest with no shape dimensions""" @@ -415,7 +589,7 @@ def test_suggest_ei_candidates(self, tpe): for i in range(2): point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) tpe.observe(point, [{'objective': results[i]}]) From 7bc68088ab1f9fb49bf580ab203691ba1213a54d Mon Sep 17 00:00:00 2001 From: donglinjy Date: Tue, 12 May 2020 17:29:45 +0800 Subject: [PATCH 2/6] refine some code --- src/orion/algo/tpe.py | 12 ++++----- tests/unittests/algo/test_tpe.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index a931e5f38..3263b36a0 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -304,8 +304,8 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points :param dimension: Dimension. :param shape_size: 1D Shape Size of the Real Dimension. - :param below_points: good points with shape (m, n), n=shape_size. - :param above_points: bad points with shape (m, n), n=shape_size. + :param below_points: good points with shape (m, n), m=shape_size. + :param above_points: bad points with shape (m, n), m=shape_size. :param sampler: method to sample one value for upon the dimension. 
""" points = [] @@ -353,10 +353,10 @@ def _sample_int_point(self, dimension, below_points, above_points): if list(candidate_points): sampler_above = CategoricalSampler(self, above_points, choices) - lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_below = sampler_below.get_loglikelis(candidate_points) lik_above = sampler_above.get_loglikelis(candidate_points) - new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = compute_max_ei_point(candidate_points, lik_below, lik_above) new_point = new_point + low return new_point @@ -375,10 +375,10 @@ def _sample_categorical_point(self, dimension, below_points, above_points): if list(candidate_points): sampler_above = CategoricalSampler(self, above_points, choices) - lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_below = sampler_below.get_loglikelis(candidate_points) lik_above = sampler_above.get_loglikelis(candidate_points) - new_point_index = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point_index = compute_max_ei_point(candidate_points, lik_below, lik_above) new_point = choices[new_point_index] return new_point diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index cfbaac570..15cb1ec81 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -440,14 +440,29 @@ def test_sample_int_dimension(self): above_points = [obs_points[25:]] points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + obs_points_below = numpy.random.randint(-10, 0, 25).reshape(1, 25) + obs_points_above = numpy.random.randint(0, 10, 75).reshape(1, 75) + points = tpe.sample_one_dimension(dim1, 1, + obs_points_below, obs_points_above, tpe._sample_int_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) obs_points = numpy.random.randint(-5, 5, 100) below_points = [obs_points[:25], obs_points[25:50]] above_points = [obs_points[50:75], obs_points[75:]] points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, @@ -472,6 +487,16 @@ def test_sample_categorical_dimension(self): points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_categorical_point) assert len(points) == 1 + assert points[0] in categories + + obs_points_below = numpy.random.randint(0, 3, 25) + obs_points_above = numpy.random.randint(3, 10, 75) + below_points = [[categories[point] for point in obs_points_below]] + above_points = [[categories[point] for point in obs_points_above]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + assert points[0] in categories[:3] obs_points = numpy.random.randint(0, 10, 100) obs_points = [categories[point] for point in obs_points] @@ -481,6 +506,8 @@ def test_sample_categorical_dimension(self): points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_categorical_point) assert len(points) == 2 + assert points[0] in categories + assert points[1] in categories tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, @@ -501,14 +528,29 @@ def test_sample_real_dimension(self): 
above_points = [points[8:]] points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25) + above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75) + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) points = numpy.random.uniform(-5, 5, 32) below_points = [points[:8], points[8:16]] above_points = [points[16:24], points[24:]] points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, From 5536624b60b414c97775ea641262d11460c95cad Mon Sep 17 00:00:00 2001 From: donglinjy Date: Sat, 23 May 2020 22:44:43 +0800 Subject: [PATCH 3/6] add loguniform --- docs/src/user/algorithms.rst | 2 +- src/orion/algo/space.py | 11 +--- src/orion/algo/tpe.py | 63 ++++++++++++++++----- src/orion/core/worker/transformer.py | 11 ++-- tests/unittests/algo/test_space.py | 6 +- tests/unittests/algo/test_tpe.py | 70 ++++++++++++++++++++---- tests/unittests/core/test_transformer.py | 2 +- 7 files changed, 119 insertions(+), 46 deletions(-) diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index e276e5671..7044aa3ce 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -212,7 +212,7 @@ the most promising point among the candidates. .. note:: - Current implementation only supports uniform, uniform discrete and choices as prior. + Current implementation only supports uniform, loguniform, uniform discrete and choices as prior. As for choices prior, the probabilities if any given will be ignored. Configuration diff --git a/src/orion/algo/space.py b/src/orion/algo/space.py index 581257c10..1b551e399 100644 --- a/src/orion/algo/space.py +++ b/src/orion/algo/space.py @@ -611,15 +611,8 @@ def sample(self, n_samples=1, seed=None): return samples def interval(self, alpha=1.0): - """Return a tuple of possible values that this categorical dimension - can take. - - .. warning:: This method makes no sense for categorical variables. Use - ``self.categories`` instead. - - """ - raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n" - "Use ``self.categories`` instead.") + """Return a tuple of possible values that this categorical dimension can take.""" + return self.categories def __contains__(self, point): """Check if constraints hold for this `point` of `Dimension`. 
diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 3263b36a0..0266008c0 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -11,7 +11,7 @@ import logging import numpy -from scipy.stats import norm +from scipy.stats import lognorm, norm from orion.algo.base import BaseAlgorithm from orion.core.utils.points import flatten_dims, regroup_dims @@ -171,7 +171,7 @@ class TPE(BaseAlgorithm): # pylint:disable=too-many-arguments def __init__(self, space, seed=None, - n_initial_points=5, n_ei_candidates=24, + n_initial_points=20, n_ei_candidates=24, gamma=0.25, equal_weight=False, prior_weight=1.0, full_weight_num=25): @@ -185,12 +185,13 @@ def __init__(self, space, seed=None, full_weight_num=full_weight_num) for dimension in self.space.values(): + if dimension.type not in ['real', 'integer', 'categorical']: raise ValueError("TPE now only supports Real, Integer " "and Categorical Dimension.") - if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']: - raise ValueError("TPE now only supports uniform, uniform discrete " + if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: + raise ValueError("TPE now only supports uniform, loguniform, uniform discrete " "and choices as prior.") shape = dimension.shape @@ -267,11 +268,10 @@ def suggest(self, num=1): if not shape: shape = (1,) - if dimension.type == 'real' and dimension.prior_name == 'uniform': - points = self.sample_one_dimension(dimension, shape[0], - below_points[idx: idx + shape[0]], - above_points[idx: idx + shape[0]], - self._sample_real_point) + if dimension.type == 'real': + points = self._sample_real_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]]) elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform': points = self.sample_one_dimension(dimension, shape[0], below_points[idx: idx + shape[0]], @@ -317,9 +317,32 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points return points - def _sample_real_point(self, dimension, below_points, above_points): + def _sample_real_dimension(self, dimension, shape_size, below_points, above_points): + """Sample values for real dimension""" + if dimension.prior_name == 'uniform': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_real_point) + elif dimension.prior_name == 'reciprocal': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_loguniform_real_point) + else: + raise NotImplementedError() + + def _sample_loguniform_real_point(self, dimension, below_points, above_points): + """Sample one value for real dimension in a loguniform way""" + return self._sample_real_point(dimension, below_points, above_points, is_log=True) + + def _sample_real_point(self, dimension, below_points, above_points, is_log=False): """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() + if is_log: + below_points = numpy.log(below_points) + above_points = numpy.log(above_points) + + # scipy.stats loguniform + low = numpy.log(low) + high = numpy.log(high) + below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, self.equal_weight, flat_num=self.full_weight_num) @@ -327,8 +350,10 @@ def _sample_real_point(self, dimension, below_points, above_points): adaptive_parzen_estimator(above_points, low, high, self.prior_weight, self.equal_weight, 
flat_num=self.full_weight_num) - gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, low, high, below_weights) - gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, low, high, above_weights) + gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, + low, high, below_weights, is_log=is_log) + gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, + low, high, above_weights, is_log=is_log) candidate_points = gmm_sampler_below.sample(self.n_ei_candidates) if candidate_points: @@ -364,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points): def _sample_categorical_point(self, dimension, below_points, above_points): """Sample one value for categorical dimension based on the observed good and bad points""" - choices = dimension.categories + _, choices = dimension.interval() below_points = [choices.index(point) for point in below_points] above_points = [choices.index(point) for point in above_points] @@ -427,7 +452,7 @@ class GMMSampler(): """ - def __init__(self, tpe, mus, sigmas, low, high, weights=None): + def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): self.tpe = tpe self.mus = mus @@ -435,6 +460,10 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None): self.low = low self.high = high self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)] + self.is_log = is_log + if is_log: + self.low = numpy.exp(low) + self.high = numpy.exp(high) self.pdfs = [] self._build_mixture() @@ -442,7 +471,10 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None): def _build_mixture(self): """Build the Gaussian components in the GMM""" for mu, sigma in zip(self.mus, self.sigmas): - self.pdfs.append(norm(mu, sigma)) + if self.is_log: + self.pdfs.append(lognorm(s=sigma, loc=0, scale=numpy.exp(mu))) + else: + self.pdfs.append(norm(mu, sigma)) def sample(self, num=1): """Sample required number of points""" @@ -463,6 +495,7 @@ def get_loglikelis(self, points): weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points)) for i, pdf in enumerate(self.pdfs)] weight_likelis = numpy.array(weight_likelis) + # (num_weights, num_points) => (num_points, num_weights) weight_likelis = weight_likelis.transpose() # log-sum-exp trick diff --git a/src/orion/core/worker/transformer.py b/src/orion/core/worker/transformer.py index b19392abe..75fa7fd48 100644 --- a/src/orion/core/worker/transformer.py +++ b/src/orion/core/worker/transformer.py @@ -411,12 +411,11 @@ def sample(self, n_samples=1, seed=None): def interval(self, alpha=1.0): """Map the interval bounds to the transformed ones.""" - try: - low, high = self.original_dimension.interval(alpha) - except RuntimeError as exc: - if "Categories" in str(exc): - return (-0.1, 1.1) - raise + if self.original_dimension.prior_name == 'choices': + return self.original_dimension.categories + + low, high = self.original_dimension.interval(alpha) + return self.transform(low), self.transform(high) def __contains__(self, point): diff --git a/tests/unittests/algo/test_space.py b/tests/unittests/algo/test_space.py index db5316868..d2ef9d604 100644 --- a/tests/unittests/algo/test_space.py +++ b/tests/unittests/algo/test_space.py @@ -466,14 +466,12 @@ def test_bad_probabilities(self): with pytest.raises(ValueError): Categorical('yolo', categories, shape=2) - def test_interval_is_banned(self): + def test_interval(self): """Check that calling `Categorical.interval` raises `RuntimeError`.""" categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4} dim = Categorical('yolo', 
categories, shape=2) - with pytest.raises(RuntimeError) as exc: - dim.interval() - assert 'not ordered' in str(exc.value) + assert dim.interval() == ('asdfa', 2, 3, 4) def test_that_objects_types_are_ok(self): """Check that output samples are of the correct type. diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 15cb1ec81..73ee7f5af 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -4,7 +4,7 @@ import numpy import pytest -from scipy.stats import norm +from scipy.stats import lognorm, norm from orion.algo.space import Categorical, Fidelity, Integer, Real, Space from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ @@ -278,6 +278,11 @@ def test_gmm_sampler_creation(self, tpe): assert len(gmm_sampler.weights) == 12 assert len(gmm_sampler.pdfs) == 12 + gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True) + + assert len(gmm_sampler.weights) == 12 + assert len(gmm_sampler.pdfs) == 12 + def test_sample(self, tpe): """Test GMMSampler sample function""" mus = numpy.linspace(-3, 3, num=12, endpoint=False) @@ -306,6 +311,18 @@ def test_sample(self, tpe): assert numpy.all(points >= -11) assert numpy.all(points < 9) + # loguniform + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) + points = gmm_sampler.sample(10000) + points = numpy.array(points) + + bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9]) + hist = numpy.histogram(points, bins=numpy.exp(bins)) + + assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort()) + assert numpy.all(points >= numpy.exp(-11)) + assert numpy.all(points < numpy.exp(9)) + def test_get_loglikelis(self): """Test to get log likelis of points""" mus = numpy.linspace(-10, 10, num=10, endpoint=False) @@ -344,6 +361,26 @@ def test_get_loglikelis(self): assert point_likeli == gmm_likeli assert len(likelis) == len(points) + # loguniform + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) + + log_pdf = [] + pdfs = [] + for i in range(10): + pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i]))) + for pdf, weight in zip(pdfs, weights): + log_pdf.append(numpy.log(pdf.pdf(0) * weight)) + point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf))) + + points = numpy.random.uniform(-11, 9, 30) + points = numpy.insert(points, 10, 0) + likelis = gmm_sampler.get_loglikelis(points) + + point_likeli = numpy.format_float_scientific(point_likeli, precision=10) + gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10) + assert point_likeli == gmm_likeli + assert len(likelis) == len(points) + class TestTPE(): """Tests for the algo TPE.""" @@ -385,7 +422,8 @@ def test_unsupported_space(self): with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value) + assert 'TPE now only supports uniform, loguniform, uniform discrete and choices' \ + in str(ex.value) space = Space() dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1)) @@ -521,22 +559,34 @@ def test_sample_real_dimension(self): space.register(dim1) dim2 = Real('yolo2', 'uniform', -5, 10, shape=(2)) space.register(dim2) + dim3 = Real('yolo3', 'reciprocal', 1, 20) + space.register(dim3) tpe = TPE(space) points = numpy.random.uniform(-10, 10, 20) below_points = [points[:8]] above_points = [points[8:]] - points = tpe.sample_one_dimension(dim1, 1, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) points 
= numpy.asarray(points) assert len(points) == 1 assert all(points >= -10) assert all(points < 10) + points = numpy.random.uniform(1, 20, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe._sample_real_dimension(dim3, 1, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= 1) + assert all(points < 20) + below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25) above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75) - points = tpe.sample_one_dimension(dim1, 1, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) points = numpy.asarray(points) assert len(points) == 1 assert all(points >= -10) @@ -545,16 +595,16 @@ def test_sample_real_dimension(self): points = numpy.random.uniform(-5, 5, 32) below_points = [points[:8], points[8:16]] above_points = [points[16:24], points[24:]] - points = tpe.sample_one_dimension(dim2, 2, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) points = numpy.asarray(points) assert len(points) == 2 assert all(points >= -10) assert all(points < 10) tpe.n_ei_candidates = 0 - points = tpe.sample_one_dimension(dim2, 2, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) assert len(points) == 0 def test_suggest(self, tpe): diff --git a/tests/unittests/core/test_transformer.py b/tests/unittests/core/test_transformer.py index 011973495..0bb2b6955 100644 --- a/tests/unittests/core/test_transformer.py +++ b/tests/unittests/core/test_transformer.py @@ -531,7 +531,7 @@ def test_interval(self, tdim): def test_interval_from_categorical(self, tdim2): """Check how we should treat interval when original dimension is categorical.""" - assert tdim2.interval() == (-0.1, 1.1) + assert tdim2.interval() == ('asdfa', '2', '3', '4') def test_contains(self, tdim): """Check method `__contains__`.""" From 1c51f5fb1285e21891948fb90f4779d7b6da143c Mon Sep 17 00:00:00 2001 From: donglinjy Date: Sat, 23 May 2020 23:16:15 +0800 Subject: [PATCH 4/6] sync categorical change --- src/orion/algo/tpe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 0266008c0..db506a965 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -389,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points): def _sample_categorical_point(self, dimension, below_points, above_points): """Sample one value for categorical dimension based on the observed good and bad points""" - _, choices = dimension.interval() + choices = dimension.interval() below_points = [choices.index(point) for point in below_points] above_points = [choices.index(point) for point in above_points] From 055d9009d9a337276c1d9b2a25a09378be873347 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Wed, 27 May 2020 22:58:35 +0800 Subject: [PATCH 5/6] add fidelity support --- src/orion/algo/tpe.py | 37 +++++++++------------ tests/unittests/algo/test_tpe.py | 56 +++++++------------------------- 2 files changed, 26 insertions(+), 67 deletions(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index db506a965..4a585c1cc 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -11,7 +11,7 @@ import logging import numpy -from scipy.stats import lognorm, norm +from scipy.stats import norm from orion.algo.base import 
BaseAlgorithm from orion.core.utils.points import flatten_dims, regroup_dims @@ -186,11 +186,8 @@ def __init__(self, space, seed=None, for dimension in self.space.values(): - if dimension.type not in ['real', 'integer', 'categorical']: - raise ValueError("TPE now only supports Real, Integer " - "and Categorical Dimension.") - - if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: + if dimension.type != 'fidelity' and \ + dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: raise ValueError("TPE now only supports uniform, loguniform, uniform discrete " "and choices as prior.") @@ -283,7 +280,8 @@ def suggest(self, num=1): above_points[idx: idx + shape[0]], self._sample_categorical_point) else: - raise NotImplementedError() + # fidelity dimension + points = dimension.sample(num) if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s', @@ -336,12 +334,10 @@ def _sample_real_point(self, dimension, below_points, above_points, is_log=False """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() if is_log: - below_points = numpy.log(below_points) - above_points = numpy.log(above_points) - - # scipy.stats loguniform low = numpy.log(low) high = numpy.log(high) + below_points = numpy.log(below_points) + above_points = numpy.log(above_points) below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, @@ -351,15 +347,19 @@ def _sample_real_point(self, dimension, below_points, above_points, is_log=False self.equal_weight, flat_num=self.full_weight_num) gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, - low, high, below_weights, is_log=is_log) + low, high, below_weights) gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, - low, high, above_weights, is_log=is_log) + low, high, above_weights) candidate_points = gmm_sampler_below.sample(self.n_ei_candidates) if candidate_points: lik_blow = gmm_sampler_below.get_loglikelis(candidate_points) lik_above = gmm_sampler_above.get_loglikelis(candidate_points) new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + + if is_log: + new_point = numpy.exp(new_point) + return new_point return None @@ -452,7 +452,7 @@ class GMMSampler(): """ - def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): + def __init__(self, tpe, mus, sigmas, low, high, weights=None): self.tpe = tpe self.mus = mus @@ -460,10 +460,6 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): self.low = low self.high = high self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)] - self.is_log = is_log - if is_log: - self.low = numpy.exp(low) - self.high = numpy.exp(high) self.pdfs = [] self._build_mixture() @@ -471,10 +467,7 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): def _build_mixture(self): """Build the Gaussian components in the GMM""" for mu, sigma in zip(self.mus, self.sigmas): - if self.is_log: - self.pdfs.append(lognorm(s=sigma, loc=0, scale=numpy.exp(mu))) - else: - self.pdfs.append(norm(mu, sigma)) + self.pdfs.append(norm(mu, sigma)) def sample(self, num=1): """Sample required number of points""" diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 73ee7f5af..3f44adfdd 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -4,7 +4,7 @@ import numpy import pytest -from 
scipy.stats import lognorm, norm +from scipy.stats import norm from orion.algo.space import Categorical, Fidelity, Integer, Real, Space from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ @@ -278,11 +278,6 @@ def test_gmm_sampler_creation(self, tpe): assert len(gmm_sampler.weights) == 12 assert len(gmm_sampler.pdfs) == 12 - gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True) - - assert len(gmm_sampler.weights) == 12 - assert len(gmm_sampler.pdfs) == 12 - def test_sample(self, tpe): """Test GMMSampler sample function""" mus = numpy.linspace(-3, 3, num=12, endpoint=False) @@ -311,18 +306,6 @@ def test_sample(self, tpe): assert numpy.all(points >= -11) assert numpy.all(points < 9) - # loguniform - gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) - points = gmm_sampler.sample(10000) - points = numpy.array(points) - - bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9]) - hist = numpy.histogram(points, bins=numpy.exp(bins)) - - assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort()) - assert numpy.all(points >= numpy.exp(-11)) - assert numpy.all(points < numpy.exp(9)) - def test_get_loglikelis(self): """Test to get log likelis of points""" mus = numpy.linspace(-10, 10, num=10, endpoint=False) @@ -361,26 +344,6 @@ def test_get_loglikelis(self): assert point_likeli == gmm_likeli assert len(likelis) == len(points) - # loguniform - gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) - - log_pdf = [] - pdfs = [] - for i in range(10): - pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i]))) - for pdf, weight in zip(pdfs, weights): - log_pdf.append(numpy.log(pdf.pdf(0) * weight)) - point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf))) - - points = numpy.random.uniform(-11, 9, 30) - points = numpy.insert(points, 10, 0) - likelis = gmm_sampler.get_loglikelis(points) - - point_likeli = numpy.format_float_scientific(point_likeli, precision=10) - gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10) - assert point_likeli == gmm_likeli - assert len(likelis) == len(points) - class TestTPE(): """Tests for the algo TPE.""" @@ -407,13 +370,16 @@ def test_set_state(self, tpe): def test_unsupported_space(self): """Test tpe only work for supported search space""" space = Space() - dim = Fidelity('epoch', 1, 9, 3) - space.register(dim) - - with pytest.raises(ValueError) as ex: - TPE(space) - - assert 'TPE now only supports Real, Integer and Categorical Dimension' in str(ex.value) + dim1 = Real('yolo1', 'uniform', -10, 10) + space.register(dim1) + dim2 = Real('yolo2', 'reciprocal', 10, 20) + space.register(dim2) + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + dim4 = Fidelity('epoch', 1, 9, 3) + space.register(dim4) + TPE(space) space = Space() dim = Real('yolo1', 'norm', 0.9) From 49718252f970ac0f973a7cc505af51cdae614198 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Tue, 9 Jun 2020 10:00:45 +0800 Subject: [PATCH 6/6] fix issues --- docs/src/user/api.rst | 2 +- src/orion/algo/tpe.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/user/api.rst b/docs/src/user/api.rst index 48c6cd12e..5c433dc15 100644 --- a/docs/src/user/api.rst +++ b/docs/src/user/api.rst @@ -78,7 +78,7 @@ you can optimize a function with a single line of code. 
experiment = workon(foo, space=dict(x='uniform(-50,50)')) -The experiment object returned is can be used to fetch the database of trials +The experiment object returned can be used to fetch the database of trials and analyze the optimization process. Note that the storage for `workon` is in-memory and requires no setup. This means however that :py:func:`orion.client.workon` cannot be used for parallel optimisation. diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 4a585c1cc..5a26a1d0b 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -279,9 +279,11 @@ def suggest(self, num=1): below_points[idx: idx + shape[0]], above_points[idx: idx + shape[0]], self._sample_categorical_point) - else: + elif dimension.type == 'fidelity': # fidelity dimension points = dimension.sample(num) + else: + raise NotImplementedError() if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s',
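
For reference, a minimal usage sketch of the search-space support added by this series; it is not part of the patches themselves, reuses the dimension names and values from the fixture in tests/unittests/algo/test_tpe.py, and assumes the series is applied:

    import numpy

    from orion.algo.space import Categorical, Integer, Real, Space
    from orion.algo.tpe import TPE, ramp_up_weights

    # The newly supported priors: uniform real, uniform discrete (integer) and
    # categorical choices (any probabilities given for choices are ignored).
    space = Space()
    space.register(Real('yolo1', 'uniform', -10, 20))
    space.register(Integer('yolo2', 'uniform', -5, 10))
    space.register(Categorical('yolo3', ['a', 0.1, 2, 'c']))

    tpe = TPE(space)

    # Random objectives stand in for real trial results; once the initial random
    # suggestions are exhausted, TPE samples through the adaptive Parzen
    # estimators and the CategoricalSampler introduced in these patches.
    results = numpy.random.random(30)
    for objective in results:
        point = tpe.suggest(1)  # a list holding one (real, integer, categorical) point
        tpe.observe(point, [{'objective': objective}])

    # Weighting of observed trials: the 15 most recent trials get full weight,
    # the 10 oldest are ramped linearly from 1/25 up to 1.0
    # (see test_ramp_up_weights above).
    weights = ramp_up_weights(25, 15, False)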