Epistimio · Thomsch · Jun 12, 2020 · May 8, 2020 · May 12, 2020 · May 23, 2020
diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst
@@ -212,7 +212,7 @@ the most promising point among the candidates.
 
 .. note::
 
-   Current implementation only supports uniform, uniform discrete and choices as prior.
+   Current implementation only supports uniform, loguniform, uniform discrete and choices as prior.
    As for choices prior, the probabilities if any given will be ignored.
 
 Configuration

diff --git a/src/orion/algo/space.py b/src/orion/algo/space.py
@@ -611,15 +611,8 @@ def sample(self, n_samples=1, seed=None):
         return samples
 
     def interval(self, alpha=1.0):
-        """Return a tuple of possible values that this categorical dimension
-        can take.
-
-        .. warning:: This method makes no sense for categorical variables. Use
-           ``self.categories`` instead.
-
-        """
-        raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n"
-                           "Use ``self.categories`` instead.")
+        """Return the tuple of categories this dimension can take (``alpha`` is not used)."""
+        return self.categories
 
     def __contains__(self, point):
         """Check if constraints hold for this `point` of `Dimension`.

diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py
@@ -11,7 +11,7 @@
 import logging
 
 import numpy
-from scipy.stats import norm
+from scipy.stats import lognorm, norm
 
 from orion.algo.base import BaseAlgorithm
 from orion.core.utils.points import flatten_dims, regroup_dims
@@ -171,7 +171,7 @@ class TPE(BaseAlgorithm):
 
     # pylint:disable=too-many-arguments
     def __init__(self, space, seed=None,
-                 n_initial_points=5, n_ei_candidates=24,
+                 n_initial_points=20, n_ei_candidates=24,
                  gamma=0.25, equal_weight=False,
                  prior_weight=1.0, full_weight_num=25):
 
@@ -185,12 +185,13 @@ def __init__(self, space, seed=None,
                                   full_weight_num=full_weight_num)
 
         for dimension in self.space.values():
+
             if dimension.type not in ['real', 'integer', 'categorical']:
                 raise ValueError("TPE now only supports Real, Integer "
                                  "and Categorical Dimension.")
 
-            if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']:
-                raise ValueError("TPE now only supports uniform, uniform discrete "
+            if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']:
+                raise ValueError("TPE now only supports uniform, loguniform, uniform discrete "
                                  "and choices as prior.")
 
             shape = dimension.shape
@@ -267,11 +268,10 @@ def suggest(self, num=1):
                 if not shape:
                     shape = (1,)
 
-                if dimension.type == 'real' and dimension.prior_name == 'uniform':
-                    points = self.sample_one_dimension(dimension, shape[0],
-                                                       below_points[idx: idx + shape[0]],
-                                                       above_points[idx: idx + shape[0]],
-                                                       self._sample_real_point)
+                if dimension.type == 'real':
+                    points = self._sample_real_dimension(dimension, shape[0],
+                                                         below_points[idx: idx + shape[0]],
+                                                         above_points[idx: idx + shape[0]])
                 elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform':
                     points = self.sample_one_dimension(dimension, shape[0],
                                                        below_points[idx: idx + shape[0]],
@@ -317,18 +317,43 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points
 
         return points
 
-    def _sample_real_point(self, dimension, below_points, above_points):
+    def _sample_real_dimension(self, dimension, shape_size, below_points, above_points):
+        """Sample values for a real dimension, picking the point sampler from its prior."""
+        samplers = {'uniform': self._sample_real_point,
+                    'reciprocal': self._sample_loguniform_real_point}
+        if dimension.prior_name not in samplers:
+            raise NotImplementedError(
+                "TPE cannot sample real dimension with prior "
+                "'{}'".format(dimension.prior_name))
+        return self.sample_one_dimension(dimension, shape_size, below_points, above_points,
+                                         samplers[dimension.prior_name])
+
+    def _sample_loguniform_real_point(self, dimension, below_points, above_points):
+        """Sample one value for a real dimension whose prior is loguniform ('reciprocal')."""
+        return self._sample_real_point(dimension, below_points, above_points, is_log=True)
+
+    def _sample_real_point(self, dimension, below_points, above_points, is_log=False):
         """Sample one value for real dimension based on the observed good and bad points"""
         low, high = dimension.interval()
+        if is_log:
+            below_points = numpy.log(below_points)
+            above_points = numpy.log(above_points)
+
+            # scipy.stats loguniform
+            low = numpy.log(low)
+            high = numpy.log(high)
+
         below_mus, below_sigmas, below_weights = \
             adaptive_parzen_estimator(below_points, low, high, self.prior_weight,
                                       self.equal_weight, flat_num=self.full_weight_num)
         above_mus, above_sigmas, above_weights = \
             adaptive_parzen_estimator(above_points, low, high, self.prior_weight,
                                       self.equal_weight, flat_num=self.full_weight_num)
 
-        gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, low, high, below_weights)
-        gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, low, high, above_weights)
+        gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas,
+                                       low, high, below_weights, is_log=is_log)
+        gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas,
+                                       low, high, above_weights, is_log=is_log)
 
         candidate_points = gmm_sampler_below.sample(self.n_ei_candidates)
         if candidate_points:
@@ -364,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points):
 
     def _sample_categorical_point(self, dimension, below_points, above_points):
         """Sample one value for categorical dimension based on the observed good and bad points"""
-        choices = dimension.categories
+        choices = dimension.interval()  # for a categorical dimension: the tuple of categories
 
         below_points = [choices.index(point) for point in below_points]
         above_points = [choices.index(point) for point in above_points]
@@ -427,22 +452,29 @@ class GMMSampler():
 
     """
 
-    def __init__(self, tpe, mus, sigmas, low, high, weights=None):
+    def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False):
         self.tpe = tpe
 
         self.mus = mus
         self.sigmas = sigmas
         self.low = low
         self.high = high
         self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)]
+        self.is_log = is_log
+        if is_log:  # bounds are given in log space; store them back in the original space
+            self.low = numpy.exp(low)
+            self.high = numpy.exp(high)
 
         self.pdfs = []
         self._build_mixture()
 
     def _build_mixture(self):
-        """Build the Gaussian components in the GMM"""
+        """Build the mixture components: normal, or log-normal when ``is_log`` is set"""
         for mu, sigma in zip(self.mus, self.sigmas):
-            self.pdfs.append(norm(mu, sigma))
+            # lognorm(s=sigma, scale=exp(mu)) is the law of exp(X) for X ~ norm(mu, sigma)
+            component = (lognorm(s=sigma, loc=0, scale=numpy.exp(mu)) if self.is_log
+                         else norm(mu, sigma))
+            self.pdfs.append(component)
 
     def sample(self, num=1):
         """Sample required number of points"""
@@ -463,6 +495,7 @@ def get_loglikelis(self, points):
         weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points))
                           for i, pdf in enumerate(self.pdfs)]
         weight_likelis = numpy.array(weight_likelis)
+        # (num_weights, num_points) => (num_points, num_weights)
         weight_likelis = weight_likelis.transpose()
 
         # log-sum-exp trick

diff --git a/src/orion/core/worker/transformer.py b/src/orion/core/worker/transformer.py
@@ -411,12 +411,11 @@ def sample(self, n_samples=1, seed=None):
 
     def interval(self, alpha=1.0):
         """Map the interval bounds to the transformed ones."""
-        try:
-            low, high = self.original_dimension.interval(alpha)
-        except RuntimeError as exc:
-            if "Categories" in str(exc):
-                return (-0.1, 1.1)
-            raise
+        # Categories are not ordered: return them as-is rather than transformed bounds
+        if self.original_dimension.prior_name == 'choices':
+            return self.original_dimension.categories
+
+        low, high = self.original_dimension.interval(alpha)
         return self.transform(low), self.transform(high)
 
     def __contains__(self, point):

diff --git a/tests/unittests/algo/test_space.py b/tests/unittests/algo/test_space.py
@@ -466,14 +466,12 @@ def test_bad_probabilities(self):
         with pytest.raises(ValueError):
             Categorical('yolo', categories, shape=2)
 
-    def test_interval_is_banned(self):
+    def test_interval(self):
         """Check that calling `Categorical.interval` raises `RuntimeError`."""
         categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4}
         dim = Categorical('yolo', categories, shape=2)
 
-        with pytest.raises(RuntimeError) as exc:
-            dim.interval()
-        assert 'not ordered' in str(exc.value)
+        assert dim.interval() == ('asdfa', 2, 3, 4)
 
     def test_that_objects_types_are_ok(self):
         """Check that output samples are of the correct type.

diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py
@@ -4,7 +4,7 @@
 
 import numpy
 import pytest
-from scipy.stats import norm
+from scipy.stats import lognorm, norm
 
 from orion.algo.space import Categorical, Fidelity, Integer, Real, Space
 from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \
@@ -278,6 +278,11 @@ def test_gmm_sampler_creation(self, tpe):
         assert len(gmm_sampler.weights) == 12
         assert len(gmm_sampler.pdfs) == 12
 
+        gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True)
+
+        assert len(gmm_sampler.weights) == 12
+        assert len(gmm_sampler.pdfs) == 12
+
     def test_sample(self, tpe):
         """Test GMMSampler sample function"""
         mus = numpy.linspace(-3, 3, num=12, endpoint=False)
@@ -306,6 +311,18 @@ def test_sample(self, tpe):
         assert numpy.all(points >= -11)
         assert numpy.all(points < 9)
 
+        # loguniform
+        gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True)
+        points = gmm_sampler.sample(10000)
+        points = numpy.array(points)
+
+        bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9])
+        hist = numpy.histogram(points, bins=numpy.exp(bins))
+
+        assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort())
+        assert numpy.all(points >= numpy.exp(-11))
+        assert numpy.all(points < numpy.exp(9))
+
     def test_get_loglikelis(self):
         """Test to get log likelis of points"""
         mus = numpy.linspace(-10, 10, num=10, endpoint=False)
@@ -344,6 +361,26 @@ def test_get_loglikelis(self):
         assert point_likeli == gmm_likeli
         assert len(likelis) == len(points)
 
+        # loguniform
+        gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True)
+
+        log_pdf = []
+        pdfs = []
+        for i in range(10):
+            pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i])))
+        for pdf, weight in zip(pdfs, weights):
+            log_pdf.append(numpy.log(pdf.pdf(0) * weight))
+        point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf)))
+
+        points = numpy.random.uniform(-11, 9, 30)
+        points = numpy.insert(points, 10, 0)
+        likelis = gmm_sampler.get_loglikelis(points)
+
+        point_likeli = numpy.format_float_scientific(point_likeli, precision=10)
+        gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10)
+        assert point_likeli == gmm_likeli
+        assert len(likelis) == len(points)
+
 
 class TestTPE():
     """Tests for the algo TPE."""
@@ -385,7 +422,8 @@ def test_unsupported_space(self):
         with pytest.raises(ValueError) as ex:
             TPE(space)
 
-        assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value)
+        assert 'TPE now only supports uniform, loguniform, uniform discrete and choices' \
+               in str(ex.value)
 
         space = Space()
         dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1))
@@ -521,22 +559,34 @@ def test_sample_real_dimension(self):
         space.register(dim1)
         dim2 = Real('yolo2', 'uniform', -5, 10, shape=(2))
         space.register(dim2)
+        dim3 = Real('yolo3', 'reciprocal', 1, 20)
+        space.register(dim3)
 
         tpe = TPE(space)
         points = numpy.random.uniform(-10, 10, 20)
         below_points = [points[:8]]
         above_points = [points[8:]]
-        points = tpe.sample_one_dimension(dim1, 1,
-                                          below_points, above_points, tpe._sample_real_point)
+        points = tpe._sample_real_dimension(dim1, 1,
+                                            below_points, above_points)
         points = numpy.asarray(points)
         assert len(points) == 1
         assert all(points >= -10)
         assert all(points < 10)
 
+        points = numpy.random.uniform(1, 20, 20)
+        below_points = [points[:8]]
+        above_points = [points[8:]]
+        points = tpe._sample_real_dimension(dim3, 1,
+                                            below_points, above_points)
+        points = numpy.asarray(points)
+        assert len(points) == 1
+        assert all(points >= 1)
+        assert all(points < 20)
+
         below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25)
         above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75)
-        points = tpe.sample_one_dimension(dim1, 1,
-                                          below_points, above_points, tpe._sample_real_point)
+        points = tpe._sample_real_dimension(dim1, 1,
+                                            below_points, above_points)
         points = numpy.asarray(points)
         assert len(points) == 1
         assert all(points >= -10)
@@ -545,16 +595,16 @@ def test_sample_real_dimension(self):
         points = numpy.random.uniform(-5, 5, 32)
         below_points = [points[:8], points[8:16]]
         above_points = [points[16:24], points[24:]]
-        points = tpe.sample_one_dimension(dim2, 2,
-                                          below_points, above_points, tpe._sample_real_point)
+        points = tpe._sample_real_dimension(dim2, 2,
+                                            below_points, above_points)
         points = numpy.asarray(points)
         assert len(points) == 2
         assert all(points >= -10)
         assert all(points < 10)
 
         tpe.n_ei_candidates = 0
-        points = tpe.sample_one_dimension(dim2, 2,
-                                          below_points, above_points, tpe._sample_real_point)
+        points = tpe._sample_real_dimension(dim2, 2,
+                                            below_points, above_points)
         assert len(points) == 0
 
     def test_suggest(self, tpe):

diff --git a/tests/unittests/core/test_transformer.py b/tests/unittests/core/test_transformer.py
@@ -531,7 +531,7 @@ def test_interval(self, tdim):
 
     def test_interval_from_categorical(self, tdim2):
         """Check how we should treat interval when original dimension is categorical."""
-        assert tdim2.interval() == (-0.1, 1.1)
+        assert tdim2.interval() == ('asdfa', '2', '3', '4')
 
     def test_contains(self, tdim):
         """Check method `__contains__`."""