Skip to content

TPE discrete-categorical-loguniform space support #389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 12, 2020
2 changes: 1 addition & 1 deletion docs/src/user/algorithms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ the most promising point among the candidates.

.. note::

Current implementation only supports uniform, uniform discrete and choices as prior.
Current implementation only supports uniform, loguniform, uniform discrete and choices as prior.
For the choices prior, any probabilities given are ignored.

Configuration
Expand Down
11 changes: 2 additions & 9 deletions src/orion/algo/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,15 +611,8 @@ def sample(self, n_samples=1, seed=None):
return samples

def interval(self, alpha=1.0):
"""Return a tuple of possible values that this categorical dimension
can take.

.. warning:: This method makes no sense for categorical variables. Use
``self.categories`` instead.

"""
raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n"
"Use ``self.categories`` instead.")
"""Return a tuple of possible values that this categorical dimension can take."""
return self.categories

def __contains__(self, point):
"""Check if constraints hold for this `point` of `Dimension`.
Expand Down
63 changes: 48 additions & 15 deletions src/orion/algo/tpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import logging

import numpy
from scipy.stats import norm
from scipy.stats import lognorm, norm

from orion.algo.base import BaseAlgorithm
from orion.core.utils.points import flatten_dims, regroup_dims
Expand Down Expand Up @@ -171,7 +171,7 @@ class TPE(BaseAlgorithm):

# pylint:disable=too-many-arguments
def __init__(self, space, seed=None,
n_initial_points=5, n_ei_candidates=24,
n_initial_points=20, n_ei_candidates=24,
gamma=0.25, equal_weight=False,
prior_weight=1.0, full_weight_num=25):

Expand All @@ -185,12 +185,13 @@ def __init__(self, space, seed=None,
full_weight_num=full_weight_num)

for dimension in self.space.values():

if dimension.type not in ['real', 'integer', 'categorical']:
raise ValueError("TPE now only supports Real, Integer "
"and Categorical Dimension.")

if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']:
raise ValueError("TPE now only supports uniform, uniform discrete "
if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']:
raise ValueError("TPE now only supports uniform, loguniform, uniform discrete "
"and choices as prior.")

shape = dimension.shape
Expand Down Expand Up @@ -267,11 +268,10 @@ def suggest(self, num=1):
if not shape:
shape = (1,)

if dimension.type == 'real' and dimension.prior_name == 'uniform':
points = self.sample_one_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
above_points[idx: idx + shape[0]],
self._sample_real_point)
if dimension.type == 'real':
points = self._sample_real_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
above_points[idx: idx + shape[0]])
elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform':
points = self.sample_one_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
Expand Down Expand Up @@ -317,18 +317,43 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points

return points

def _sample_real_point(self, dimension, below_points, above_points):
def _sample_real_dimension(self, dimension, shape_size, below_points, above_points):
"""Sample values for real dimension"""
if dimension.prior_name == 'uniform':
return self.sample_one_dimension(dimension, shape_size, below_points, above_points,
self._sample_real_point)
elif dimension.prior_name == 'reciprocal':
return self.sample_one_dimension(dimension, shape_size, below_points, above_points,
self._sample_loguniform_real_point)
else:
raise NotImplementedError()

def _sample_loguniform_real_point(self, dimension, below_points, above_points):
"""Sample one value for a real dimension in a loguniform way.

Thin wrapper that forwards its arguments to ``_sample_real_point`` with
``is_log=True``; it exists so it can be passed as the per-point sampler
callback for dimensions whose prior is ``reciprocal``.
"""
return self._sample_real_point(dimension, below_points, above_points, is_log=True)

def _sample_real_point(self, dimension, below_points, above_points, is_log=False):
"""Sample one value for real dimension based on the observed good and bad points"""
low, high = dimension.interval()
if is_log:
below_points = numpy.log(below_points)
above_points = numpy.log(above_points)

# scipy.stats loguniform
low = numpy.log(low)
high = numpy.log(high)

below_mus, below_sigmas, below_weights = \
adaptive_parzen_estimator(below_points, low, high, self.prior_weight,
self.equal_weight, flat_num=self.full_weight_num)
above_mus, above_sigmas, above_weights = \
adaptive_parzen_estimator(above_points, low, high, self.prior_weight,
self.equal_weight, flat_num=self.full_weight_num)

gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, low, high, below_weights)
gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, low, high, above_weights)
gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas,
low, high, below_weights, is_log=is_log)
gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas,
low, high, above_weights, is_log=is_log)

candidate_points = gmm_sampler_below.sample(self.n_ei_candidates)
if candidate_points:
Expand Down Expand Up @@ -364,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points):

def _sample_categorical_point(self, dimension, below_points, above_points):
"""Sample one value for categorical dimension based on the observed good and bad points"""
choices = dimension.categories
_, choices = dimension.interval()

below_points = [choices.index(point) for point in below_points]
above_points = [choices.index(point) for point in above_points]
Expand Down Expand Up @@ -427,22 +452,29 @@ class GMMSampler():

"""

def __init__(self, tpe, mus, sigmas, low, high, weights=None):
def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False):
# ``low`` and ``high`` are stored as the sampler's bounds. When ``is_log`` is
# True they are exponentiated before being stored, so callers are expected to
# pass them in log space, and the mixture components are built as lognormal
# instead of normal distributions (see ``_build_mixture``).
self.tpe = tpe

self.mus = mus
self.sigmas = sigmas
self.low = low
self.high = high
# Without explicit weights every component gets the same weight 1 / len(mus).
self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)]
self.is_log = is_log
if is_log:
# Convert the log-space bounds back to the original scale.
self.low = numpy.exp(low)
self.high = numpy.exp(high)

self.pdfs = []
self._build_mixture()

def _build_mixture(self):
"""Build the Gaussian components in the GMM"""
for mu, sigma in zip(self.mus, self.sigmas):
self.pdfs.append(norm(mu, sigma))
if self.is_log:
self.pdfs.append(lognorm(s=sigma, loc=0, scale=numpy.exp(mu)))
else:
self.pdfs.append(norm(mu, sigma))

def sample(self, num=1):
"""Sample required number of points"""
Expand All @@ -463,6 +495,7 @@ def get_loglikelis(self, points):
weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points))
for i, pdf in enumerate(self.pdfs)]
weight_likelis = numpy.array(weight_likelis)
# (num_weights, num_points) => (num_points, num_weights)
weight_likelis = weight_likelis.transpose()

# log-sum-exp trick
Expand Down
11 changes: 5 additions & 6 deletions src/orion/core/worker/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,11 @@ def sample(self, n_samples=1, seed=None):

def interval(self, alpha=1.0):
"""Map the interval bounds to the transformed ones."""
try:
low, high = self.original_dimension.interval(alpha)
except RuntimeError as exc:
if "Categories" in str(exc):
return (-0.1, 1.1)
raise
if self.original_dimension.prior_name == 'choices':
return self.original_dimension.categories

low, high = self.original_dimension.interval(alpha)

return self.transform(low), self.transform(high)

def __contains__(self, point):
Expand Down
6 changes: 2 additions & 4 deletions tests/unittests/algo/test_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,14 +466,12 @@ def test_bad_probabilities(self):
with pytest.raises(ValueError):
Categorical('yolo', categories, shape=2)

def test_interval_is_banned(self):
def test_interval(self):
"""Check that `Categorical.interval` returns the tuple of categories."""
categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4}
dim = Categorical('yolo', categories, shape=2)

with pytest.raises(RuntimeError) as exc:
dim.interval()
assert 'not ordered' in str(exc.value)
assert dim.interval() == ('asdfa', 2, 3, 4)

def test_that_objects_types_are_ok(self):
"""Check that output samples are of the correct type.
Expand Down
70 changes: 60 additions & 10 deletions tests/unittests/algo/test_tpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy
import pytest
from scipy.stats import norm
from scipy.stats import lognorm, norm

from orion.algo.space import Categorical, Fidelity, Integer, Real, Space
from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \
Expand Down Expand Up @@ -278,6 +278,11 @@ def test_gmm_sampler_creation(self, tpe):
assert len(gmm_sampler.weights) == 12
assert len(gmm_sampler.pdfs) == 12

gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True)

assert len(gmm_sampler.weights) == 12
assert len(gmm_sampler.pdfs) == 12

def test_sample(self, tpe):
"""Test GMMSampler sample function"""
mus = numpy.linspace(-3, 3, num=12, endpoint=False)
Expand Down Expand Up @@ -306,6 +311,18 @@ def test_sample(self, tpe):
assert numpy.all(points >= -11)
assert numpy.all(points < 9)

# loguniform
gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True)
points = gmm_sampler.sample(10000)
points = numpy.array(points)

bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9])
hist = numpy.histogram(points, bins=numpy.exp(bins))

assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort())
assert numpy.all(points >= numpy.exp(-11))
assert numpy.all(points < numpy.exp(9))

def test_get_loglikelis(self):
"""Test to get log likelis of points"""
mus = numpy.linspace(-10, 10, num=10, endpoint=False)
Expand Down Expand Up @@ -344,6 +361,26 @@ def test_get_loglikelis(self):
assert point_likeli == gmm_likeli
assert len(likelis) == len(points)

# loguniform
gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True)

log_pdf = []
pdfs = []
for i in range(10):
pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i])))
for pdf, weight in zip(pdfs, weights):
log_pdf.append(numpy.log(pdf.pdf(0) * weight))
point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf)))

points = numpy.random.uniform(-11, 9, 30)
points = numpy.insert(points, 10, 0)
likelis = gmm_sampler.get_loglikelis(points)

point_likeli = numpy.format_float_scientific(point_likeli, precision=10)
gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10)
assert point_likeli == gmm_likeli
assert len(likelis) == len(points)


class TestTPE():
"""Tests for the algo TPE."""
Expand Down Expand Up @@ -385,7 +422,8 @@ def test_unsupported_space(self):
with pytest.raises(ValueError) as ex:
TPE(space)

assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value)
assert 'TPE now only supports uniform, loguniform, uniform discrete and choices' \
in str(ex.value)

space = Space()
dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1))
Expand Down Expand Up @@ -521,22 +559,34 @@ def test_sample_real_dimension(self):
space.register(dim1)
dim2 = Real('yolo2', 'uniform', -5, 10, shape=(2))
space.register(dim2)
dim3 = Real('yolo3', 'reciprocal', 1, 20)
space.register(dim3)

tpe = TPE(space)
points = numpy.random.uniform(-10, 10, 20)
below_points = [points[:8]]
above_points = [points[8:]]
points = tpe.sample_one_dimension(dim1, 1,
below_points, above_points, tpe._sample_real_point)
points = tpe._sample_real_dimension(dim1, 1,
below_points, above_points)
points = numpy.asarray(points)
assert len(points) == 1
assert all(points >= -10)
assert all(points < 10)

points = numpy.random.uniform(1, 20, 20)
below_points = [points[:8]]
above_points = [points[8:]]
points = tpe._sample_real_dimension(dim3, 1,
below_points, above_points)
points = numpy.asarray(points)
assert len(points) == 1
assert all(points >= 1)
assert all(points < 20)

below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25)
above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75)
points = tpe.sample_one_dimension(dim1, 1,
below_points, above_points, tpe._sample_real_point)
points = tpe._sample_real_dimension(dim1, 1,
below_points, above_points)
points = numpy.asarray(points)
assert len(points) == 1
assert all(points >= -10)
Expand All @@ -545,16 +595,16 @@ def test_sample_real_dimension(self):
points = numpy.random.uniform(-5, 5, 32)
below_points = [points[:8], points[8:16]]
above_points = [points[16:24], points[24:]]
points = tpe.sample_one_dimension(dim2, 2,
below_points, above_points, tpe._sample_real_point)
points = tpe._sample_real_dimension(dim2, 2,
below_points, above_points)
points = numpy.asarray(points)
assert len(points) == 2
assert all(points >= -10)
assert all(points < 10)

tpe.n_ei_candidates = 0
points = tpe.sample_one_dimension(dim2, 2,
below_points, above_points, tpe._sample_real_point)
points = tpe._sample_real_dimension(dim2, 2,
below_points, above_points)
assert len(points) == 0

def test_suggest(self, tpe):
Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/core/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def test_interval(self, tdim):

def test_interval_from_categorical(self, tdim2):
"""Check how we should treat interval when original dimension is categorical."""
assert tdim2.interval() == (-0.1, 1.1)
assert tdim2.interval() == ('asdfa', '2', '3', '4')

def test_contains(self, tdim):
"""Check method `__contains__`."""
Expand Down