Skip to content

TPE discrete-categorical-loguniform space support #389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 12, 2020
4 changes: 4 additions & 0 deletions docs/src/user/algorithms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ the most promising point among the candidates.
.. _Tree-structured Parzen Estimator:
https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf

.. note::

The current implementation only supports uniform, loguniform, uniform discrete and choices as priors.
For the choices prior, any probabilities given will be ignored.

Configuration
~~~~~~~~~~~~~
Expand Down
11 changes: 2 additions & 9 deletions src/orion/algo/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,15 +611,8 @@ def sample(self, n_samples=1, seed=None):
return samples

def interval(self, alpha=1.0):
"""Return a tuple of possible values that this categorical dimension
can take.

.. warning:: This method makes no sense for categorical variables. Use
``self.categories`` instead.

"""
raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n"
"Use ``self.categories`` instead.")
"""Return a tuple of possible values that this categorical dimension can take."""
return self.categories

def __contains__(self, point):
"""Check if constraints hold for this `point` of `Dimension`.
Expand Down
205 changes: 173 additions & 32 deletions src/orion/algo/tpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,23 @@ def compute_max_ei_point(points, below_likelis, above_likelis):
return points[point_index]


def ramp_up_weights(total_num, flat_num, equal_weight):
    """Adjust weights of observed trials.

    :param total_num: total number of observed trials.
    :param flat_num: the number of the most recent trials which get the full weight;
        the other ones are weighted with a linear ramp from ``1.0 / total_num``
        to 1.0. It only takes effect if ``equal_weight`` is False.
    :param equal_weight: whether all the observed trials share the same weight.
    :return: 1D array of ``total_num`` weights, ramped weights first followed
        by the ``flat_num`` full weights.
    """
    # Not enough observations to ramp, or ramping disabled: every trial counts fully.
    if total_num < flat_num or equal_weight:
        return numpy.ones(total_num)

    # The first ``total_num - flat_num`` trials are ramped up linearly,
    # the last ``flat_num`` trials keep a weight of 1.0.
    ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num)
    flat_weights = numpy.ones(flat_num)
    return numpy.concatenate([ramp_weights, flat_weights])


# pylint:disable=assignment-from-no-return
def adaptive_parzen_estimator(mus, low, high,
prior_weight=1.0,
Expand All @@ -56,15 +73,6 @@ def adaptive_parzen_estimator(mus, low, high,
get the full weight where the others will be applied with a linear ramp
from 0 to 1.0. It will only take effect if equal_weight is False.
"""
def update_weights(total_num):
"""Generate weights for all components"""
if total_num < flat_num or equal_weight:
return numpy.ones(total_num)

ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num)
flat_weights = numpy.ones(flat_num)
return numpy.concatenate([ramp_weights, flat_weights])

mus = numpy.asarray(mus)

prior_mu = (low + high) * 0.5
Expand All @@ -76,7 +84,7 @@ def update_weights(total_num):
sorted_mus = mus[order]
prior_mu_pos = numpy.searchsorted(sorted_mus, prior_mu)

weights = update_weights(size)
weights = ramp_up_weights(size, flat_num, equal_weight)

mixture_mus = numpy.zeros(size + 1)
mixture_mus[:prior_mu_pos] = sorted_mus[:prior_mu_pos]
Expand Down Expand Up @@ -178,11 +186,10 @@ def __init__(self, space, seed=None,

for dimension in self.space.values():

if dimension.type not in ['real']:
raise ValueError("TPE now only supports Real Dimension.")

if dimension.prior_name not in ['uniform']:
raise ValueError("TPE now only supports uniform as prior.")
if dimension.type != 'fidelity' and \
dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']:
raise ValueError("TPE now only supports uniform, loguniform, uniform discrete "
"and choices as prior.")

shape = dimension.shape
if shape and len(shape) != 1:
Expand Down Expand Up @@ -245,8 +252,11 @@ def suggest(self, num=1):
else:
point = []
below_points, above_points = self.split_trials()
below_points = numpy.array([flatten_dims(point, self.space) for point in below_points])
above_points = numpy.array([flatten_dims(point, self.space) for point in above_points])

below_points = [flatten_dims(point, self.space) for point in below_points]
above_points = [flatten_dims(point, self.space) for point in above_points]
below_points = list(map(list, zip(*below_points)))
above_points = list(map(list, zip(*above_points)))

idx = 0
for dimension in self.space.values():
Expand All @@ -256,11 +266,22 @@ def suggest(self, num=1):
shape = (1,)

if dimension.type == 'real':
points = self.sample_real_dimension(dimension, shape[0],
below_points[:, idx: idx + shape[0]],
above_points[:, idx: idx + shape[0]])
points = self._sample_real_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
above_points[idx: idx + shape[0]])
elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform':
points = self.sample_one_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
above_points[idx: idx + shape[0]],
self._sample_int_point)
elif dimension.type == 'categorical' and dimension.prior_name == 'choices':
points = self.sample_one_dimension(dimension, shape[0],
below_points[idx: idx + shape[0]],
above_points[idx: idx + shape[0]],
self._sample_categorical_point)
else:
raise ValueError("TPE now only support Real Dimension.")
# fidelity dimension
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe elif dimension.type == 'fidelity' to be sure to catch unsupported dimensions that would have been missed in __init__

points = dimension.sample(num)

if len(points) < shape[0]:
logger.warning('TPE failed to sample new point with configuration %s',
Expand All @@ -275,45 +296,119 @@ def suggest(self, num=1):

return samples

def sample_real_dimension(self, dimension, shape_size, below_points, above_points):
"""Sample values for a real dimension
# pylint:disable=no-self-use
def sample_one_dimension(self, dimension, shape_size, below_points, above_points, sampler):
"""Sample values for a dimension

:param dimension: Real Dimension.
:param dimension: Dimension.
:param shape_size: 1D Shape Size of the Dimension.
:param below_points: good points with shape (m, n), n=shape_size.
:param above_points: bad points with shape (m, n), n=shape_size.
:param below_points: good points with shape (m, n), m=shape_size.
:param above_points: bad points with shape (m, n), m=shape_size.
:param sampler: method used to sample one value for the dimension.
"""
points = []

for j in range(shape_size):
new_point = self._sample_real_point(dimension, below_points[:, j], above_points[:, j])
if new_point:
new_point = sampler(dimension, below_points[j], above_points[j])
if new_point is not None:
points.append(new_point)

return points

def _sample_real_point(self, dimension, below_points, above_points):
"""Sample one value for a real dimension based on the observed good and bad points"""
def _sample_real_dimension(self, dimension, shape_size, below_points, above_points):
"""Sample values for real dimension"""
if dimension.prior_name == 'uniform':
return self.sample_one_dimension(dimension, shape_size, below_points, above_points,
self._sample_real_point)
elif dimension.prior_name == 'reciprocal':
return self.sample_one_dimension(dimension, shape_size, below_points, above_points,
self._sample_loguniform_real_point)
else:
raise NotImplementedError()

def _sample_loguniform_real_point(self, dimension, below_points, above_points):
"""Sample one value for real dimension in a loguniform way"""
return self._sample_real_point(dimension, below_points, above_points, is_log=True)

def _sample_real_point(self, dimension, below_points, above_points, is_log=False):
"""Sample one value for real dimension based on the observed good and bad points"""
low, high = dimension.interval()
if is_log:
low = numpy.log(low)
high = numpy.log(high)
below_points = numpy.log(below_points)
above_points = numpy.log(above_points)

below_mus, below_sigmas, below_weights = \
adaptive_parzen_estimator(below_points, low, high, self.prior_weight,
self.equal_weight, flat_num=self.full_weight_num)
above_mus, above_sigmas, above_weights = \
adaptive_parzen_estimator(above_points, low, high, self.prior_weight,
self.equal_weight, flat_num=self.full_weight_num)

gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, low, high, below_weights)
gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, low, high, above_weights)
gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas,
low, high, below_weights)
gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas,
low, high, above_weights)

candidate_points = gmm_sampler_below.sample(self.n_ei_candidates)
if candidate_points:
lik_blow = gmm_sampler_below.get_loglikelis(candidate_points)
lik_above = gmm_sampler_above.get_loglikelis(candidate_points)
new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above)

if is_log:
new_point = numpy.exp(new_point)

return new_point

return None

def _sample_int_point(self, dimension, below_points, above_points):
    """Sample one value for integer dimension based on the observed good and bad points

    :param dimension: dimension whose ``interval()`` gives the ``(low, high)`` bounds.
    :param below_points: observed good values for this dimension.
    :param above_points: observed bad values for this dimension.
    :return: the candidate maximizing the expected improvement, or None if no
        candidate could be sampled.
    """
    low, high = dimension.interval()
    # NOTE(review): ``range`` excludes ``high``; confirm whether
    # ``dimension.interval()`` returns an inclusive upper bound, in which case
    # ``high`` gets no prior weight here.
    choices = range(low, high)

    # Shift observations so that they are 0-based indices into ``choices``.
    below_points = numpy.array(below_points).astype(int) - low
    above_points = numpy.array(above_points).astype(int) - low

    sampler_below = CategoricalSampler(self, below_points, choices)
    candidate_points = sampler_below.sample(self.n_ei_candidates)

    if len(candidate_points) == 0:
        return None

    sampler_above = CategoricalSampler(self, above_points, choices)

    lik_below = sampler_below.get_loglikelis(candidate_points)
    lik_above = sampler_above.get_loglikelis(candidate_points)

    new_point = compute_max_ei_point(candidate_points, lik_below, lik_above)
    # Shift the selected index back into the original value range.
    return new_point + low

def _sample_categorical_point(self, dimension, below_points, above_points):
    """Sample one value for categorical dimension based on the observed good and bad points

    :param dimension: dimension whose ``interval()`` gives the possible choices.
    :param below_points: observed good values for this dimension.
    :param above_points: observed bad values for this dimension.
    :return: the choice maximizing the expected improvement, or None if no
        candidate could be sampled.
    """
    choices = dimension.interval()

    # Work on the positions of the observed values inside ``choices``.
    below_points = [choices.index(point) for point in below_points]
    above_points = [choices.index(point) for point in above_points]

    sampler_below = CategoricalSampler(self, below_points, choices)
    candidate_points = sampler_below.sample(self.n_ei_candidates)

    if len(candidate_points) == 0:
        return None

    sampler_above = CategoricalSampler(self, above_points, choices)

    lik_below = sampler_below.get_loglikelis(candidate_points)
    lik_above = sampler_above.get_loglikelis(candidate_points)

    new_point_index = compute_max_ei_point(candidate_points, lik_below, lik_above)
    # Map the selected index back to the actual choice.
    return choices[new_point_index]

def split_trials(self):
"""Split the observed trials into good and bad ones based on the ratio ``gamma``"""
sorted_trials = sorted(self._trials_info.values(), key=lambda x: x[1]['objective'])
Expand Down Expand Up @@ -393,6 +488,7 @@ def get_loglikelis(self, points):
weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points))
for i, pdf in enumerate(self.pdfs)]
weight_likelis = numpy.array(weight_likelis)
# (num_weights, num_points) => (num_points, num_weights)
weight_likelis = weight_likelis.transpose()

# log-sum-exp trick
Expand All @@ -402,3 +498,48 @@ def get_loglikelis(self, points):
axis=1))

return point_likeli


class CategoricalSampler():
    """Categorical Sampler for discrete integer and categorical choices

    Parameters
    ----------
    tpe: `TPE` algorithm
        The tpe algorithm object which this sampler will be part of.
        Its ``rng``, ``full_weight_num``, ``equal_weight`` and ``prior_weight``
        attributes are read by the sampler.
    observations: list
        Observed values in the dimension, given as indices into ``choices``
    choices: list
        Candidate values for the dimension

    """

    def __init__(self, tpe, observations, choices):
        self.tpe = tpe
        self.obs = observations
        self.choices = choices

        self._build_multinomial_weights()

    def _build_multinomial_weights(self):
        """Build weights for categorical distribution based on observations"""
        weights_obs = ramp_up_weights(len(self.obs),
                                      self.tpe.full_weight_num, self.tpe.equal_weight)
        # Weighted count of the observations falling on each choice.
        counts_obs = numpy.bincount(self.obs, minlength=len(self.choices), weights=weights_obs)
        # Add the prior weight to every choice before normalizing.
        counts_obs = counts_obs + self.tpe.prior_weight
        self.weights = counts_obs / counts_obs.sum()

    def sample(self, num=1):
        """Sample required number of points

        Returns an array of shape ``(num,)`` holding the indices of the sampled choices.
        """
        # Each draw is a one-hot row over the weights.
        samples = self.tpe.rng.multinomial(n=1, pvals=self.weights, size=num)

        assert samples.shape == (num,) + (len(self.weights),)

        # Position of the single 1 in each row is the sampled index.
        samples_index = samples.argmax(-1)
        assert samples_index.shape == (num,)

        return samples_index

    def get_loglikelis(self, points):
        """Return the log likelihood for the points

        ``points`` are indices into the weights built from the observations.
        """
        return numpy.log(numpy.asarray(self.weights)[points])
11 changes: 5 additions & 6 deletions src/orion/core/worker/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,11 @@ def sample(self, n_samples=1, seed=None):

def interval(self, alpha=1.0):
"""Map the interval bounds to the transformed ones."""
try:
low, high = self.original_dimension.interval(alpha)
except RuntimeError as exc:
if "Categories" in str(exc):
return (-0.1, 1.1)
raise
if self.original_dimension.prior_name == 'choices':
return self.original_dimension.categories

low, high = self.original_dimension.interval(alpha)

return self.transform(low), self.transform(high)

def __contains__(self, point):
Expand Down
6 changes: 2 additions & 4 deletions tests/functional/algos/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,9 @@ def test_simple(monkeypatch, config_file):

@pytest.mark.usefixtures("clean_db")
@pytest.mark.usefixtures("null_db_instances")
def test_random_stop(monkeypatch):
@pytest.mark.parametrize('config_file', config_files)
def test_random_stop(monkeypatch, config_file):
"""Test a simple usage scenario."""
# TODO: TPE should support this case once discrete is added,
# then parametrized config_file should be used
config_file = 'random_config.yaml'
monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__)))
orion.core.cli.main(["hunt", "--config", config_file,
"./black_box.py", "-x~uniform(-10, 5, discrete=True)"])
Expand Down
6 changes: 2 additions & 4 deletions tests/unittests/algo/test_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,14 +466,12 @@ def test_bad_probabilities(self):
with pytest.raises(ValueError):
Categorical('yolo', categories, shape=2)

def test_interval_is_banned(self):
def test_interval(self):
"""Check that calling `Categorical.interval` returns the categories."""
categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4}
dim = Categorical('yolo', categories, shape=2)

with pytest.raises(RuntimeError) as exc:
dim.interval()
assert 'not ordered' in str(exc.value)
assert dim.interval() == ('asdfa', 2, 3, 4)

def test_that_objects_types_are_ok(self):
"""Check that output samples are of the correct type.
Expand Down
Loading