From dce2c9dab25804b5d36e61a612b6ed5abbdfbf40 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Fri, 8 May 2020 21:41:02 +0800 Subject: [PATCH 1/6] tpe discrete categorical space support --- docs/src/user/algorithms.rst | 4 + src/orion/algo/tpe.py | 173 +++++++++++++++++---- tests/functional/algos/test_algos.py | 6 +- tests/unittests/algo/test_tpe.py | 216 ++++++++++++++++++++++++--- 4 files changed, 345 insertions(+), 54 deletions(-) diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index 399edcc1e..e276e5671 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -210,6 +210,10 @@ the most promising point among the candidates. .. _Tree-structured Parzen Estimator: https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf +.. note:: + + Current implementation only supports uniform, uniform discrete and choices as prior. + As for choices prior, the probabilities if any given will be ignored. Configuration ~~~~~~~~~~~~~ diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index d3db72022..a931e5f38 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -36,6 +36,23 @@ def compute_max_ei_point(points, below_likelis, above_likelis): return points[point_index] +def ramp_up_weights(total_num, flat_num, equal_weight): + """Adjust weights of observed trials. + + :param total_num: total number of observed trials. + :param flat_num: the number of the most recent trials which + get the full weight where the others will be applied with a linear ramp + from 0 to 1.0. It will only take effect if equal_weight is False. + :param equal_weight: whether all the observed trails share the same weights. + """ + if total_num < flat_num or equal_weight: + return numpy.ones(total_num) + + ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num) + flat_weights = numpy.ones(flat_num) + return numpy.concatenate([ramp_weights, flat_weights]) + + # pylint:disable=assignment-from-no-return def adaptive_parzen_estimator(mus, low, high, prior_weight=1.0, @@ -56,15 +73,6 @@ def adaptive_parzen_estimator(mus, low, high, get the full weight where the others will be applied with a linear ramp from 0 to 1.0. It will only take effect if equal_weight is False. 
""" - def update_weights(total_num): - """Generate weights for all components""" - if total_num < flat_num or equal_weight: - return numpy.ones(total_num) - - ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num) - flat_weights = numpy.ones(flat_num) - return numpy.concatenate([ramp_weights, flat_weights]) - mus = numpy.asarray(mus) prior_mu = (low + high) * 0.5 @@ -76,7 +84,7 @@ def update_weights(total_num): sorted_mus = mus[order] prior_mu_pos = numpy.searchsorted(sorted_mus, prior_mu) - weights = update_weights(size) + weights = ramp_up_weights(size, flat_num, equal_weight) mixture_mus = numpy.zeros(size + 1) mixture_mus[:prior_mu_pos] = sorted_mus[:prior_mu_pos] @@ -163,7 +171,7 @@ class TPE(BaseAlgorithm): # pylint:disable=too-many-arguments def __init__(self, space, seed=None, - n_initial_points=20, n_ei_candidates=24, + n_initial_points=5, n_ei_candidates=24, gamma=0.25, equal_weight=False, prior_weight=1.0, full_weight_num=25): @@ -177,12 +185,13 @@ def __init__(self, space, seed=None, full_weight_num=full_weight_num) for dimension in self.space.values(): + if dimension.type not in ['real', 'integer', 'categorical']: + raise ValueError("TPE now only supports Real, Integer " + "and Categorical Dimension.") - if dimension.type not in ['real']: - raise ValueError("TPE now only supports Real Dimension.") - - if dimension.prior_name not in ['uniform']: - raise ValueError("TPE now only supports uniform as prior.") + if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']: + raise ValueError("TPE now only supports uniform, uniform discrete " + "and choices as prior.") shape = dimension.shape if shape and len(shape) != 1: @@ -245,8 +254,11 @@ def suggest(self, num=1): else: point = [] below_points, above_points = self.split_trials() - below_points = numpy.array([flatten_dims(point, self.space) for point in below_points]) - above_points = numpy.array([flatten_dims(point, self.space) for point in above_points]) + + below_points = [flatten_dims(point, self.space) for point in below_points] + above_points = [flatten_dims(point, self.space) for point in above_points] + below_points = list(map(list, zip(*below_points))) + above_points = list(map(list, zip(*above_points))) idx = 0 for dimension in self.space.values(): @@ -255,12 +267,23 @@ def suggest(self, num=1): if not shape: shape = (1,) - if dimension.type == 'real': - points = self.sample_real_dimension(dimension, shape[0], - below_points[:, idx: idx + shape[0]], - above_points[:, idx: idx + shape[0]]) + if dimension.type == 'real' and dimension.prior_name == 'uniform': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_real_point) + elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_int_point) + elif dimension.type == 'categorical' and dimension.prior_name == 'choices': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_categorical_point) else: - raise ValueError("TPE now only support Real Dimension.") + raise NotImplementedError() if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s', @@ -275,25 +298,27 @@ def suggest(self, num=1): return samples - def sample_real_dimension(self, dimension, shape_size, 
below_points, above_points): - """Sample values for a real dimension + # pylint:disable=no-self-use + def sample_one_dimension(self, dimension, shape_size, below_points, above_points, sampler): + """Sample values for a dimension - :param dimension: Real Dimension. + :param dimension: Dimension. :param shape_size: 1D Shape Size of the Real Dimension. :param below_points: good points with shape (m, n), n=shape_size. :param above_points: bad points with shape (m, n), n=shape_size. + :param sampler: method to sample one value for upon the dimension. """ points = [] for j in range(shape_size): - new_point = self._sample_real_point(dimension, below_points[:, j], above_points[:, j]) - if new_point: + new_point = sampler(dimension, below_points[j], above_points[j]) + if new_point is not None: points.append(new_point) return points def _sample_real_point(self, dimension, below_points, above_points): - """Sample one value for a real dimension based on the observed good and bad points""" + """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, @@ -314,6 +339,51 @@ def _sample_real_point(self, dimension, below_points, above_points): return None + def _sample_int_point(self, dimension, below_points, above_points): + """Sample one value for integer dimension based on the observed good and bad points""" + low, high = dimension.interval() + choices = range(low, high) + + below_points = numpy.array(below_points).astype(int) - low + above_points = numpy.array(above_points).astype(int) - low + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = CategoricalSampler(self, above_points, choices) + + lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = new_point + low + return new_point + + return None + + def _sample_categorical_point(self, dimension, below_points, above_points): + """Sample one value for categorical dimension based on the observed good and bad points""" + choices = dimension.categories + + below_points = [choices.index(point) for point in below_points] + above_points = [choices.index(point) for point in above_points] + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = CategoricalSampler(self, above_points, choices) + + lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point_index = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = choices[new_point_index] + + return new_point + return None + def split_trials(self): """Split the observed trials into good and bad ones based on the ratio `gamma``""" sorted_trials = sorted(self._trials_info.values(), key=lambda x: x[1]['objective']) @@ -402,3 +472,48 @@ def get_loglikelis(self, points): axis=1)) return point_likeli + + +class CategoricalSampler(): + """Categorical Sampler for discrete integer and categorical choices + + Parameters + ---------- + tpe: `TPE` algorithm + The tpe algorithm object which this sampler will be part of. 
+ observations: list + Observed values in the dimension + choices: list + Candidate values for the dimension + + """ + + def __init__(self, tpe, observations, choices): + self.tpe = tpe + self.obs = observations + self.choices = choices + + self._build_multinomial_weights() + + def _build_multinomial_weights(self): + """Build weights for categorical distribution based on observations""" + weights_obs = ramp_up_weights(len(self.obs), + self.tpe.full_weight_num, self.tpe.equal_weight) + counts_obs = numpy.bincount(self.obs, minlength=len(self.choices), weights=weights_obs) + counts_obs = counts_obs + self.tpe.prior_weight + self.weights = counts_obs / counts_obs.sum() + + def sample(self, num=1): + """Sample required number of points""" + samples = self.tpe.rng.multinomial(n=1, pvals=self.weights, size=num) + + assert samples.shape == (num,) + (len(self.weights),) + + samples_index = samples.argmax(-1) + assert samples_index.shape == (num,) + + return samples_index + + def get_loglikelis(self, points): + """Return the log likelihood for the points""" + return numpy.log(numpy.asarray(self.weights)[points]) diff --git a/tests/functional/algos/test_algos.py b/tests/functional/algos/test_algos.py index b30197a86..8cbdaaf6d 100644 --- a/tests/functional/algos/test_algos.py +++ b/tests/functional/algos/test_algos.py @@ -71,11 +71,9 @@ def test_simple(monkeypatch, config_file): @pytest.mark.usefixtures("clean_db") @pytest.mark.usefixtures("null_db_instances") -def test_random_stop(monkeypatch): +@pytest.mark.parametrize('config_file', config_files) +def test_random_stop(monkeypatch, config_file): """Test a simple usage scenario.""" - # TODO: TPE should support this case once discrete is added, - # then parametrized config_file should be used - config_file = 'random_config.yaml' monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) orion.core.cli.main(["hunt", "--config", config_file, "./black_box.py", "-x~uniform(-10, 5, discrete=True)"]) diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 87f5e3788..cfbaac570 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -6,19 +6,26 @@ import pytest from scipy.stats import norm -from orion.algo.space import Integer, Real, Space -from orion.algo.tpe import adaptive_parzen_estimator, compute_max_ei_point, GMMSampler, TPE +from orion.algo.space import Categorical, Fidelity, Integer, Real, Space +from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ + compute_max_ei_point, GMMSampler, ramp_up_weights, TPE @pytest.fixture() def space(): """Return an optimization space""" space = Space() + dim1 = Real('yolo1', 'uniform', -10, 20) space.register(dim1) - dim2 = Real('yolo2', 'uniform', -5, 10) + + dim2 = Integer('yolo2', 'uniform', -5, 10) space.register(dim2) + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + return space @@ -42,6 +49,26 @@ def test_compute_max_ei_point(): assert max_ei_point == points[max_ei_index] +def test_ramp_up_weights(): + """Test TPE adjust observed points correctly""" + weights = ramp_up_weights(25, 15, True) + assert len(weights) == 25 + assert numpy.all(weights == 1.0) + + weights = ramp_up_weights(25, 15, False) + assert len(weights) == 25 + assert numpy.all(weights[:10] == (numpy.linspace(1.0 / 25, 1.0, num=10))) + assert numpy.all(weights[10:] == 1.0) + + weights = ramp_up_weights(10, 15, False) + assert len(weights) == 10 + assert numpy.all(weights == 1.0) + + weights = 
ramp_up_weights(25, 0, False) + assert len(weights) == 25 + assert numpy.all(weights == (numpy.linspace(1.0 / 25, 1.0, num=25))) + + def test_adaptive_parzen_normal_estimator(): """Test adaptive parzen estimator""" low = -1 @@ -157,6 +184,87 @@ def test_adaptive_parzen_normal_estimator_sigma_clip(): assert numpy.all(sigmas <= 6) and numpy.all(sigmas >= 6 / 100) +class TestCategoricalSampler(): + """Tests for TPE Categorical Sampler""" + + def test_cat_sampler_creation(self, tpe): + """Test CategoricalSampler creation""" + obs = [0, 3, 9] + choices = list(range(-5, 5)) + cat_sampler = CategoricalSampler(tpe, obs, choices) + assert len(cat_sampler.weights) == len(choices) + + obs = [0, 3, 9] + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + assert len(cat_sampler.weights) == len(choices) + + tpe.equal_weight = True + tpe.prior_weight = 1.0 + obs = numpy.random.randint(0, 10, 100) + cat_sampler = CategoricalSampler(tpe, obs, choices) + counts_obs = numpy.bincount(obs) + 1.0 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + tpe.equal_weight = False + tpe.prior_weight = 0.5 + tpe.full_weight_num = 30 + obs = numpy.random.randint(0, 10, 100) + + cat_sampler = CategoricalSampler(tpe, obs, choices) + + ramp = numpy.linspace(1.0 / 100, 1.0, num=100 - 30) + full = numpy.ones(30) + ramp_weights = (numpy.concatenate([ramp, full])) + + counts_obs = numpy.bincount(obs, weights=ramp_weights) + 0.5 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + def test_sample(self, tpe): + """Test CategoricalSampler sample function""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + assert len(points) == 25 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + weights = numpy.linspace(1, 10, num=10) ** 3 + numpy.random.shuffle(weights) + weights = weights / weights.sum() + cat_sampler = CategoricalSampler(tpe, obs, choices) + cat_sampler.weights = weights + + points = cat_sampler.sample(10000) + points = numpy.array(points) + hist = numpy.bincount(points) + + assert numpy.all(hist.argsort() == weights.argsort()) + assert len(points) == 10000 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + def test_get_loglikelis(self, tpe): + """Test to get log likelis of points""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + likelis = cat_sampler.get_loglikelis(points) + + assert numpy.all(likelis == numpy.log(numpy.asarray(cat_sampler.weights)[points])) + + class TestGMMSampler(): """Tests for TPE GMM Sampler""" @@ -262,13 +370,13 @@ def test_set_state(self, tpe): def test_unsupported_space(self): """Test tpe only work for supported search space""" space = Space() - dim = Integer('yolo1', 'uniform', -2, 4) + dim = Fidelity('epoch', 1, 9, 3) space.register(dim) with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports Real Dimension' in str(ex.value) + assert 'TPE now only supports Real, Integer and Categorical Dimension' in str(ex.value) space = Space() dim = Real('yolo1', 'norm', 0.9) @@ -277,7 +385,7 @@ def test_unsupported_space(self): with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports uniform as prior' 
in str(ex.value) + assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value) space = Space() dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1)) @@ -316,6 +424,69 @@ def test_split_trials(self, tpe): assert below_points == [[-3.0], [-2.4]] assert len(above_points) == 8 + def test_sample_int_dimension(self): + """Test sample values for a integer dimension""" + space = Space() + dim1 = Integer('yolo1', 'uniform', -10, 20) + space.register(dim1) + + dim2 = Integer('yolo2', 'uniform', -5, 10, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(-10, 10, 100) + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 1 + + obs_points = numpy.random.randint(-5, 5, 100) + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 2 + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 0 + + def test_sample_categorical_dimension(self): + """Test sample values for a categorical dimension""" + space = Space() + categories = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + dim1 = Categorical('yolo1', categories) + space.register(dim1) + dim2 = Categorical('yolo2', categories, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 2 + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 0 + def test_sample_real_dimension(self): """Test sample values for a real dimension""" space = Space() @@ -325,20 +496,23 @@ def test_sample_real_dimension(self): space.register(dim2) tpe = TPE(space) - points = numpy.random.uniform(-10, 10, 20).reshape(20, 1) - below_points = points[:6, :] - above_points = points[6:, :] - points = tpe.sample_real_dimension(dim1, 1, below_points, above_points) + points = numpy.random.uniform(-10, 10, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_real_point) assert len(points) == 1 - points = numpy.random.uniform(-5, 5, 32).reshape(16, 2) - below_points = points[:4, :] - above_points = points[4:, :] - points = tpe.sample_real_dimension(dim2, 2, below_points, above_points) + points = numpy.random.uniform(-5, 5, 32) + below_points = [points[:8], points[8:16]] + above_points = [points[16:24], points[24:]] + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_real_point) assert len(points) == 2 tpe.n_ei_candidates = 0 - points = 
tpe.sample_real_dimension(dim2, 2, below_points, above_points) + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_real_point) assert len(points) == 0 def test_suggest(self, tpe): @@ -348,13 +522,13 @@ def test_suggest(self, tpe): for i in range(10): point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) tpe.observe(point, [{'objective': results[i]}]) point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) def test_1d_shape(self, tpe): @@ -383,7 +557,7 @@ def test_1d_shape(self, tpe): def test_suggest_initial_points(self, tpe, monkeypatch): """Test that initial points can be sampled correctly""" - points = [(i, i**2) for i in range(1, 12)] + points = [(i, i - 6, 'c') for i in range(1, 12)] global index index = 0 @@ -400,11 +574,11 @@ def sample(num=1, seed=None): results = numpy.random.random(10) for i in range(1, 11): point = tpe.suggest(1)[0] - assert point == (i, i**2) + assert point == (i, i - 6, 'c') tpe.observe([point], [{'objective': results[i - 1]}]) point = tpe.suggest(1)[0] - assert point != (11, 11 * 2) + assert point != (11, 5, 'c') def test_suggest_ei_candidates(self, tpe): """Test suggest with no shape dimensions""" @@ -415,7 +589,7 @@ def test_suggest_ei_candidates(self, tpe): for i in range(2): point = tpe.suggest(1) assert len(point) == 1 - assert len(point[0]) == 2 + assert len(point[0]) == 3 assert not isinstance(point[0][0], tuple) tpe.observe(point, [{'objective': results[i]}]) From 7bc68088ab1f9fb49bf580ab203691ba1213a54d Mon Sep 17 00:00:00 2001 From: donglinjy Date: Tue, 12 May 2020 17:29:45 +0800 Subject: [PATCH 2/6] refine some code --- src/orion/algo/tpe.py | 12 ++++----- tests/unittests/algo/test_tpe.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index a931e5f38..3263b36a0 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -304,8 +304,8 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points :param dimension: Dimension. :param shape_size: 1D Shape Size of the Real Dimension. - :param below_points: good points with shape (m, n), n=shape_size. - :param above_points: bad points with shape (m, n), n=shape_size. + :param below_points: good points with shape (m, n), m=shape_size. + :param above_points: bad points with shape (m, n), m=shape_size. :param sampler: method to sample one value for upon the dimension. 
""" points = [] @@ -353,10 +353,10 @@ def _sample_int_point(self, dimension, below_points, above_points): if list(candidate_points): sampler_above = CategoricalSampler(self, above_points, choices) - lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_below = sampler_below.get_loglikelis(candidate_points) lik_above = sampler_above.get_loglikelis(candidate_points) - new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point = compute_max_ei_point(candidate_points, lik_below, lik_above) new_point = new_point + low return new_point @@ -375,10 +375,10 @@ def _sample_categorical_point(self, dimension, below_points, above_points): if list(candidate_points): sampler_above = CategoricalSampler(self, above_points, choices) - lik_blow = sampler_below.get_loglikelis(candidate_points) + lik_below = sampler_below.get_loglikelis(candidate_points) lik_above = sampler_above.get_loglikelis(candidate_points) - new_point_index = compute_max_ei_point(candidate_points, lik_blow, lik_above) + new_point_index = compute_max_ei_point(candidate_points, lik_below, lik_above) new_point = choices[new_point_index] return new_point diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index cfbaac570..15cb1ec81 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -440,14 +440,29 @@ def test_sample_int_dimension(self): above_points = [obs_points[25:]] points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + obs_points_below = numpy.random.randint(-10, 0, 25).reshape(1, 25) + obs_points_above = numpy.random.randint(0, 10, 75).reshape(1, 75) + points = tpe.sample_one_dimension(dim1, 1, + obs_points_below, obs_points_above, tpe._sample_int_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) obs_points = numpy.random.randint(-5, 5, 100) below_points = [obs_points[:25], obs_points[25:50]] above_points = [obs_points[50:75], obs_points[75:]] points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, @@ -472,6 +487,16 @@ def test_sample_categorical_dimension(self): points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_categorical_point) assert len(points) == 1 + assert points[0] in categories + + obs_points_below = numpy.random.randint(0, 3, 25) + obs_points_above = numpy.random.randint(3, 10, 75) + below_points = [[categories[point] for point in obs_points_below]] + above_points = [[categories[point] for point in obs_points_above]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + assert points[0] in categories[:3] obs_points = numpy.random.randint(0, 10, 100) obs_points = [categories[point] for point in obs_points] @@ -481,6 +506,8 @@ def test_sample_categorical_dimension(self): points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_categorical_point) assert len(points) == 2 + assert points[0] in categories + assert points[1] in categories tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, @@ -501,14 +528,29 @@ def test_sample_real_dimension(self): 
above_points = [points[8:]] points = tpe.sample_one_dimension(dim1, 1, below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25) + above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75) + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) points = numpy.random.uniform(-5, 5, 32) below_points = [points[:8], points[8:16]] above_points = [points[16:24], points[24:]] points = tpe.sample_one_dimension(dim2, 2, below_points, above_points, tpe._sample_real_point) + points = numpy.asarray(points) assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) tpe.n_ei_candidates = 0 points = tpe.sample_one_dimension(dim2, 2, From 5536624b60b414c97775ea641262d11460c95cad Mon Sep 17 00:00:00 2001 From: donglinjy Date: Sat, 23 May 2020 22:44:43 +0800 Subject: [PATCH 3/6] add loguniform --- docs/src/user/algorithms.rst | 2 +- src/orion/algo/space.py | 11 +--- src/orion/algo/tpe.py | 63 ++++++++++++++++----- src/orion/core/worker/transformer.py | 11 ++-- tests/unittests/algo/test_space.py | 6 +- tests/unittests/algo/test_tpe.py | 70 ++++++++++++++++++++---- tests/unittests/core/test_transformer.py | 2 +- 7 files changed, 119 insertions(+), 46 deletions(-) diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index e276e5671..7044aa3ce 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -212,7 +212,7 @@ the most promising point among the candidates. .. note:: - Current implementation only supports uniform, uniform discrete and choices as prior. + Current implementation only supports uniform, loguniform, uniform discrete and choices as prior. As for choices prior, the probabilities if any given will be ignored. Configuration diff --git a/src/orion/algo/space.py b/src/orion/algo/space.py index 581257c10..1b551e399 100644 --- a/src/orion/algo/space.py +++ b/src/orion/algo/space.py @@ -611,15 +611,8 @@ def sample(self, n_samples=1, seed=None): return samples def interval(self, alpha=1.0): - """Return a tuple of possible values that this categorical dimension - can take. - - .. warning:: This method makes no sense for categorical variables. Use - ``self.categories`` instead. - - """ - raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n" - "Use ``self.categories`` instead.") + """Return a tuple of possible values that this categorical dimension can take.""" + return self.categories def __contains__(self, point): """Check if constraints hold for this `point` of `Dimension`. 
diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 3263b36a0..0266008c0 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -11,7 +11,7 @@ import logging import numpy -from scipy.stats import norm +from scipy.stats import lognorm, norm from orion.algo.base import BaseAlgorithm from orion.core.utils.points import flatten_dims, regroup_dims @@ -171,7 +171,7 @@ class TPE(BaseAlgorithm): # pylint:disable=too-many-arguments def __init__(self, space, seed=None, - n_initial_points=5, n_ei_candidates=24, + n_initial_points=20, n_ei_candidates=24, gamma=0.25, equal_weight=False, prior_weight=1.0, full_weight_num=25): @@ -185,12 +185,13 @@ def __init__(self, space, seed=None, full_weight_num=full_weight_num) for dimension in self.space.values(): + if dimension.type not in ['real', 'integer', 'categorical']: raise ValueError("TPE now only supports Real, Integer " "and Categorical Dimension.") - if dimension.prior_name not in ['uniform', 'int_uniform', 'choices']: - raise ValueError("TPE now only supports uniform, uniform discrete " + if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: + raise ValueError("TPE now only supports uniform, loguniform, uniform discrete " "and choices as prior.") shape = dimension.shape @@ -267,11 +268,10 @@ def suggest(self, num=1): if not shape: shape = (1,) - if dimension.type == 'real' and dimension.prior_name == 'uniform': - points = self.sample_one_dimension(dimension, shape[0], - below_points[idx: idx + shape[0]], - above_points[idx: idx + shape[0]], - self._sample_real_point) + if dimension.type == 'real': + points = self._sample_real_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]]) elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform': points = self.sample_one_dimension(dimension, shape[0], below_points[idx: idx + shape[0]], @@ -317,9 +317,32 @@ def sample_one_dimension(self, dimension, shape_size, below_points, above_points return points - def _sample_real_point(self, dimension, below_points, above_points): + def _sample_real_dimension(self, dimension, shape_size, below_points, above_points): + """Sample values for real dimension""" + if dimension.prior_name == 'uniform': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_real_point) + elif dimension.prior_name == 'reciprocal': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_loguniform_real_point) + else: + raise NotImplementedError() + + def _sample_loguniform_real_point(self, dimension, below_points, above_points): + """Sample one value for real dimension in a loguniform way""" + return self._sample_real_point(dimension, below_points, above_points, is_log=True) + + def _sample_real_point(self, dimension, below_points, above_points, is_log=False): """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() + if is_log: + below_points = numpy.log(below_points) + above_points = numpy.log(above_points) + + # scipy.stats loguniform + low = numpy.log(low) + high = numpy.log(high) + below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, self.equal_weight, flat_num=self.full_weight_num) @@ -327,8 +350,10 @@ def _sample_real_point(self, dimension, below_points, above_points): adaptive_parzen_estimator(above_points, low, high, self.prior_weight, self.equal_weight, 
flat_num=self.full_weight_num) - gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, low, high, below_weights) - gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, low, high, above_weights) + gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, + low, high, below_weights, is_log=is_log) + gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, + low, high, above_weights, is_log=is_log) candidate_points = gmm_sampler_below.sample(self.n_ei_candidates) if candidate_points: @@ -364,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points): def _sample_categorical_point(self, dimension, below_points, above_points): """Sample one value for categorical dimension based on the observed good and bad points""" - choices = dimension.categories + _, choices = dimension.interval() below_points = [choices.index(point) for point in below_points] above_points = [choices.index(point) for point in above_points] @@ -427,7 +452,7 @@ class GMMSampler(): """ - def __init__(self, tpe, mus, sigmas, low, high, weights=None): + def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): self.tpe = tpe self.mus = mus @@ -435,6 +460,10 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None): self.low = low self.high = high self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)] + self.is_log = is_log + if is_log: + self.low = numpy.exp(low) + self.high = numpy.exp(high) self.pdfs = [] self._build_mixture() @@ -442,7 +471,10 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None): def _build_mixture(self): """Build the Gaussian components in the GMM""" for mu, sigma in zip(self.mus, self.sigmas): - self.pdfs.append(norm(mu, sigma)) + if self.is_log: + self.pdfs.append(lognorm(s=sigma, loc=0, scale=numpy.exp(mu))) + else: + self.pdfs.append(norm(mu, sigma)) def sample(self, num=1): """Sample required number of points""" @@ -463,6 +495,7 @@ def get_loglikelis(self, points): weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points)) for i, pdf in enumerate(self.pdfs)] weight_likelis = numpy.array(weight_likelis) + # (num_weights, num_points) => (num_points, num_weights) weight_likelis = weight_likelis.transpose() # log-sum-exp trick diff --git a/src/orion/core/worker/transformer.py b/src/orion/core/worker/transformer.py index b19392abe..75fa7fd48 100644 --- a/src/orion/core/worker/transformer.py +++ b/src/orion/core/worker/transformer.py @@ -411,12 +411,11 @@ def sample(self, n_samples=1, seed=None): def interval(self, alpha=1.0): """Map the interval bounds to the transformed ones.""" - try: - low, high = self.original_dimension.interval(alpha) - except RuntimeError as exc: - if "Categories" in str(exc): - return (-0.1, 1.1) - raise + if self.original_dimension.prior_name == 'choices': + return self.original_dimension.categories + + low, high = self.original_dimension.interval(alpha) + return self.transform(low), self.transform(high) def __contains__(self, point): diff --git a/tests/unittests/algo/test_space.py b/tests/unittests/algo/test_space.py index db5316868..d2ef9d604 100644 --- a/tests/unittests/algo/test_space.py +++ b/tests/unittests/algo/test_space.py @@ -466,14 +466,12 @@ def test_bad_probabilities(self): with pytest.raises(ValueError): Categorical('yolo', categories, shape=2) - def test_interval_is_banned(self): + def test_interval(self): """Check that calling `Categorical.interval` raises `RuntimeError`.""" categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4} dim = Categorical('yolo', 
categories, shape=2) - with pytest.raises(RuntimeError) as exc: - dim.interval() - assert 'not ordered' in str(exc.value) + assert dim.interval() == ('asdfa', 2, 3, 4) def test_that_objects_types_are_ok(self): """Check that output samples are of the correct type. diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 15cb1ec81..73ee7f5af 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -4,7 +4,7 @@ import numpy import pytest -from scipy.stats import norm +from scipy.stats import lognorm, norm from orion.algo.space import Categorical, Fidelity, Integer, Real, Space from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ @@ -278,6 +278,11 @@ def test_gmm_sampler_creation(self, tpe): assert len(gmm_sampler.weights) == 12 assert len(gmm_sampler.pdfs) == 12 + gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True) + + assert len(gmm_sampler.weights) == 12 + assert len(gmm_sampler.pdfs) == 12 + def test_sample(self, tpe): """Test GMMSampler sample function""" mus = numpy.linspace(-3, 3, num=12, endpoint=False) @@ -306,6 +311,18 @@ def test_sample(self, tpe): assert numpy.all(points >= -11) assert numpy.all(points < 9) + # loguniform + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) + points = gmm_sampler.sample(10000) + points = numpy.array(points) + + bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9]) + hist = numpy.histogram(points, bins=numpy.exp(bins)) + + assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort()) + assert numpy.all(points >= numpy.exp(-11)) + assert numpy.all(points < numpy.exp(9)) + def test_get_loglikelis(self): """Test to get log likelis of points""" mus = numpy.linspace(-10, 10, num=10, endpoint=False) @@ -344,6 +361,26 @@ def test_get_loglikelis(self): assert point_likeli == gmm_likeli assert len(likelis) == len(points) + # loguniform + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) + + log_pdf = [] + pdfs = [] + for i in range(10): + pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i]))) + for pdf, weight in zip(pdfs, weights): + log_pdf.append(numpy.log(pdf.pdf(0) * weight)) + point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf))) + + points = numpy.random.uniform(-11, 9, 30) + points = numpy.insert(points, 10, 0) + likelis = gmm_sampler.get_loglikelis(points) + + point_likeli = numpy.format_float_scientific(point_likeli, precision=10) + gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10) + assert point_likeli == gmm_likeli + assert len(likelis) == len(points) + class TestTPE(): """Tests for the algo TPE.""" @@ -385,7 +422,8 @@ def test_unsupported_space(self): with pytest.raises(ValueError) as ex: TPE(space) - assert 'TPE now only supports uniform, uniform discrete and choices' in str(ex.value) + assert 'TPE now only supports uniform, loguniform, uniform discrete and choices' \ + in str(ex.value) space = Space() dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1)) @@ -521,22 +559,34 @@ def test_sample_real_dimension(self): space.register(dim1) dim2 = Real('yolo2', 'uniform', -5, 10, shape=(2)) space.register(dim2) + dim3 = Real('yolo3', 'reciprocal', 1, 20) + space.register(dim3) tpe = TPE(space) points = numpy.random.uniform(-10, 10, 20) below_points = [points[:8]] above_points = [points[8:]] - points = tpe.sample_one_dimension(dim1, 1, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) points 
= numpy.asarray(points) assert len(points) == 1 assert all(points >= -10) assert all(points < 10) + points = numpy.random.uniform(1, 20, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe._sample_real_dimension(dim3, 1, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= 1) + assert all(points < 20) + below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25) above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75) - points = tpe.sample_one_dimension(dim1, 1, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) points = numpy.asarray(points) assert len(points) == 1 assert all(points >= -10) @@ -545,16 +595,16 @@ def test_sample_real_dimension(self): points = numpy.random.uniform(-5, 5, 32) below_points = [points[:8], points[8:16]] above_points = [points[16:24], points[24:]] - points = tpe.sample_one_dimension(dim2, 2, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) points = numpy.asarray(points) assert len(points) == 2 assert all(points >= -10) assert all(points < 10) tpe.n_ei_candidates = 0 - points = tpe.sample_one_dimension(dim2, 2, - below_points, above_points, tpe._sample_real_point) + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) assert len(points) == 0 def test_suggest(self, tpe): diff --git a/tests/unittests/core/test_transformer.py b/tests/unittests/core/test_transformer.py index 011973495..0bb2b6955 100644 --- a/tests/unittests/core/test_transformer.py +++ b/tests/unittests/core/test_transformer.py @@ -531,7 +531,7 @@ def test_interval(self, tdim): def test_interval_from_categorical(self, tdim2): """Check how we should treat interval when original dimension is categorical.""" - assert tdim2.interval() == (-0.1, 1.1) + assert tdim2.interval() == ('asdfa', '2', '3', '4') def test_contains(self, tdim): """Check method `__contains__`.""" From 1c51f5fb1285e21891948fb90f4779d7b6da143c Mon Sep 17 00:00:00 2001 From: donglinjy Date: Sat, 23 May 2020 23:16:15 +0800 Subject: [PATCH 4/6] sync categorical change --- src/orion/algo/tpe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 0266008c0..db506a965 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -389,7 +389,7 @@ def _sample_int_point(self, dimension, below_points, above_points): def _sample_categorical_point(self, dimension, below_points, above_points): """Sample one value for categorical dimension based on the observed good and bad points""" - _, choices = dimension.interval() + choices = dimension.interval() below_points = [choices.index(point) for point in below_points] above_points = [choices.index(point) for point in above_points] From 055d9009d9a337276c1d9b2a25a09378be873347 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Wed, 27 May 2020 22:58:35 +0800 Subject: [PATCH 5/6] add fidelity support --- src/orion/algo/tpe.py | 37 +++++++++------------ tests/unittests/algo/test_tpe.py | 56 +++++++------------------------- 2 files changed, 26 insertions(+), 67 deletions(-) diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index db506a965..4a585c1cc 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -11,7 +11,7 @@ import logging import numpy -from scipy.stats import lognorm, norm +from scipy.stats import norm from orion.algo.base import 
BaseAlgorithm from orion.core.utils.points import flatten_dims, regroup_dims @@ -186,11 +186,8 @@ def __init__(self, space, seed=None, for dimension in self.space.values(): - if dimension.type not in ['real', 'integer', 'categorical']: - raise ValueError("TPE now only supports Real, Integer " - "and Categorical Dimension.") - - if dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: + if dimension.type != 'fidelity' and \ + dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: raise ValueError("TPE now only supports uniform, loguniform, uniform discrete " "and choices as prior.") @@ -283,7 +280,8 @@ def suggest(self, num=1): above_points[idx: idx + shape[0]], self._sample_categorical_point) else: - raise NotImplementedError() + # fidelity dimension + points = dimension.sample(num) if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s', @@ -336,12 +334,10 @@ def _sample_real_point(self, dimension, below_points, above_points, is_log=False """Sample one value for real dimension based on the observed good and bad points""" low, high = dimension.interval() if is_log: - below_points = numpy.log(below_points) - above_points = numpy.log(above_points) - - # scipy.stats loguniform low = numpy.log(low) high = numpy.log(high) + below_points = numpy.log(below_points) + above_points = numpy.log(above_points) below_mus, below_sigmas, below_weights = \ adaptive_parzen_estimator(below_points, low, high, self.prior_weight, @@ -351,15 +347,19 @@ def _sample_real_point(self, dimension, below_points, above_points, is_log=False self.equal_weight, flat_num=self.full_weight_num) gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, - low, high, below_weights, is_log=is_log) + low, high, below_weights) gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, - low, high, above_weights, is_log=is_log) + low, high, above_weights) candidate_points = gmm_sampler_below.sample(self.n_ei_candidates) if candidate_points: lik_blow = gmm_sampler_below.get_loglikelis(candidate_points) lik_above = gmm_sampler_above.get_loglikelis(candidate_points) new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + + if is_log: + new_point = numpy.exp(new_point) + return new_point return None @@ -452,7 +452,7 @@ class GMMSampler(): """ - def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): + def __init__(self, tpe, mus, sigmas, low, high, weights=None): self.tpe = tpe self.mus = mus @@ -460,10 +460,6 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): self.low = low self.high = high self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)] - self.is_log = is_log - if is_log: - self.low = numpy.exp(low) - self.high = numpy.exp(high) self.pdfs = [] self._build_mixture() @@ -471,10 +467,7 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None, is_log=False): def _build_mixture(self): """Build the Gaussian components in the GMM""" for mu, sigma in zip(self.mus, self.sigmas): - if self.is_log: - self.pdfs.append(lognorm(s=sigma, loc=0, scale=numpy.exp(mu))) - else: - self.pdfs.append(norm(mu, sigma)) + self.pdfs.append(norm(mu, sigma)) def sample(self, num=1): """Sample required number of points""" diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py index 73ee7f5af..3f44adfdd 100644 --- a/tests/unittests/algo/test_tpe.py +++ b/tests/unittests/algo/test_tpe.py @@ -4,7 +4,7 @@ import numpy import pytest -from 
scipy.stats import lognorm, norm +from scipy.stats import norm from orion.algo.space import Categorical, Fidelity, Integer, Real, Space from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ @@ -278,11 +278,6 @@ def test_gmm_sampler_creation(self, tpe): assert len(gmm_sampler.weights) == 12 assert len(gmm_sampler.pdfs) == 12 - gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3, is_log=True) - - assert len(gmm_sampler.weights) == 12 - assert len(gmm_sampler.pdfs) == 12 - def test_sample(self, tpe): """Test GMMSampler sample function""" mus = numpy.linspace(-3, 3, num=12, endpoint=False) @@ -311,18 +306,6 @@ def test_sample(self, tpe): assert numpy.all(points >= -11) assert numpy.all(points < 9) - # loguniform - gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) - points = gmm_sampler.sample(10000) - points = numpy.array(points) - - bins = numpy.array([-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9]) - hist = numpy.histogram(points, bins=numpy.exp(bins)) - - assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort()) - assert numpy.all(points >= numpy.exp(-11)) - assert numpy.all(points < numpy.exp(9)) - def test_get_loglikelis(self): """Test to get log likelis of points""" mus = numpy.linspace(-10, 10, num=10, endpoint=False) @@ -361,26 +344,6 @@ def test_get_loglikelis(self): assert point_likeli == gmm_likeli assert len(likelis) == len(points) - # loguniform - gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights, is_log=True) - - log_pdf = [] - pdfs = [] - for i in range(10): - pdfs.append(lognorm(s=sigmas[i], loc=0, scale=numpy.exp(mus[i]))) - for pdf, weight in zip(pdfs, weights): - log_pdf.append(numpy.log(pdf.pdf(0) * weight)) - point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf))) - - points = numpy.random.uniform(-11, 9, 30) - points = numpy.insert(points, 10, 0) - likelis = gmm_sampler.get_loglikelis(points) - - point_likeli = numpy.format_float_scientific(point_likeli, precision=10) - gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10) - assert point_likeli == gmm_likeli - assert len(likelis) == len(points) - class TestTPE(): """Tests for the algo TPE.""" @@ -407,13 +370,16 @@ def test_set_state(self, tpe): def test_unsupported_space(self): """Test tpe only work for supported search space""" space = Space() - dim = Fidelity('epoch', 1, 9, 3) - space.register(dim) - - with pytest.raises(ValueError) as ex: - TPE(space) - - assert 'TPE now only supports Real, Integer and Categorical Dimension' in str(ex.value) + dim1 = Real('yolo1', 'uniform', -10, 10) + space.register(dim1) + dim2 = Real('yolo2', 'reciprocal', 10, 20) + space.register(dim2) + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + dim4 = Fidelity('epoch', 1, 9, 3) + space.register(dim4) + TPE(space) space = Space() dim = Real('yolo1', 'norm', 0.9) From 49718252f970ac0f973a7cc505af51cdae614198 Mon Sep 17 00:00:00 2001 From: donglinjy Date: Tue, 9 Jun 2020 10:00:45 +0800 Subject: [PATCH 6/6] fix issues --- docs/src/user/api.rst | 2 +- src/orion/algo/tpe.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/user/api.rst b/docs/src/user/api.rst index 48c6cd12e..5c433dc15 100644 --- a/docs/src/user/api.rst +++ b/docs/src/user/api.rst @@ -78,7 +78,7 @@ you can optimize a function with a single line of code. 
experiment = workon(foo, space=dict(x='uniform(-50,50)')) -The experiment object returned is can be used to fetch the database of trials +The experiment object returned can be used to fetch the database of trials and analyze the optimization process. Note that the storage for `workon` is in-memory and requires no setup. This means however that :py:func:`orion.client.workon` cannot be used for parallel optimisation. diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py index 4a585c1cc..5a26a1d0b 100644 --- a/src/orion/algo/tpe.py +++ b/src/orion/algo/tpe.py @@ -279,9 +279,11 @@ def suggest(self, num=1): below_points[idx: idx + shape[0]], above_points[idx: idx + shape[0]], self._sample_categorical_point) - else: + elif dimension.type == 'fidelity': # fidelity dimension points = dimension.sample(num) + else: + raise NotImplementedError() if len(points) < shape[0]: logger.warning('TPE failed to sample new point with configuration %s',
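
For reference, a minimal usage sketch of the search-space support added by this series; it is not part of the patches themselves, reuses the dimension names and values from the fixture in tests/unittests/algo/test_tpe.py, and assumes the series is applied:

    import numpy

    from orion.algo.space import Categorical, Integer, Real, Space
    from orion.algo.tpe import TPE, ramp_up_weights

    # The newly supported priors: uniform real, uniform discrete (integer) and
    # categorical choices (any probabilities given for choices are ignored).
    space = Space()
    space.register(Real('yolo1', 'uniform', -10, 20))
    space.register(Integer('yolo2', 'uniform', -5, 10))
    space.register(Categorical('yolo3', ['a', 0.1, 2, 'c']))

    tpe = TPE(space)

    # Random objectives stand in for real trial results; once the initial random
    # suggestions are exhausted, TPE samples through the adaptive Parzen
    # estimators and the CategoricalSampler introduced in these patches.
    results = numpy.random.random(30)
    for objective in results:
        point = tpe.suggest(1)  # a list holding one (real, integer, categorical) point
        tpe.observe(point, [{'objective': objective}])

    # Weighting of observed trials: the 15 most recent trials get full weight,
    # the 10 oldest are ramped linearly from 1/25 up to 1.0
    # (see test_ramp_up_weights above).
    weights = ramp_up_weights(25, 15, False)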