Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# 0.0.10

* Cartpole Benchmark Version 0.0.4:
Fix: Pass the hp `entropy_regularization` to the PPO Agent.
Set the lower limit of the hyperparameter `likelihood_ratio_clipping` from 0 to 1e-7 (0 is invalid).

# 0.0.9
* Add new Benchmarks: Tabular Benchmarks.
Provided by @Neeratyoy.
Expand Down
18 changes: 13 additions & 5 deletions hpobench/benchmarks/rl/cartpole.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
"""
Changelog:
==========
0.0.4
* Set the lower bound of the hp `likelihood_ratio_clipping` to a small number instead of 0.
The PPO agent does not accept a value of 0 here and will raise an error.
* Pass the hp `entropy_regularization` to the agent.
* Add the hp `entropy_regularization` to the ConfigSpace of the CartpoleFull Benchmark.

0.0.3
* New container release due to a general change in the communication between container and HPOBench.
Works with HPOBench >= v0.0.8
Expand Down Expand Up @@ -30,7 +36,7 @@
from hpobench.abstract_benchmark import AbstractBenchmark # noqa: E402
from hpobench.util import rng_helper # noqa: E402

__version__ = '0.0.3'
__version__ = '0.0.4'

logger = logging.getLogger('CartpoleBenchmark')
tf.logging.set_verbosity(tf.logging.ERROR)
Expand Down Expand Up @@ -184,7 +190,8 @@ def objective_function(self, configuration: Union[Dict, CS.Configuration],
"learning_rate":
configuration["baseline_learning_rate"]},
"num_steps": configuration["baseline_optimization_steps"]},
likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"]
likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"],
entropy_regularization=configuration["entropy_regularization"],
)

def episode_finished(record):
Expand Down Expand Up @@ -279,7 +286,8 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp
CS.UniformIntegerHyperparameter("batch_size", lower=8, default_value=64, upper=256, log=True),
CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True),
CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1),
CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=0, default_value=.2, upper=1),
CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1),
CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1),
CS.CategoricalHyperparameter("activation_1", ["tanh", "relu"]),
CS.CategoricalHyperparameter("activation_2", ["tanh", "relu"]),
CS.CategoricalHyperparameter("optimizer_type", ["adam", "rmsprop"]),
Expand Down Expand Up @@ -327,8 +335,8 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp
CS.UniformIntegerHyperparameter("batch_size", lower=8, default_value=64, upper=256, log=True),
CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True),
CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1),
CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=0, default_value=.2, upper=1),
CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1)
CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1),
CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1),
])
return cs

Expand Down
4 changes: 2 additions & 2 deletions hpobench/container/benchmarks/rl/cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ class CartpoleReduced(AbstractBenchmarkClient):
def __init__(self, **kwargs):
kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'CartpoleReduced')
kwargs['container_name'] = kwargs.get('container_name', 'cartpole')
kwargs['latest'] = kwargs.get('container_tag', '0.0.3')
kwargs['latest'] = kwargs.get('container_tag', '0.0.4')
super(CartpoleReduced, self).__init__(**kwargs)


class CartpoleFull(AbstractBenchmarkClient):
def __init__(self, **kwargs):
kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'CartpoleFull')
kwargs['container_name'] = kwargs.get('container_name', 'cartpole')
kwargs['latest'] = kwargs.get('container_tag', '0.0.3')
kwargs['latest'] = kwargs.get('container_tag', '0.0.4')
super(CartpoleFull, self).__init__(**kwargs)