
Commit eede3bf

Cartpole: Pass hp entropy_regularization to Agent.
1 parent 7b9373b commit eede3bf

File tree

changelog.md
hpobench/benchmarks/rl/cartpole.py

2 files changed: +9 −3 lines


changelog.md

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # 0.0.10
 * Cartpole Benchmark Version 0.0.4:
-  Remove an unused parameter from the reduced benchmark.
+  Fix: Pass the hp `entropy_regularization` to the PPO Agent.
   Set the lower limit of an hyperparameter from 0 to 10e-7 (0 is invalid.)
 
 # 0.0.9
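For context on the "lower limit" entry above: a minimal stand-alone sketch (not part of the commit) of what the new bound means in ConfigSpace terms. The bound 1e-7 is taken from the get_configuration_space() diff below; the script itself and its seed are illustrative and only require the ConfigSpace package.

import ConfigSpace as CS

# Definition as of this commit: the lower bound is a small positive number
# instead of 0, because the PPO agent rejects a clipping value of exactly 0.
cs = CS.ConfigurationSpace(seed=0)
cs.add_hyperparameter(
    CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1))

for _ in range(5):
    sampled = cs.sample_configuration()["likelihood_ratio_clipping"]
    assert sampled >= 1e-7  # a value of exactly 0 can no longer be drawn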

hpobench/benchmarks/rl/cartpole.py

Lines changed: 8 additions & 2 deletions
@@ -2,7 +2,10 @@
 Changelog:
 ==========
 0.0.4
-* Remove redundant hyperparameter from Reduced.
+* Set the lower bound of the hp `likelihood_ratio_clipping` to a small number instead of 0.
+  The PPO agent does not accept a value of 0 here and will raise an error.
+* Pass the hp `entropy_regularization` to the agent.
+* Add the hp `entropy_regularization` to the ConfigSpace of the CartpoleFull Benchmark.
 
 0.0.3
 * New container release due to a general change in the communication between container and HPOBench.
@@ -187,7 +190,8 @@ def objective_function(self, configuration: Union[Dict, CS.Configuration],
                                                           "learning_rate":
                                                               configuration["baseline_learning_rate"]},
                                             "num_steps": configuration["baseline_optimization_steps"]},
-                         likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"]
+                         likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"],
+                         entropy_regularization=configuration["entropy_regularization"],
                          )
 
         def episode_finished(record):
@@ -283,6 +287,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp
             CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True),
             CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1),
             CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1),
+            CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1),
             CS.CategoricalHyperparameter("activation_1", ["tanh", "relu"]),
             CS.CategoricalHyperparameter("activation_2", ["tanh", "relu"]),
             CS.CategoricalHyperparameter("optimizer_type", ["adam", "rmsprop"]),
@@ -331,6 +336,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp
             CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True),
             CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1),
             CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1),
+            CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1),
         ])
         return cs

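As a quick sanity check of the ConfigSpace additions above, a hedged stand-alone sketch (not part of the commit) that mirrors the added `entropy_regularization` line; in the benchmark itself this hyperparameter is defined inside get_configuration_space() and forwarded to the PPO agent by objective_function(). Only the ConfigSpace package is needed here, and the seed is arbitrary.

import ConfigSpace as CS

cs = CS.ConfigurationSpace(seed=1)
cs.add_hyperparameters([
    CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1),
])

# Every sampled configuration now carries the new hyperparameter, so the value
# reaches the agent instead of being silently dropped.
config = dict(cs.sample_configuration())
assert "entropy_regularization" in config
print(config)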