|
2 | 2 | Changelog: |
3 | 3 | ========== |
4 | 4 | 0.0.4 |
5 | | -* Remove redundant hyperparameter from Reduced. |
| 5 | +* Set the lower bound of the hyperparameter `likelihood_ratio_clipping` to a small positive number (1e-7) instead of 0. |
| 6 | + The PPO agent does not accept a value of 0 here and will raise an error. |
| 7 | +* Pass the hyperparameter `entropy_regularization` to the agent. |
| 8 | +* Add the hyperparameter `entropy_regularization` to the ConfigSpace of the CartpoleFull Benchmark. |
6 | 9 |
|
7 | 10 | 0.0.3 |
8 | 11 | * New container release due to a general change in the communication between container and HPOBench. |
@@ -187,7 +190,8 @@ def objective_function(self, configuration: Union[Dict, CS.Configuration], |
187 | 190 | "learning_rate": |
188 | 191 | configuration["baseline_learning_rate"]}, |
189 | 192 | "num_steps": configuration["baseline_optimization_steps"]}, |
190 | | - likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"] |
| 193 | + likelihood_ratio_clipping=configuration["likelihood_ratio_clipping"], |
| 194 | + entropy_regularization=configuration["entropy_regularization"], |
191 | 195 | ) |
192 | 196 |
|
193 | 197 | def episode_finished(record): |
@@ -283,6 +287,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp |
283 | 287 | CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True), |
284 | 288 | CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1), |
285 | 289 | CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1), |
| 290 | + CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1), |
286 | 291 | CS.CategoricalHyperparameter("activation_1", ["tanh", "relu"]), |
287 | 292 | CS.CategoricalHyperparameter("activation_2", ["tanh", "relu"]), |
288 | 293 | CS.CategoricalHyperparameter("optimizer_type", ["adam", "rmsprop"]), |
@@ -331,6 +336,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp |
331 | 336 | CS.UniformFloatHyperparameter("learning_rate", lower=1e-7, default_value=1e-3, upper=1e-1, log=True), |
332 | 337 | CS.UniformFloatHyperparameter("discount", lower=0, default_value=.99, upper=1), |
333 | 338 | CS.UniformFloatHyperparameter("likelihood_ratio_clipping", lower=1e-7, default_value=.2, upper=1), |
| 339 | + CS.UniformFloatHyperparameter("entropy_regularization", lower=0, default_value=0.01, upper=1), |
334 | 340 | ]) |
335 | 341 | return cs |
336 | 342 |
|
|
0 commit comments