From bf599ab80e20cc94b3467023eb33ca95ee21c1aa Mon Sep 17 00:00:00 2001 From: kengz Date: Mon, 3 Apr 2017 23:25:44 -0400 Subject: [PATCH 1/6] correct doubleboltzmannpolicy for PER --- rl/spec/classic_experiment_specs.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index 237c247..aa237f6 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -94,7 +94,7 @@ "HyperOptimizer": "GridSearch", "Memory": "PrioritizedExperienceReplay", "Optimizer": "AdamOptimizer", - "Policy": "BoltzmannPolicy", + "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "NoPreProcessor", "param": { "lr": 0.02, @@ -667,7 +667,7 @@ "HyperOptimizer": "GridSearch", "Memory": "PrioritizedExperienceReplay", "Optimizer": "AdamOptimizer", - "Policy": "BoltzmannPolicy", + "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "NoPreProcessor", "param": { "train_per_n_new_exp": 5, From b3d8fe7c95935ec9e1ed8d735b46ee56def7270c Mon Sep 17 00:00:00 2001 From: kengz Date: Mon, 3 Apr 2017 23:31:40 -0400 Subject: [PATCH 2/6] reschedule mountain per --- rl/spec/classic_experiment_specs.json | 12 +++++++----- rl/spec/problems.json | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index aa237f6..82b2fa7 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -670,20 +670,22 @@ "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "NoPreProcessor", "param": { - "train_per_n_new_exp": 5, + "train_per_n_new_exp": 4, + "batch_size": 32, "lr": 0.001, "gamma": 0.99, "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 600 + "exploration_anneal_episodes": 300, + "epi_change_lr": 500 }, "param_range": { - "lr": [0.001, 0.01], - "gamma": [0.99, 0.999], + "lr": [0.01, 0.02], + "gamma": [0.99, 0.999, 1], "hidden_layers": [ [400], + [800], [400, 200] ] } diff --git a/rl/spec/problems.json b/rl/spec/problems.json index a477a2e..ad8d446 100644 --- a/rl/spec/problems.json +++ b/rl/spec/problems.json @@ -38,7 +38,7 @@ "RENDER": true, "GYM_ENV_NAME": "MountainCar-v0", "SOLVED_MEAN_REWARD": -110.0, - "MAX_EPISODES": 2000, + "MAX_EPISODES": 1000, "REWARD_MEAN_LEN": 100 }, "MountainCarContinuous-v0": { From 3dbd0eb7f92f0166bbd62142d9d1e9994359e6c1 Mon Sep 17 00:00:00 2001 From: kengz Date: Mon, 3 Apr 2017 23:33:10 -0400 Subject: [PATCH 3/6] schedule mountain per --- rl/spec/classic_experiment_specs.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index 82b2fa7..ff57d11 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -678,7 +678,8 @@ "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", "exploration_anneal_episodes": 300, - "epi_change_lr": 500 + "epi_change_lr": 500, + "max_mem_len": 20000 }, "param_range": { "lr": [0.01, 0.02], From 13b58a3e5be01338b33a7f20fb484e60c590ed00 Mon Sep 17 00:00:00 2001 From: kengz Date: Tue, 4 Apr 2017 07:44:16 -0400 Subject: [PATCH 4/6] reschedule mountain per, has hope! --- rl/spec/classic_experiment_specs.json | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index ff57d11..7025538 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -670,20 +670,19 @@ "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "NoPreProcessor", "param": { - "train_per_n_new_exp": 4, "batch_size": 32, "lr": 0.001, "gamma": 0.99, "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 300, + "exploration_anneal_episodes": 200, "epi_change_lr": 500, "max_mem_len": 20000 }, "param_range": { - "lr": [0.01, 0.02], - "gamma": [0.99, 0.999, 1], + "lr": [0.01, 0.02, 0.03, 0.05], + "gamma": [0.99, 0.999], "hidden_layers": [ [400], [800], From fa700570f616a8d56f04166e02d8045647cbe652 Mon Sep 17 00:00:00 2001 From: kengz Date: Tue, 4 Apr 2017 08:02:59 -0400 Subject: [PATCH 5/6] fix negative rewards bug by adding bump to all --- rl/memory/prioritized_exp_replay.py | 11 ++++++----- rl/spec/box2d_experiment_specs.json | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rl/memory/prioritized_exp_replay.py b/rl/memory/prioritized_exp_replay.py index 90c2ee0..9d1a239 100644 --- a/rl/memory/prioritized_exp_replay.py +++ b/rl/memory/prioritized_exp_replay.py @@ -27,15 +27,16 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6, self.prio_tree = SumTree(self.max_mem_len) self.head = 0 + # bump to account for negative terms in reward get_priority + # and we cannot abs(reward) cuz it's sign sensitive SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD'] - if SOLVED_MEAN_REWARD > 0: - self.min_priority = 0 - else: - self.min_priority = abs(10 * SOLVED_MEAN_REWARD) + self.min_priority = abs(10 * SOLVED_MEAN_REWARD) def get_priority(self, error): # add min_priority to prevent root of negative = complex - return (self.min_priority + error + self.e) ** self.alpha + p = (self.min_priority + error + self.e) ** self.alpha + assert not np.isnan(p) + return p def add_exp(self, action, reward, next_state, terminal): '''Round robin memory updating''' diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json index c8b6971..c379ec4 100644 --- a/rl/spec/box2d_experiment_specs.json +++ b/rl/spec/box2d_experiment_specs.json @@ -107,14 +107,14 @@ "PreProcessor": "StackStates", "param": { "train_per_n_new_exp": 5, - "batch_size": 32, "lr": 0.005, "gamma": 0.99, "hidden_layers": [800, 400], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", "exploration_anneal_episodes": 150, - "epi_change_lr": 200 + "epi_change_lr": 200, + "max_mem_len": 20000 }, "param_range": { "lr": [0.001, 0.005, 0.01], From 1d707e7c88af29e0d8c2797fe89e7375a3574a41 Mon Sep 17 00:00:00 2001 From: kengz Date: Tue, 4 Apr 2017 08:05:26 -0400 Subject: [PATCH 6/6] reschedule lunar and acrobot per --- rl/spec/box2d_experiment_specs.json | 4 ++-- rl/spec/classic_experiment_specs.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json index c379ec4..7391fbe 100644 --- a/rl/spec/box2d_experiment_specs.json +++ b/rl/spec/box2d_experiment_specs.json @@ -106,7 +106,7 @@ "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "StackStates", "param": { - "train_per_n_new_exp": 5, + "train_per_n_new_exp": 2, "lr": 0.005, "gamma": 0.99, "hidden_layers": [800, 400], @@ -114,7 +114,7 @@ "output_layer_activation": "linear", "exploration_anneal_episodes": 150, "epi_change_lr": 200, - "max_mem_len": 20000 + "max_mem_len": 30000 }, "param_range": { "lr": [0.001, 0.005, 0.01], diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index 7025538..3917173 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -460,7 +460,7 @@ "Policy": "DoubleDQNBoltzmannPolicy", "PreProcessor": "StackStates", "param": { - "train_per_n_new_exp": 4, + "train_per_n_new_exp": 1, "lr": 0.01, "gamma": 0.99, "hidden_layers": [32],