From 87b4ff99a3dd67a88a6970003c93d5581617f37b Mon Sep 17 00:00:00 2001 From: kengz Date: Sat, 8 Apr 2017 11:03:58 -0400 Subject: [PATCH 1/5] schedule fast annealing for mountain double dqn --- rl/spec/classic_experiment_specs.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index 394a3c0..c5dde1a 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -733,8 +733,8 @@ "hidden_layers": [200], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + "exploration_anneal_episodes": 100, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], From 9e7cbefd50695a735186f6f6c6c9a86b5065d532 Mon Sep 17 00:00:00 2001 From: kengz Date: Sat, 8 Apr 2017 11:05:04 -0400 Subject: [PATCH 2/5] schedule mountain dqn too --- rl/spec/classic_experiment_specs.json | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index c5dde1a..dce7cb3 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -705,16 +705,15 @@ "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + "exploration_anneal_episodes": 100, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } }, @@ -741,8 +740,7 @@ "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } }, From 9e29624066ba99122958af9598be6b5b5747a883 Mon Sep 17 00:00:00 2001 From: kengz Date: Sun, 9 Apr 2017 10:22:00 -0400 Subject: [PATCH 3/5] fix recompile_model for double dqn --- rl/agent/double_dqn.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/rl/agent/double_dqn.py b/rl/agent/double_dqn.py index 556baca..9a6cf45 100644 --- a/rl/agent/double_dqn.py +++ b/rl/agent/double_dqn.py @@ -31,6 +31,25 @@ def compile_model(self): optimizer=self.optimizer.keras_optimizer_2) logger.info("Models 1 and 2 compiled") + def switch_models(self): + # Switch model 1 and model 2, also the optimizers + temp = self.model + self.model = self.model_2 + self.model_2 = temp + + temp_optimizer = self.optimizer.keras_optimizer + self.optimizer.keras_optimizer = self.optimizer.keras_optimizer_2 + self.optimizer.keras_optimizer_2 = temp_optimizer + + def recompile_model(self, sys_vars): + '''rotate and recompile both models''' + if self.epi_change_lr is not None: + self.switch_models() # to model_2 + self.recompile_model(sys_vars) + self.switch_models() # back to model + self.recompile_model(sys_vars) + return self.model + def compute_Q_states(self, minibatch): (Q_states, Q_next_states_select, _max) = super( DoubleDQN, self).compute_Q_states(minibatch) @@ -45,16 +64,6 @@ def compute_Q_states(self, minibatch): return (Q_states, Q_next_states, Q_next_states_max) - def switch_models(self): - # Switch model 1 and model 2, also the optimizers - temp = self.model - self.model = self.model_2 - self.model_2 = temp - - temp_optimizer = self.optimizer.keras_optimizer - self.optimizer.keras_optimizer = self.optimizer.keras_optimizer_2 - self.optimizer.keras_optimizer_2 = temp_optimizer - def train_an_epoch(self): self.switch_models() return super(DoubleDQN, self).train_an_epoch() From c5cd7b575232140a56323bac23f347c6e4dd9d56 Mon Sep 17 00:00:00 2001 From: kengz Date: Sun, 9 Apr 2017 10:27:00 -0400 Subject: [PATCH 4/5] schedule mountain with delayed lr change --- rl/spec/classic_experiment_specs.json | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index dce7cb3..58fe583 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -705,7 +705,7 @@ "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 100, + "exploration_anneal_episodes": 50, "epi_change_lr": 100 }, "param_range": { @@ -732,7 +732,7 @@ "hidden_layers": [200], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 100, + "exploration_anneal_episodes": 50, "epi_change_lr": 100 }, "param_range": { @@ -788,16 +788,15 @@ "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + "exploration_anneal_episodes": 50, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } } From 9e141e5020a843d6b64af07fa918593663094f20 Mon Sep 17 00:00:00 2001 From: kengz Date: Sun, 9 Apr 2017 10:28:32 -0400 Subject: [PATCH 5/5] call super recompile_model --- rl/agent/double_dqn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rl/agent/double_dqn.py b/rl/agent/double_dqn.py index 9a6cf45..92e104d 100644 --- a/rl/agent/double_dqn.py +++ b/rl/agent/double_dqn.py @@ -45,9 +45,9 @@ def recompile_model(self, sys_vars): '''rotate and recompile both models''' if self.epi_change_lr is not None: self.switch_models() # to model_2 - self.recompile_model(sys_vars) + super(DoubleDQN, self).recompile_model(sys_vars) self.switch_models() # back to model - self.recompile_model(sys_vars) + super(DoubleDQN, self).recompile_model(sys_vars) return self.model def compute_Q_states(self, minibatch):