diff --git a/rl/agent/double_dqn.py b/rl/agent/double_dqn.py index 556baca..92e104d 100644 --- a/rl/agent/double_dqn.py +++ b/rl/agent/double_dqn.py @@ -31,6 +31,25 @@ def compile_model(self): optimizer=self.optimizer.keras_optimizer_2) logger.info("Models 1 and 2 compiled") + def switch_models(self): + # Swap self.model with self.model_2, and swap the optimizer's keras_optimizer with keras_optimizer_2 to match + temp = self.model + self.model = self.model_2 + self.model_2 = temp + + temp_optimizer = self.optimizer.keras_optimizer + self.optimizer.keras_optimizer = self.optimizer.keras_optimizer_2 + self.optimizer.keras_optimizer_2 = temp_optimizer + + def recompile_model(self, sys_vars): + '''When epi_change_lr is set, run the parent recompile_model once per model: switch to model_2, call it, switch back, call it again''' + if self.epi_change_lr is not None: + self.switch_models() # to model_2 + super(DoubleDQN, self).recompile_model(sys_vars) + self.switch_models() # back to model + super(DoubleDQN, self).recompile_model(sys_vars) + return self.model + def compute_Q_states(self, minibatch): (Q_states, Q_next_states_select, _max) = super( DoubleDQN, self).compute_Q_states(minibatch) @@ -45,16 +64,6 @@ def compute_Q_states(self, minibatch): return (Q_states, Q_next_states, Q_next_states_max) - def switch_models(self): - # Switch model 1 and model 2, also the optimizers - temp = self.model - self.model = self.model_2 - self.model_2 = temp - - temp_optimizer = self.optimizer.keras_optimizer - self.optimizer.keras_optimizer = self.optimizer.keras_optimizer_2 - self.optimizer.keras_optimizer_2 = temp_optimizer - def train_an_epoch(self): self.switch_models() return super(DoubleDQN, self).train_an_epoch() diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json index 394a3c0..58fe583 100644 --- a/rl/spec/classic_experiment_specs.json +++ b/rl/spec/classic_experiment_specs.json @@ -705,16 +705,15 @@ "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + 
"exploration_anneal_episodes": 50, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } }, @@ -733,16 +732,15 @@ "hidden_layers": [200], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + "exploration_anneal_episodes": 50, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } }, @@ -790,16 +788,15 @@ "hidden_layers": [128, 64], "hidden_layers_activation": "sigmoid", "output_layer_activation": "linear", - "exploration_anneal_episodes": 400, - "epi_change_lr": 800 + "exploration_anneal_episodes": 50, + "epi_change_lr": 100 }, "param_range": { "lr": [0.01, 0.02], "gamma": [0.99, 0.999], "hidden_layers": [ [200], - [400], - [800] + [400] ] } }