From 33ebbc443ffa350e4a7cb0c3b0219fd73afafdee Mon Sep 17 00:00:00 2001 From: Andrea Biancini Date: Thu, 28 Nov 2019 11:09:18 +0100 Subject: [PATCH 1/3] Modified PyTorch API calls to work with torch 1.3.1 --- src/doom/scenarios/deathmatch.py | 1 + src/model/dqn/base.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/doom/scenarios/deathmatch.py b/src/doom/scenarios/deathmatch.py index e78fcb5..126a803 100644 --- a/src/doom/scenarios/deathmatch.py +++ b/src/doom/scenarios/deathmatch.py @@ -174,6 +174,7 @@ def evaluate_deathmatch(game, network, params, n_train_iter=None): # observe the game state / select the next action game.observe_state(params, last_states) action = network.next_action(last_states) + action = action.tolist() pred_features = network.pred_features # game features diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py index 6e1c61c..42dfb31 100644 --- a/src/model/dqn/base.py +++ b/src/model/dqn/base.py @@ -185,8 +185,8 @@ def prepare_f_train_args(self, screens, variables, features, return screens, variables, features, actions, rewards, isfinal def register_loss(self, loss_history, loss_sc, loss_gf): - loss_history['dqn_loss'].append(loss_sc.data[0]) - loss_history['gf_loss'].append(loss_gf.data[0] + loss_history['dqn_loss'].append(loss_sc.data) + loss_history['gf_loss'].append(loss_gf.data if self.n_features else 0) def next_action(self, last_states, save_graph=False): @@ -205,7 +205,7 @@ def next_action(self, last_states, save_graph=False): if pred_features is not None: assert pred_features.size() == (1, seq_len, self.module.n_features) pred_features = pred_features[0, -1] - action_id = scores.data.max(0)[1][0] + action_id = scores.data.max(0)[1] self.pred_features = pred_features return action_id From 60dc9240c419ab7d314a68cc457de529299d04ff Mon Sep 17 00:00:00 2001 From: Andrea Biancini Date: Fri, 29 Nov 2019 09:27:57 +0100 Subject: [PATCH 2/3] Fixed tensors for PyTorch 1.3.1 --- src/doom/actions.py | 4 
++-- src/model/dqn/base.py | 4 +++- src/model/dqn/feedforward.py | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/doom/actions.py b/src/doom/actions.py index 835b4dd..998799e 100644 --- a/src/doom/actions.py +++ b/src/doom/actions.py @@ -88,8 +88,8 @@ def get_action(self, action): for k in self.available_buttons] return doom_action else: - assert type(action) is int - return self.doom_actions[action] + assert type(action.item()) is int + return self.doom_actions[action.item()] action_categories_discrete = { diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py index 42dfb31..b9f1d89 100644 --- a/src/model/dqn/base.py +++ b/src/model/dqn/base.py @@ -78,7 +78,9 @@ def base_forward(self, x_screens, x_variables): # create state input if self.n_variables: - output = torch.cat([conv_output] + embeddings, 1) + embeddings = torch.cat(embeddings) + embeddings.unsqueeze_(0) + output = torch.cat((conv_output, embeddings), dim=1) else: output = conv_output diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py index e2af5a6..067f6d5 100644 --- a/src/model/dqn/feedforward.py +++ b/src/model/dqn/feedforward.py @@ -21,10 +21,11 @@ def forward(self, x_screens, x_variables): """ batch_size = x_screens.size(0) + assert x_screens.ndimension() == 4 assert len(x_variables) == self.n_variables - assert all(x.ndimension() == 1 and x.size(0) == batch_size - for x in x_variables) + assert all(x.ndimension() == 0 and len(list(x.size())) == batch_size-1 + for x in x_variables) # state input (screen / depth / labels buffer + variables) state_input, output_gf = self.base_forward(x_screens, x_variables) @@ -45,7 +46,6 @@ class DQNFeedforward(DQN): def f_eval(self, last_states): screens, variables = self.prepare_f_eval_args(last_states) - return self.module( screens.view(1, -1, *self.screen_shape[1:]), [variables[-1, i] for i in range(self.params.n_variables)] From c6716a3da24c45d2dafdc8b86c3d6e6eb738f663 Mon Sep 17 00:00:00 2001 From: Andrea 
Biancini Date: Fri, 29 Nov 2019 09:44:40 +0100 Subject: [PATCH 3/3] Fixed bugs for pytorch 1.3.1 --- src/doom/actions.py | 5 +++-- src/model/dqn/base.py | 4 +--- src/model/dqn/feedforward.py | 8 ++++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/doom/actions.py b/src/doom/actions.py index 998799e..63146e7 100644 --- a/src/doom/actions.py +++ b/src/doom/actions.py @@ -88,8 +88,9 @@ def get_action(self, action): for k in self.available_buttons] return doom_action else: - assert type(action.item()) is int - return self.doom_actions[action.item()] + a = action if type(action) == int else action.item() + assert type(a) is int + return self.doom_actions[a] action_categories_discrete = { diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py index b9f1d89..53d540c 100644 --- a/src/model/dqn/base.py +++ b/src/model/dqn/base.py @@ -78,9 +78,7 @@ def base_forward(self, x_screens, x_variables): # create state input if self.n_variables: - embeddings = torch.cat(embeddings) - embeddings.unsqueeze_(0) - output = torch.cat((conv_output, embeddings), dim=1) + output = torch.cat([conv_output] + embeddings, dim=1) else: output = conv_output diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py index 067f6d5..4294430 100644 --- a/src/model/dqn/feedforward.py +++ b/src/model/dqn/feedforward.py @@ -21,11 +21,15 @@ def forward(self, x_screens, x_variables): """ batch_size = x_screens.size(0) + + for x in x_variables: + x.unsqueeze_(0) assert x_screens.ndimension() == 4 assert len(x_variables) == self.n_variables - assert all(x.ndimension() == 0 and len(list(x.size())) == batch_size-1 - for x in x_variables) + + #assert all(x.ndimension() == 0 and len(list(x.size())) == batch_size + # for x in x_variables) # state input (screen / depth / labels buffer + variables) state_input, output_gf = self.base_forward(x_screens, x_variables)