From e2a08332bddc28e993387ab3c5cdcc2918366d9e Mon Sep 17 00:00:00 2001
From: Tarun
Date: Thu, 2 Jul 2020 04:56:51 +0100
Subject: [PATCH] bug fix for state shape

---
 algos_tf14/vdnagent.py                        |  6 +-
 configs/vdn_3s5z_vs_3s6z.yaml                 | 84 -------------------
 configs/vdn_3s_vs_5z.yaml                     | 84 -------------------
 configs/whirl_baselines/vdn_3s5z_vs_3s6z.yaml |  3 +-
 configs/whirl_baselines/vdn_3s_vs_5z.yaml     |  3 +-
 configs/whirl_baselines/vdn_MMM2.yaml         |  1 +
 envs/smac_env.py                              |  3 -
 7 files changed, 10 insertions(+), 174 deletions(-)
 delete mode 100644 configs/vdn_3s5z_vs_3s6z.yaml
 delete mode 100644 configs/vdn_3s_vs_5z.yaml

diff --git a/algos_tf14/vdnagent.py b/algos_tf14/vdnagent.py
index ccce5e44..8e8e5b06 100644
--- a/algos_tf14/vdnagent.py
+++ b/algos_tf14/vdnagent.py
@@ -70,7 +70,10 @@ def __init__(self, sess, base_name, observation_space, action_space, config, log
         self.atoms_num = self.config['atoms_num']
         assert self.atoms_num == 1
 
-        self.state_shape = (self.env.env_info['state_shape'],)
+        if central_state_space is not None:
+            self.state_shape = central_state_space.shape
+        else:
+            raise NotImplementedError("central_state_space input to VDN is NONE!")
         self.n_agents = self.env.env_info['n_agents']
 
         if not self.is_prioritized:
@@ -225,6 +228,7 @@ def play_steps(self, steps, epsilon=0.0):
             # Same reward, done for all agents
             reward = reward[0]
             is_done = all(is_done)
+            state = state[0]
 
             self.step_count += 1
             self.total_reward += reward
diff --git a/configs/vdn_3s5z_vs_3s6z.yaml b/configs/vdn_3s5z_vs_3s6z.yaml
deleted file mode 100644
index 240f18e3..00000000
--- a/configs/vdn_3s5z_vs_3s6z.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
-label: ""
-name: ""
-params:
-  algo:
-    name: vdn
-
-  model:
-    name: vdn
-
-  load_checkpoint: False
-  load_path: ""
-
-  network:
-    name: dqn
-    dueling: True
-    atoms: 1
-    noisy: False
-    cnn:
-      type: conv1d
-      activation: relu
-      initializer:
-        name: variance_scaling_initializer
-        scale: 2
-      regularizer:
-        name: 'None'
-      convs:
-        - filters: 64
-          kernel_size: 3
-          strides: 2
-          padding: 'same'
-        - filters: 128
-          kernel_size: 3
-          strides: 1
-          padding: 'valid'
-        - filters: 256
-          kernel_size: 3
-          strides: 1
-          padding: 'valid'
-    mlp:
-      units: [256]
-      activation: relu
-      initializer:
-        name: variance_scaling_initializer
-        scale: 2
-      regularizer:
-        name: 'None'
-
-  config:
-    reward_shaper:
-      scale_value: 0.1
-    gamma: 0.99
-    learning_rate: 0.0005
-    steps_per_epoch: 4
-    batch_size: 64
-    epsilon: 1.0
-    min_epsilon: 0.05
-    epsilon_decay_frames: 100000
-    num_epochs_to_copy: 10000
-    env_name: smac_cnn
-    name: 3s5z_vs_3s6z
-    is_double: True
-    score_to_win: 20
-    num_steps_fill_buffer: 100000
-    replay_buffer_type: 'normal'
-    replay_buffer_size: 1000000
-    priority_beta: 0.4
-    priority_alpha: 0.6
-    beta_decay_frames: 100000
-    max_beta: 1
-    steps_num: 10
-# episodes_to_log: 10
-    atoms_num: 1
-    games_to_track: 20
-    lr_schedule: None
-    max_epochs: 100000
-    grad_norm: 10
-    mix_with_state: False
-    truncate_grads: True
-
-  env_config:
-    name: 3s5z_vs_3s6z
-    frames: 4
-    transpose: True
-    random_invalid_step: False
\ No newline at end of file
diff --git a/configs/vdn_3s_vs_5z.yaml b/configs/vdn_3s_vs_5z.yaml
deleted file mode 100644
index 4fe035b0..00000000
--- a/configs/vdn_3s_vs_5z.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
-label: ""
-name: ""
-params:
-  algo:
-    name: vdn
-
-  model:
-    name: vdn
-
-  load_checkpoint: False
-  load_path: ""
-
-  network:
-    name: dqn
-    dueling: True
-    atoms: 1
-    noisy: False
-    cnn:
-      type: conv1d
-      activation: relu
-      initializer:
-        name: variance_scaling_initializer
-        scale: 2
-      regularizer:
-        name: 'None'
-      convs:
-        - filters: 64
-          kernel_size: 3
-          strides: 2
-          padding: 'same'
-        - filters: 128
-          kernel_size: 3
-          strides: 1
-          padding: 'valid'
-        - filters: 256
-          kernel_size: 3
-          strides: 1
-          padding: 'valid'
-    mlp:
-      units: [256]
-      activation: relu
-      initializer:
-        name: variance_scaling_initializer
-        scale: 2
-      regularizer:
-        name: 'None'
-
-  config:
-    reward_shaper:
-      scale_value: 0.1
-    gamma: 0.99
-    learning_rate: 0.0005
-    steps_per_epoch: 4
-    batch_size: 64
-    epsilon: 1.0
-    min_epsilon: 0.05
-    epsilon_decay_frames: 100000
-    num_epochs_to_copy: 10000
-    env_name: smac_cnn
-    name: 3s_vs_5z
-    is_double: True
-    score_to_win: 20
-    num_steps_fill_buffer: 100000
-    replay_buffer_type: 'normal'
-    replay_buffer_size: 1000000
-    priority_beta: 0.4
-    priority_alpha: 0.6
-    beta_decay_frames: 100000
-    max_beta: 1
-    steps_num: 10
-# episodes_to_log: 10
-    atoms_num: 1
-    games_to_track: 20
-    lr_schedule: None
-    max_epochs: 100000
-    grad_norm: 10
-    mix_with_state: False
-    truncate_grads: True
-
-  env_config:
-    name: 3s_vs_5z
-    frames: 4
-    transpose: True
-    random_invalid_step: False
\ No newline at end of file
diff --git a/configs/whirl_baselines/vdn_3s5z_vs_3s6z.yaml b/configs/whirl_baselines/vdn_3s5z_vs_3s6z.yaml
index 240f18e3..c3af2daa 100644
--- a/configs/whirl_baselines/vdn_3s5z_vs_3s6z.yaml
+++ b/configs/whirl_baselines/vdn_3s5z_vs_3s6z.yaml
@@ -81,4 +81,5 @@ params:
     name: 3s5z_vs_3s6z
     frames: 4
     transpose: True
-    random_invalid_step: False
\ No newline at end of file
+    random_invalid_step: False
+    use_central_state: True
\ No newline at end of file
diff --git a/configs/whirl_baselines/vdn_3s_vs_5z.yaml b/configs/whirl_baselines/vdn_3s_vs_5z.yaml
index 4fe035b0..f04bb164 100644
--- a/configs/whirl_baselines/vdn_3s_vs_5z.yaml
+++ b/configs/whirl_baselines/vdn_3s_vs_5z.yaml
@@ -81,4 +81,5 @@ params:
     name: 3s_vs_5z
     frames: 4
     transpose: True
-    random_invalid_step: False
\ No newline at end of file
+    random_invalid_step: False
+    use_central_state: True
\ No newline at end of file
diff --git a/configs/whirl_baselines/vdn_MMM2.yaml b/configs/whirl_baselines/vdn_MMM2.yaml
index 7bda58c1..08e89ab7 100644
--- a/configs/whirl_baselines/vdn_MMM2.yaml
+++ b/configs/whirl_baselines/vdn_MMM2.yaml
@@ -82,3 +82,4 @@ params:
     frames: 4
     transpose: True
     random_invalid_step: False
+    use_central_state: True
diff --git a/envs/smac_env.py b/envs/smac_env.py
index 269899d2..20639891 100644
--- a/envs/smac_env.py
+++ b/envs/smac_env.py
@@ -49,9 +49,6 @@ def _preproc_actions(self, actions):
     def get_state(self):
        return self.env.get_state()
 
-    def get_state(self):
-        return self.env.get_state()
-
     def step(self, actions):
         fixed_rewards = None
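
Note (reviewer addition, not part of the patch): a minimal sketch of the state-shape fix above, assuming the SMAC wrapper exposes the central state as a gym.spaces.Box when use_central_state is enabled. The concrete sizes and the Box construction here are hypothetical, chosen only to show why the hard-coded 1-tuple was wrong.

    import numpy as np
    from gym.spaces import Box

    # Old behaviour: wrap the scalar state size from env_info in a 1-tuple.
    env_info = {'state_shape': 120, 'n_agents': 8}   # hypothetical values
    old_state_shape = (env_info['state_shape'],)     # always 1-D: (120,)

    # New behaviour: take the shape directly from the central state space,
    # which can be multi-dimensional (e.g. with frame stacking, frames: 4).
    central_state_space = Box(low=-1.0, high=1.0, shape=(4, 120), dtype=np.float32)
    if central_state_space is not None:
        new_state_shape = central_state_space.shape  # (4, 120)
    else:
        raise NotImplementedError("central_state_space input to VDN is NONE!")

    # play_steps fix: the wrapper returns one identical central state per
    # agent, so keep a single copy, mirroring reward[0] and all(is_done).
    state = [np.zeros(new_state_shape, dtype=np.float32)] * env_info['n_agents']
    state = state[0]

    print(old_state_shape, new_state_shape, state.shape)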