diff --git a/configs/whirl_baselines_v2/27m_vs_30m.yaml b/configs/whirl_baselines_v2/27m_vs_30m.yaml
new file mode 100644
index 00000000..8204d344
--- /dev/null
+++ b/configs/whirl_baselines_v2/27m_vs_30m.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "27m_vs_30m" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/27m_vs_30m_cnn'  # kept in step with config.name below
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: 27m_vs_30m_cnn
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 4
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: 27m_vs_30m
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/3s5z_vs_3s6z.yaml b/configs/whirl_baselines_v2/3s5z_vs_3s6z.yaml
new file mode 100644
index 00000000..73ac8272
--- /dev/null
+++ b/configs/whirl_baselines_v2/3s5z_vs_3s6z.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "3s5z_vs_3s6z" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/3s5z_vs_3s6z_cnn'
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: 3s5z_vs_3s6z_cnn  # was "3s5z_vs_3s6zaa"; aligned with load_path above
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 2
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: 3s5z_vs_3s6z
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/3s_vs_5z.yaml b/configs/whirl_baselines_v2/3s_vs_5z.yaml
new file mode 100644
index 00000000..39579613
--- /dev/null
+++ b/configs/whirl_baselines_v2/3s_vs_5z.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "3s_vs_5z" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/3s_vs_5z_cnn'
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: 3s_vs_5z_cnn
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 2
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: 3s_vs_5z
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/5m_vs_6m.yaml b/configs/whirl_baselines_v2/5m_vs_6m.yaml
new file mode 100644
index 00000000..d0db29bd
--- /dev/null
+++ b/configs/whirl_baselines_v2/5m_vs_6m.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "5m_vs_6m" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/5m_vs_6m_cnn'
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: 5m_vs_6m_cnn  # was "5m_vs_6m_bias"; aligned with load_path above
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 2
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: 5m_vs_6m
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/6h_vs_8z.yaml b/configs/whirl_baselines_v2/6h_vs_8z.yaml
new file mode 100644
index 00000000..08522a12
--- /dev/null
+++ b/configs/whirl_baselines_v2/6h_vs_8z.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "6h_vs_8z" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/6h_vs_8z_cnn'
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: 6h_vs_8z_cnn
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 2
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: 6h_vs_8z
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/MMM2.yaml b/configs/whirl_baselines_v2/MMM2.yaml
new file mode 100644
index 00000000..4c85c61c
--- /dev/null
+++ b/configs/whirl_baselines_v2/MMM2.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "MMM2" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/MMM2_cnn'  # was 'nn/MMM_cnn'; kept in step with config.name below
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: MMM2_cnn
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 4
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: MMM2
+      frames: 4
+      transpose: true
+      random_invalid_step: false
diff --git a/configs/whirl_baselines_v2/corridor.yaml b/configs/whirl_baselines_v2/corridor.yaml
new file mode 100644
index 00000000..671fad52
--- /dev/null
+++ b/configs/whirl_baselines_v2/corridor.yaml
@@ -0,0 +1,82 @@
+# Training config for env_config.name "corridor" (env_name smac_cnn): a2c_discrete algo, conv1d actor-critic network.
+label: ""
+name: ""
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  load_checkpoint: false
+  load_path: 'nn/corridor_cnn'
+
+  network:
+    name: actor_critic
+    separate: true
+    # normalization: layer_norm
+    space:
+      discrete:  # NOTE(review): empty value parses as null; presumably only the key's presence matters - confirm
+
+    cnn:
+      type: conv1d
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+      convs:
+        - filters: 64
+          kernel_size: 3
+          strides: 2
+          padding: 'same'
+        - filters: 128
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+        - filters: 256
+          kernel_size: 3
+          strides: 1
+          padding: 'valid'
+    mlp:
+      units: [256, 128]
+      activation: relu
+      initializer:
+        name: variance_scaling_initializer
+        scale: 2
+      regularizer:
+        name: 'None'
+  config:
+    name: corridor_cnn
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: true
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1.0e-4  # dotted mantissa: YAML 1.1 loaders read a bare 1e-4 as a string
+    score_to_win: 20
+    grad_norm: 0.5
+    entropy_coef: 0.005
+    truncate_grads: true
+    env_name: smac_cnn
+    ppo: true
+    e_clip: 0.2
+    clip_value: true
+    num_actors: 8
+    steps_num: 128
+    minibatch_size: 4096
+    mini_epochs: 1
+    critic_coef: 2
+    lr_schedule: 'None'  # the literal string "None", not a YAML null
+    lr_threshold: 0.05
+    normalize_input: false
+    seq_len: 2
+    use_action_masks: true
+    ignore_dead_batches: false
+
+    env_config:
+      name: corridor
+      frames: 4
+      transpose: true
+      random_invalid_step: false