From 204255dbff05a2388299d6a2f49409168fcd48f1 Mon Sep 17 00:00:00 2001
From: HarryXuancy
Date: Sun, 15 Oct 2023 23:06:10 +0800
Subject: [PATCH 1/2] polish(xcy): add muzero config for connect4

---
 .../config/connect4_muzero_bot_mode_config.py | 83 +++++++++++++++++++
 .../config/connect4_muzero_sp_mode_config.py  | 83 +++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 zoo/board_games/connect4/config/connect4_muzero_bot_mode_config.py
 create mode 100644 zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py

diff --git a/zoo/board_games/connect4/config/connect4_muzero_bot_mode_config.py b/zoo/board_games/connect4/config/connect4_muzero_bot_mode_config.py
new file mode 100644
index 000000000..2372453ba
--- /dev/null
+++ b/zoo/board_games/connect4/config/connect4_muzero_bot_mode_config.py
@@ -0,0 +1,83 @@
+from easydict import EasyDict
+
+# ==============================================================
+# begin of the most frequently changed config specified by the user
+# ==============================================================
+collector_env_num = 8
+n_episode = 8
+evaluator_env_num = 5
+num_simulations = 50
+update_per_collect = 50
+reanalyze_ratio = 0.
+batch_size = 256
+max_env_step = int(5e5)
+# ==============================================================
+# end of the most frequently changed config specified by the user
+# ==============================================================
+
+connect4_muzero_config = dict(
+    exp_name=
+    f'data_mz_ctree/connect4_botmode_rulebot_seed0',
+    env=dict(
+        battle_mode='play_with_bot_mode',
+        bot_action_type='rule',
+        channel_last=True,
+        collector_env_num=collector_env_num,
+        evaluator_env_num=evaluator_env_num,
+        n_evaluator_episode=evaluator_env_num,
+        manager=dict(shared_memory=False, ),
+    ),
+    policy=dict(
+        model=dict(
+            observation_shape=(3, 6, 7),
+            action_space_size=7,
+            image_channel=3,
+            num_res_blocks=1,
+            num_channels=64,
+            support_scale=300,
+            reward_support_size=601,
+            value_support_size=601,
+        ),
+        cuda=True,
+        env_type='board_games',
+        game_segment_length=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        update_per_collect=update_per_collect,
+        batch_size=batch_size,
+        optim_type='Adam',
+        lr_piecewise_constant_decay=False,
+        learning_rate=0.003,
+        grad_clip_value=0.5,
+        num_simulations=num_simulations,
+        reanalyze_ratio=reanalyze_ratio,
+        # NOTE: In board_games, we set large td_steps to make sure the value target is the final outcome.
+        td_steps=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        # NOTE: In board_games, we set discount_factor=1.
+        discount_factor=1,
+        n_episode=n_episode,
+        eval_freq=int(2e3),
+        replay_buffer_size=int(1e5),
+        collector_env_num=collector_env_num,
+        evaluator_env_num=evaluator_env_num,
+    ),
+)
+connect4_muzero_config = EasyDict(connect4_muzero_config)
+main_config = connect4_muzero_config
+
+connect4_muzero_create_config = dict(
+    env=dict(
+        type='connect4',
+        import_names=['zoo.board_games.connect4.envs.connect4_env'],
+    ),
+    env_manager=dict(type='subprocess'),
+    policy=dict(
+        type='muzero',
+        import_names=['lzero.policy.muzero'],
+    ),
+)
+connect4_muzero_create_config = EasyDict(connect4_muzero_create_config)
+create_config = connect4_muzero_create_config
+
+if __name__ == "__main__":
+    from lzero.entry import train_muzero
+
+    train_muzero([main_config, create_config], seed=0, max_env_step=max_env_step)
\ No newline at end of file
diff --git a/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py b/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py
new file mode 100644
index 000000000..0bec3e2b4
--- /dev/null
+++ b/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py
@@ -0,0 +1,83 @@
+from easydict import EasyDict
+
+# ==============================================================
+# begin of the most frequently changed config specified by the user
+# ==============================================================
+collector_env_num = 8
+n_episode = 8
+evaluator_env_num = 5
+num_simulations = 50
+update_per_collect = 50
+reanalyze_ratio = 0.
+batch_size = 256
+max_env_step = int(5e5)
+# ==============================================================
+# end of the most frequently changed config specified by the user
+# ==============================================================
+
+connect4_muzero_config = dict(
+    exp_name=
+    f'data_mz_ctree/connect4_spmode_rulebot_seed0',
+    env=dict(
+        battle_mode='self_play_mode',
+        bot_action_type='rule',
+        channel_last=True,
+        collector_env_num=collector_env_num,
+        evaluator_env_num=evaluator_env_num,
+        n_evaluator_episode=evaluator_env_num,
+        manager=dict(shared_memory=False, ),
+    ),
+    policy=dict(
+        model=dict(
+            observation_shape=(3, 6, 7),
+            action_space_size=7,
+            image_channel=3,
+            num_res_blocks=1,
+            num_channels=64,
+            support_scale=300,
+            reward_support_size=601,
+            value_support_size=601,
+        ),
+        cuda=True,
+        env_type='board_games',
+        game_segment_length=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        update_per_collect=update_per_collect,
+        batch_size=batch_size,
+        optim_type='Adam',
+        lr_piecewise_constant_decay=False,
+        learning_rate=0.003,
+        grad_clip_value=0.5,
+        num_simulations=num_simulations,
+        reanalyze_ratio=reanalyze_ratio,
+        # NOTE: In board_games, we set large td_steps to make sure the value target is the final outcome.
+        td_steps=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        # NOTE: In board_games, we set discount_factor=1.
+        discount_factor=1,
+        n_episode=n_episode,
+        eval_freq=int(2e3),
+        replay_buffer_size=int(1e5),
+        collector_env_num=collector_env_num,
+        evaluator_env_num=evaluator_env_num,
+    ),
+)
+connect4_muzero_config = EasyDict(connect4_muzero_config)
+main_config = connect4_muzero_config
+
+connect4_muzero_create_config = dict(
+    env=dict(
+        type='connect4',
+        import_names=['zoo.board_games.connect4.envs.connect4_env'],
+    ),
+    env_manager=dict(type='subprocess'),
+    policy=dict(
+        type='muzero',
+        import_names=['lzero.policy.muzero'],
+    ),
+)
+connect4_muzero_create_config = EasyDict(connect4_muzero_create_config)
+create_config = connect4_muzero_create_config
+
+if __name__ == "__main__":
+    from lzero.entry import train_muzero
+
+    train_muzero([main_config, create_config], seed=0, max_env_step=max_env_step)
\ No newline at end of file

From 3665bfb1363c9057848d00b3f0f1dc33a48eefaf Mon Sep 17 00:00:00 2001
From: HarryXuancy
Date: Mon, 16 Oct 2023 15:45:28 +0800
Subject: [PATCH 2/2] polish(xcy): adjust parameters in sp_mode

---
 .../connect4/config/connect4_muzero_sp_mode_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py b/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py
index 0bec3e2b4..0e4bf5d8a 100644
--- a/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py
+++ b/zoo/board_games/connect4/config/connect4_muzero_sp_mode_config.py
@@ -40,7 +40,7 @@
         ),
         cuda=True,
         env_type='board_games',
-        game_segment_length=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        game_segment_length=int(6 * 7),  # for battle_mode='self_play_mode'
         update_per_collect=update_per_collect,
         batch_size=batch_size,
         optim_type='Adam',
@@ -50,7 +50,7 @@
         num_simulations=num_simulations,
         reanalyze_ratio=reanalyze_ratio,
         # NOTE: In board_games, we set large td_steps to make sure the value target is the final outcome.
-        td_steps=int(6 * 7 / 2),  # for battle_mode='play_with_bot_mode'
+        td_steps=int(6 * 7),  # for battle_mode='self_play_mode'
         # NOTE: In board_games, we set discount_factor=1.
         discount_factor=1,
         n_episode=n_episode,
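A note on the two NOTE comments in these configs, with a small illustration that is not part of the patch and not LightZero code (the function and variable names below are invented for the example): in board games the reward is zero on every move except the last, so once discount_factor=1 and td_steps covers the rest of the game, the n-step value target is exactly the final game outcome.

# Illustration only -- a minimal sketch, not LightZero's implementation.
def n_step_value_target(rewards, values, t, td_steps, discount):
    """Generic n-step return: discounted reward sum plus a bootstrapped value."""
    target = 0.0
    for i in range(td_steps):
        if t + i < len(rewards):
            target += (discount ** i) * rewards[t + i]
    if t + td_steps < len(values):  # bootstrap only if the horizon stays inside the episode
        target += (discount ** td_steps) * values[t + td_steps]
    return target

if __name__ == "__main__":
    # play_with_bot_mode: the agent makes at most 6 * 7 / 2 = 21 moves, and the only
    # non-zero reward (+1 win / -1 loss / 0 draw) arrives on the final move.
    rewards = [0.0] * 20 + [1.0]   # a 21-move win for the agent
    values = [0.3] * 21            # arbitrary value-network predictions
    for t in range(len(rewards)):
        target = n_step_value_target(rewards, values, t, td_steps=21, discount=1.0)
        assert target == 1.0       # every position's value target is the final outcome

The same arithmetic explains the second commit: a self_play_mode trajectory contains both players' moves, so game_segment_length and td_steps grow from 6 * 7 / 2 to 6 * 7. Either config can be tried directly through its __main__ block, e.g. python zoo/board_games/connect4/config/connect4_muzero_bot_mode_config.py.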