From 898faba8d3dc613577b3a239989981f0971c0aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=92=B2=E6=BA=90?= <2402552459@qq.com> Date: Fri, 12 May 2023 17:38:58 +0800 Subject: [PATCH 1/3] feature(pu): add buffer_memory_usage utils --- lzero/entry/train_muzero.py | 3 ++ lzero/entry/utils.py | 39 +++++++++++++++++++ lzero/mcts/buffer/game_buffer.py | 1 + .../cartpole/config/cartpole_muzero_config.py | 2 +- 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 lzero/entry/utils.py diff --git a/lzero/entry/train_muzero.py b/lzero/entry/train_muzero.py index c4e3fd0fa..b02e38b21 100644 --- a/lzero/entry/train_muzero.py +++ b/lzero/entry/train_muzero.py @@ -14,6 +14,7 @@ from ding.worker import BaseLearner from lzero.policy import visit_count_temperature from lzero.worker import MuZeroCollector, MuZeroEvaluator +from lzero.entry.utils import buffer_memory_usage def train_muzero( @@ -109,6 +110,8 @@ def train_muzero( # Learner's before_run hook. learner.call_hook('before_run') while True: + buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger) + collect_kwargs = {} # set temperature for visit count distributions according to the train_iter, # please refer to Appendix D in MuZero paper for details. diff --git a/lzero/entry/utils.py b/lzero/entry/utils.py new file mode 100644 index 000000000..91bbab601 --- /dev/null +++ b/lzero/entry/utils.py @@ -0,0 +1,39 @@ +import os + +import psutil +from pympler.asizeof import asizeof + + +def buffer_memory_usage(train_iter, buffer, writer): + """ + Overview: + Log the memory usage of the buffer and the current process to TensorBoard. + Arguments: + - train_iter (:obj:`int`): The current training iteration. + - buffer (:obj:`GameBuffer`): The game buffer. + - writer (:obj:`SummaryWriter`): The TensorBoard writer. + """ + writer.add_scalar('Buffer/num_of_episodes', buffer.num_of_collected_episodes, train_iter) + writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter) + writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_pos_priorities), train_iter) + + game_segment_buffer = buffer.game_segment_buffer + + # Calculate the amount of memory occupied by self.game_segment_buffer (in bytes). + buffer_memory_usage = asizeof(game_segment_buffer) + + # Convert buffer_memory_usage to megabytes (MB). + buffer_memory_usage_mb = buffer_memory_usage / (1024 * 1024) + + # Record the memory usage of self.game_segment_buffer to TensorBoard. + writer.add_scalar('Buffer/memory_usage/game_segment_buffer', buffer_memory_usage_mb, train_iter) + + # Get the amount of memory currently used by the process (in bytes). + process = psutil.Process(os.getpid()) + process_memory_usage = process.memory_info().rss + + # Convert process_memory_usage to megabytes (MB). + process_memory_usage_mb = process_memory_usage / (1024 * 1024) + + # Record the memory usage of the process to TensorBoard. + writer.add_scalar('Buffer/memory_usage/process', process_memory_usage_mb, train_iter) diff --git a/lzero/mcts/buffer/game_buffer.py b/lzero/mcts/buffer/game_buffer.py index 385bd5ab3..33bc807d7 100644 --- a/lzero/mcts/buffer/game_buffer.py +++ b/lzero/mcts/buffer/game_buffer.py @@ -358,6 +358,7 @@ def remove_oldest_data_to_fit(self) -> None: Overview: remove some oldest data if the replay buffer is full. """ + assert self.replay_buffer_size > self._cfg.batch_size, "replay buffer size should be larger than batch size" nums_of_game_segments = self.get_num_of_game_segments() total_transition = self.get_num_of_transitions() if total_transition > self.replay_buffer_size: diff --git a/zoo/classic_control/cartpole/config/cartpole_muzero_config.py b/zoo/classic_control/cartpole/config/cartpole_muzero_config.py index 8bfd87d5a..2da85a943 100644 --- a/zoo/classic_control/cartpole/config/cartpole_muzero_config.py +++ b/zoo/classic_control/cartpole/config/cartpole_muzero_config.py @@ -50,7 +50,7 @@ reanalyze_ratio=reanalyze_ratio, n_episode=n_episode, eval_freq=int(2e2), - replay_buffer_size=int(1e6), # the size/capacity of replay_buffer, in the terms of transitions. + replay_buffer_size=int(1e3), # the size/capacity of replay_buffer, in the terms of transitions. collector_env_num=collector_env_num, evaluator_env_num=evaluator_env_num, ), From 529183e48508467ec76316ae8581508ad5213e8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=92=B2=E6=BA=90?= <2402552459@qq.com> Date: Tue, 16 May 2023 15:28:34 +0800 Subject: [PATCH 2/3] polish(pu): rename buffer_memory_usage to log_buffer_memory_usage --- lzero/entry/train_alphazero.py | 6 ++++-- lzero/entry/train_muzero.py | 9 ++++----- lzero/entry/utils.py | 3 ++- .../cartpole/config/cartpole_muzero_config.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lzero/entry/train_alphazero.py b/lzero/entry/train_alphazero.py index 9cd79aaf9..6744ef871 100644 --- a/lzero/entry/train_alphazero.py +++ b/lzero/entry/train_alphazero.py @@ -4,14 +4,15 @@ from typing import Optional, Tuple import torch -from tensorboardX import SummaryWriter - from ding.config import compile_config from ding.envs import create_env_manager from ding.envs import get_vec_env_setting from ding.policy import create_policy from ding.utils import set_pkg_seed from ding.worker import BaseLearner, create_buffer +from tensorboardX import SummaryWriter + +from lzero.entry.utils import log_buffer_memory_usage from lzero.policy import visit_count_temperature from lzero.worker import AlphaZeroCollector, AlphaZeroEvaluator @@ -93,6 +94,7 @@ def train_alphazero( # Learner's before_run hook. learner.call_hook('before_run') while True: + log_buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger) collect_kwargs = {} # set temperature for visit count distributions according to the train_iter, # please refer to Appendix D in MuZero paper for details. diff --git a/lzero/entry/train_muzero.py b/lzero/entry/train_muzero.py index b02e38b21..5f1dc649c 100644 --- a/lzero/entry/train_muzero.py +++ b/lzero/entry/train_muzero.py @@ -4,17 +4,17 @@ from typing import Optional, Tuple import torch -from tensorboardX import SummaryWriter - from ding.config import compile_config from ding.envs import create_env_manager from ding.envs import get_vec_env_setting from ding.policy import create_policy from ding.utils import set_pkg_seed from ding.worker import BaseLearner +from tensorboardX import SummaryWriter + +from lzero.entry.utils import log_buffer_memory_usage from lzero.policy import visit_count_temperature from lzero.worker import MuZeroCollector, MuZeroEvaluator -from lzero.entry.utils import buffer_memory_usage def train_muzero( @@ -110,8 +110,7 @@ def train_muzero( # Learner's before_run hook. learner.call_hook('before_run') while True: - buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger) - + log_buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger) collect_kwargs = {} # set temperature for visit count distributions according to the train_iter, # please refer to Appendix D in MuZero paper for details. diff --git a/lzero/entry/utils.py b/lzero/entry/utils.py index 91bbab601..aced4e08a 100644 --- a/lzero/entry/utils.py +++ b/lzero/entry/utils.py @@ -2,9 +2,10 @@ import psutil from pympler.asizeof import asizeof +from tensorboardX import SummaryWriter -def buffer_memory_usage(train_iter, buffer, writer): +def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None: """ Overview: Log the memory usage of the buffer and the current process to TensorBoard. diff --git a/zoo/classic_control/cartpole/config/cartpole_muzero_config.py b/zoo/classic_control/cartpole/config/cartpole_muzero_config.py index 2da85a943..8bfd87d5a 100644 --- a/zoo/classic_control/cartpole/config/cartpole_muzero_config.py +++ b/zoo/classic_control/cartpole/config/cartpole_muzero_config.py @@ -50,7 +50,7 @@ reanalyze_ratio=reanalyze_ratio, n_episode=n_episode, eval_freq=int(2e2), - replay_buffer_size=int(1e3), # the size/capacity of replay_buffer, in the terms of transitions. + replay_buffer_size=int(1e6), # the size/capacity of replay_buffer, in the terms of transitions. collector_env_num=collector_env_num, evaluator_env_num=evaluator_env_num, ), From 2f252f4c0efef1b3d267edf4b41d04b03efc5415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=92=B2=E6=BA=90?= <2402552459@qq.com> Date: Tue, 16 May 2023 15:40:56 +0800 Subject: [PATCH 3/3] polish(pu): polish some variable names in buffer logs --- lzero/entry/utils.py | 4 ++-- lzero/mcts/buffer/game_buffer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lzero/entry/utils.py b/lzero/entry/utils.py index aced4e08a..2da99f3fa 100644 --- a/lzero/entry/utils.py +++ b/lzero/entry/utils.py @@ -14,9 +14,9 @@ def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: Summa - buffer (:obj:`GameBuffer`): The game buffer. - writer (:obj:`SummaryWriter`): The TensorBoard writer. """ - writer.add_scalar('Buffer/num_of_episodes', buffer.num_of_collected_episodes, train_iter) + writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter) writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter) - writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_pos_priorities), train_iter) + writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter) game_segment_buffer = buffer.game_segment_buffer diff --git a/lzero/mcts/buffer/game_buffer.py b/lzero/mcts/buffer/game_buffer.py index 33bc807d7..4d025b564 100644 --- a/lzero/mcts/buffer/game_buffer.py +++ b/lzero/mcts/buffer/game_buffer.py @@ -398,7 +398,7 @@ def get_num_of_game_segments(self) -> int: def get_num_of_transitions(self) -> int: # total number of transitions - return len(self.game_pos_priorities) + return len(self.game_segment_game_pos_look_up) def __repr__(self): - return f'current buffer statistics is: num_of_episodes: {self.num_of_collected_episodes}, num of game segments: {len(self.game_segment_buffer)}, number of transitions: {len(self.game_pos_priorities)}' + return f'current buffer statistics is: num_of_all_collected_episodes: {self.num_of_collected_episodes}, num of game segments: {len(self.game_segment_buffer)}, number of transitions: {len(self.game_segment_game_pos_look_up)}'