From f4c793d9401054b8bb9f50d1c843fb06baa7eb65 Mon Sep 17 00:00:00 2001 From: Johan Obando Ceron Date: Fri, 25 Aug 2023 10:08:08 +0000 Subject: [PATCH] Setting the same value for num_iterations in .gin config files. This is for Atari 100k. Adding support for flatboard on Atari_100k agents and BBF repository. Fixing the requirements.txt file for BBF repository. PiperOrigin-RevId: 560035815 --- dopamine/labs/atari_100k/atari_100k_runner.py | 27 +++++++++++++++++++ dopamine/labs/atari_100k/configs/DrQ.gin | 4 +-- dopamine/labs/atari_100k/configs/DrQ_eps.gin | 4 +-- .../labs/atari_100k/configs/OTRainbow.gin | 4 +-- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/dopamine/labs/atari_100k/atari_100k_runner.py b/dopamine/labs/atari_100k/atari_100k_runner.py index 25adb338..e0a93795 100755 --- a/dopamine/labs/atari_100k/atari_100k_runner.py +++ b/dopamine/labs/atari_100k/atari_100k_runner.py @@ -24,6 +24,9 @@ from dopamine.discrete_domains import iteration_statistics from dopamine.discrete_domains import run_experiment from dopamine.labs.atari_100k import normalization_utils +from dopamine.metrics import collector_dispatcher +from dopamine.metrics import statistics_instance + import gin import jax import numpy as np @@ -101,6 +104,13 @@ def __init__( self._agent.cache_train_state() self.log_normalized_scores = log_normalized_scores + # Create a collector dispatcher for metrics reporting. 
+ self._collector_dispatcher = collector_dispatcher.CollectorDispatcher( + self._base_dir) + set_collector_dispatcher_fn = getattr( + self._agent, 'set_collector_dispatcher', None) + if callable(set_collector_dispatcher_fn): + set_collector_dispatcher_fn(self._collector_dispatcher) def _run_one_phase(self, envs, @@ -455,6 +465,23 @@ def _run_one_iteration(self, iteration): num_episodes_eval, average_reward_eval, norm_score_eval = ( self._run_eval_phase(statistics) ) + self._collector_dispatcher.write([ + statistics_instance.StatisticsInstance( + 'Train/NumEpisodes', num_episodes_train, iteration + ), + statistics_instance.StatisticsInstance( + 'Train/AverageReturns', average_reward_train, iteration + ), + statistics_instance.StatisticsInstance( + 'Train/AverageStepsPerSecond', average_steps_per_second, iteration + ), + statistics_instance.StatisticsInstance( + 'Eval/NumEpisodes', num_episodes_eval, iteration + ), + statistics_instance.StatisticsInstance( + 'Eval/AverageReturns', average_reward_eval, iteration + ), + ]) self._save_tensorboard_summaries( iteration, num_episodes_train, diff --git a/dopamine/labs/atari_100k/configs/DrQ.gin b/dopamine/labs/atari_100k/configs/DrQ.gin index ca2b4883..fd1d8d41 100644 --- a/dopamine/labs/atari_100k/configs/DrQ.gin +++ b/dopamine/labs/atari_100k/configs/DrQ.gin @@ -36,8 +36,8 @@ atari_lib.create_atari_environment.game_name = 'Pong' # Atari 100K benchmark doesn't use sticky actions. 
atari_lib.create_atari_environment.sticky_actions = False AtariPreprocessing.terminal_on_life_loss = True -Runner.num_iterations = 1 -Runner.training_steps = 100000 # agent steps +Runner.num_iterations = 10 +Runner.training_steps = 10000 # agent steps MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes Runner.max_steps_per_episode = 27000 # agent steps diff --git a/dopamine/labs/atari_100k/configs/DrQ_eps.gin b/dopamine/labs/atari_100k/configs/DrQ_eps.gin index 6ce6c573..8926e525 100644 --- a/dopamine/labs/atari_100k/configs/DrQ_eps.gin +++ b/dopamine/labs/atari_100k/configs/DrQ_eps.gin @@ -37,8 +37,8 @@ atari_lib.create_atari_environment.game_name = 'Pong' # Atari 100K benchmark doesn't use sticky actions. atari_lib.create_atari_environment.sticky_actions = False AtariPreprocessing.terminal_on_life_loss = True -Runner.num_iterations = 1 -Runner.training_steps = 100000 # agent steps +Runner.num_iterations = 10 +Runner.training_steps = 10000 # agent steps MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes Runner.max_steps_per_episode = 27000 # agent steps diff --git a/dopamine/labs/atari_100k/configs/OTRainbow.gin b/dopamine/labs/atari_100k/configs/OTRainbow.gin index e91ce0b0..d65dd899 100644 --- a/dopamine/labs/atari_100k/configs/OTRainbow.gin +++ b/dopamine/labs/atari_100k/configs/OTRainbow.gin @@ -37,8 +37,8 @@ atari_lib.create_atari_environment.game_name = 'Pong' # Atari 100K benchmark doesn't use sticky actions. atari_lib.create_atari_environment.sticky_actions = False AtariPreprocessing.terminal_on_life_loss = True -Runner.num_iterations = 1 -Runner.training_steps = 100000 # agent steps +Runner.num_iterations = 10 +Runner.training_steps = 10000 # agent steps MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes Runner.max_steps_per_episode = 27000 # agent steps