
Commit 7a2b25f: Increased time resolution for more precise performance tracking. (#295)

* Increased time resolution for more precise performance tracking.

* Updated the recommended PyTorch version.
ViktorM authored Jul 4, 2024
1 parent 07043a3 commit 7a2b25f
Showing 4 changed files with 25 additions and 26 deletions.
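The substance of the commit is a switch from `time.time()` to `time.perf_counter()` in every training-loop timer. `time.time()` reports wall-clock time, whose resolution varies by platform and which can jump when the system clock is adjusted, while `time.perf_counter()` is monotonic and uses the highest-resolution timer available, which makes it the better choice for measuring short intervals. A minimal sketch (illustration only, not part of the diff) comparing the two clocks:

```python
import time

# Illustration only (not part of this commit): compare the two clocks used for
# interval timing. time.time() is wall-clock time with platform-dependent
# resolution (it can be as coarse as ~15 ms on Windows) and can jump when the
# system clock is adjusted; time.perf_counter() is monotonic and uses the
# highest-resolution timer available.
print(time.get_clock_info("time"))
print(time.get_clock_info("perf_counter"))

def timed(work, clock):
    """Return the duration of work() as measured by the given clock."""
    start = clock()
    work()
    return clock() - start

busy = lambda: sum(i * i for i in range(100_000))
print("time.time()        :", timed(busy, time.time))
print("time.perf_counter():", timed(busy, time.perf_counter))
```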
README.md: 6 changes (3 additions & 3 deletions)
@@ -67,10 +67,10 @@ Explore RL Games quick and easily in colab notebooks:

## Installation

-For maximum training performance a preliminary installation of Pytorch 1.9+ with CUDA 11.1+ is highly recommended:
+For maximum training performance a preliminary installation of Pytorch 2.2 or newer with CUDA 12.1 or newer is highly recommended:

-```conda install pytorch torchvision cudatoolkit=11.3 -c pytorch -c nvidia``` or:
-```pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html```
+```conda install pytorch torchvision pytorch-cuda=12.1 -c pytorch -c nvidia``` or:
+```pip install torch torchvision```

Then:

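To confirm that either command produced a CUDA-enabled build matching the new recommendation, a small post-install sanity check (hypothetical snippet, not part of the README diff):

```python
import torch

# Hypothetical post-install sanity check (not part of this commit): verify that
# a CUDA-enabled PyTorch build matching the recommended versions was installed.
print("torch version :", torch.__version__)   # expect 2.2 or newer
print("CUDA build    :", torch.version.cuda)  # expect 12.1 or newer; None means a CPU-only build
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device        :", torch.cuda.get_device_name(0))
```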
rl_games/algos_torch/sac_agent.py: 15 changes (7 additions & 8 deletions)
@@ -441,7 +441,7 @@ def clear_stats(self):
self.algo_observer.after_clear_stats()

def play_steps(self, random_exploration = False):
-total_time_start = time.time()
+total_time_start = time.perf_counter()
total_update_time = 0
total_time = 0
step_time = 0.0
@@ -466,11 +466,10 @@ def play_steps(self, random_exploration = False):
with torch.no_grad():
action = self.act(obs.float(), self.env_info["action_space"].shape, sample=True)

-step_start = time.time()
-
+step_start = time.perf_counter()
with torch.no_grad():
next_obs, rewards, dones, infos = self.env_step(action)
-step_end = time.time()
+step_end = time.perf_counter()

self.current_rewards += rewards
self.current_lengths += 1
@@ -500,17 +499,17 @@ def play_steps(self, random_exploration = False):
self.obs = next_obs.clone()

rewards = self.rewards_shaper(rewards)

self.replay_buffer.add(obs, action, torch.unsqueeze(rewards, 1), next_obs_processed, torch.unsqueeze(dones, 1))

if isinstance(obs, dict):
obs = self.obs['obs']

if not random_exploration:
self.set_train()
-update_time_start = time.time()
-
+update_time_start = time.perf_counter()
actor_loss_info, critic1_loss, critic2_loss = self.update(self.epoch_num)
-update_time_end = time.time()
+update_time_end = time.perf_counter()
update_time = update_time_end - update_time_start

self.extract_actor_stats(actor_losses, entropies, alphas, alpha_losses, actor_loss_info)
Expand All @@ -521,7 +520,7 @@ def play_steps(self, random_exploration = False):

total_update_time += update_time

-total_time_end = time.time()
+total_time_end = time.perf_counter()
total_time = total_time_end - total_time_start
play_time = total_time - total_update_time

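The loop above keeps three accumulators: per-step environment time, total update time, and the total wall time of the call, with play time derived as total minus update time. A condensed sketch of that bookkeeping pattern (placeholder callables, not the rl_games API):

```python
import time

def play_steps_timing(env_step, update, num_steps):
    """Condensed sketch of the timing bookkeeping in play_steps.
    env_step and update are placeholder callables, not rl_games API."""
    total_time_start = time.perf_counter()
    step_time = 0.0
    total_update_time = 0.0

    for _ in range(num_steps):
        step_start = time.perf_counter()
        env_step()                                        # environment interaction
        step_time += time.perf_counter() - step_start

        update_start = time.perf_counter()
        update()                                          # gradient update(s)
        total_update_time += time.perf_counter() - update_start

    total_time = time.perf_counter() - total_time_start
    play_time = total_time - total_update_time            # everything that is not an update
    return step_time, play_time, total_update_time, total_time
```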
rl_games/common/a2c_common.py: 28 changes (14 additions & 14 deletions)
@@ -757,9 +757,9 @@ def play_steps(self):
if self.has_central_value:
self.experience_buffer.update_data('states', n, self.obs['states'])

-step_time_start = time.time()
+step_time_start = time.perf_counter()
self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-step_time_end = time.time()
+step_time_end = time.perf_counter()

step_time += (step_time_end - step_time_start)

@@ -830,9 +830,9 @@ def play_steps_rnn(self):
if self.has_central_value:
self.experience_buffer.update_data('states', n, self.obs['states'])

-step_time_start = time.time()
+step_time_start = time.perf_counter()
self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-step_time_end = time.time()
+step_time_end = time.perf_counter()

step_time += (step_time_end - step_time_start)

@@ -920,7 +920,7 @@ def train_epoch(self):
super().train_epoch()

self.set_eval()
-play_time_start = time.time()
+play_time_start = time.perf_counter()

with torch.no_grad():
if self.is_rnn:
@@ -930,8 +930,8 @@

self.set_train()

-play_time_end = time.time()
-update_time_start = time.time()
+play_time_end = time.perf_counter()
+update_time_start = time.perf_counter()
rnn_masks = batch_dict.get('rnn_masks', None)

self.curr_frames = batch_dict.pop('played_frames')
@@ -966,7 +966,7 @@ def train_epoch(self):
if self.normalize_input:
self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch

-update_time_end = time.time()
+update_time_end = time.perf_counter()
play_time = play_time_end - play_time_start
update_time = update_time_end - update_time_start
total_time = update_time_end - play_time_start
@@ -1034,7 +1034,7 @@ def prepare_dataset(self, batch_dict):
def train(self):
self.init_tensors()
self.mean_rewards = self.last_mean_rewards = -100500
-start_time = time.time()
+start_time = time.perf_counter()
total_time = 0
rep_count = 0
# self.frame = 0 # loading from checkpoint
@@ -1183,15 +1183,15 @@ def train_epoch(self):
super().train_epoch()

self.set_eval()
-play_time_start = time.time()
+play_time_start = time.perf_counter()
with torch.no_grad():
if self.is_rnn:
batch_dict = self.play_steps_rnn()
else:
batch_dict = self.play_steps()

-play_time_end = time.time()
-update_time_start = time.time()
+play_time_end = time.perf_counter()
+update_time_start = time.perf_counter()
rnn_masks = batch_dict.get('rnn_masks', None)

self.set_train()
@@ -1240,7 +1240,7 @@ def train_epoch(self):
if self.normalize_input:
self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch

-update_time_end = time.time()
+update_time_end = time.perf_counter()
play_time = play_time_end - play_time_start
update_time = update_time_end - update_time_start
total_time = update_time_end - play_time_start
@@ -1310,7 +1310,7 @@ def prepare_dataset(self, batch_dict):
def train(self):
self.init_tensors()
self.last_mean_rewards = -100500
-start_time = time.time()
+start_time = time.perf_counter()
total_time = 0
rep_count = 0
self.obs = self.env_reset()
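These play/update/step timers are what throughput reporting is usually built on. A rough sketch of how frames-per-second figures can be derived from them (assumed formulas for illustration; the exact statistics rl_games logs may differ):

```python
def throughput_stats(curr_frames, step_time, play_time, total_time):
    """Rough sketch with assumed formulas (not the exact rl_games reporting):
    turn the perf_counter-based timings into throughput numbers."""
    return {
        "fps_env_step": curr_frames / step_time,   # raw environment stepping speed
        "fps_rollout": curr_frames / play_time,    # rollout collection, incl. inference
        "fps_total": curr_frames / total_time,     # end to end, incl. optimizer updates
    }

# Example: 16384 frames collected in a 2.0 s epoch, 0.8 s of it spent stepping the env.
print(throughput_stats(16384, step_time=0.8, play_time=1.2, total_time=2.0))
```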
rl_games/torch_runner.py: 2 changes (1 addition & 1 deletion)
@@ -63,7 +63,7 @@ def __init__(self, algo_observer=None):

self.algo_observer = algo_observer if algo_observer else DefaultAlgoObserver()
torch.backends.cudnn.benchmark = True
-### it didnot help for lots for openai gym envs anyway :(
+### it did not help for lots for openai gym envs anyway :(
#torch.backends.cudnn.deterministic = True
#torch.use_deterministic_algorithms(True)

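For context on the cuDNN settings in this hunk: `benchmark = True` lets cuDNN autotune kernels for speed at the cost of run-to-run variation, while the commented-out flags would trade that speed for reproducibility. A small sketch of the trade-off (the helper below is hypothetical, not rl_games code):

```python
import torch

# Sketch of the trade-off referenced in torch_runner.py: benchmark=True autotunes
# cuDNN kernels for speed; the flags below would instead enforce reproducibility.
torch.backends.cudnn.benchmark = True            # what rl_games enables

def make_deterministic():
    """Hypothetical helper: switch to reproducible (typically slower) execution."""
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)
```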
