Commit
🚀 [RofuncRL] RofuncDTrans pass debugging
commit 08f73ef (1 parent: 2d11808)
Showing 11 changed files with 369 additions and 47 deletions.
@@ -0,0 +1,102 @@
"""
D4RL (RofuncRL)
=======================
D4RL tasks with RofuncRL offline RL algorithms (BC, DTrans, CQL, etc.)
"""

import argparse

import gymnasium as gym

from rofunc.config.utils import omegaconf_to_dict, get_config
from rofunc.learning.RofuncRL.tasks import task_map
from rofunc.learning.RofuncRL.trainers import trainer_map
from rofunc.learning.pre_trained_models.download import model_zoo
from rofunc.learning.utils.download_datasets import download_d4rl_dataset
from rofunc.learning.utils.utils import set_seed


def train(custom_args):
    # Config task and trainer parameters
    args_overrides = ["task={}".format(custom_args.task),
                      "train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
                      "sim_device={}".format(custom_args.sim_device),
                      "rl_device={}".format(custom_args.rl_device),
                      "graphics_device_id={}".format(custom_args.graphics_device_id),
                      "headless={}".format(custom_args.headless)]
    cfg = get_config('./learning/rl', 'config', args=args_overrides)

    download_d4rl_dataset(save_dir='../data/D4RL')

    set_seed(cfg.train.Trainer.seed)

    # Instantiate the D4RL (Gymnasium) environment
    env = gym.make(f'{custom_args.task}-v3')

    # Instantiate the RL trainer
    trainer = trainer_map[custom_args.agent](cfg=cfg.train,
                                             env=env,
                                             device=cfg.rl_device,
                                             env_name=custom_args.task)

    # Start training
    trainer.train()


def inference(custom_args):
    # Config task and trainer parameters
    args_overrides = ["task={}".format(custom_args.task),
                      "train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
                      "sim_device={}".format(custom_args.sim_device),
                      "rl_device={}".format(custom_args.rl_device),
                      "graphics_device_id={}".format(custom_args.graphics_device_id),
                      "headless={}".format(False),
                      "num_envs={}".format(16)]
    cfg = get_config('./learning/rl', 'config', args=args_overrides)
    cfg_dict = omegaconf_to_dict(cfg.task)

    set_seed(cfg.train.Trainer.seed)

    # Instantiate the environment
    infer_env = task_map[custom_args.task](cfg=cfg_dict,
                                           rl_device=cfg.rl_device,
                                           sim_device=cfg.sim_device,
                                           graphics_device_id=cfg.graphics_device_id,
                                           headless=cfg.headless,
                                           virtual_screen_capture=cfg.capture_video,  # TODO: check
                                           force_render=cfg.force_render)

    # Instantiate the RL trainer
    trainer = trainer_map[custom_args.agent](cfg=cfg.train,
                                             env=infer_env,
                                             device=cfg.rl_device,
                                             env_name=custom_args.task)
    # Load the checkpoint
    if custom_args.ckpt_path is None:
        custom_args.ckpt_path = model_zoo(name="CURICabinetRofuncRLPPO_left_arm.pth")
    trainer.agent.load_ckpt(custom_args.ckpt_path)

    # Start inference
    trainer.inference()


if __name__ == '__main__':
    gpu_id = 0

    parser = argparse.ArgumentParser()
    # Available tasks: Hopper, HalfCheetah, Walker2d, Reacher2d
    parser.add_argument("--task", type=str, default="Hopper")
    parser.add_argument("--agent", type=str, default="dtrans")  # dtrans
    parser.add_argument("--sim_device", type=str, default="cuda:{}".format(gpu_id))
    parser.add_argument("--rl_device", type=str, default="cuda:{}".format(gpu_id))
    parser.add_argument("--graphics_device_id", type=int, default=gpu_id)
    parser.add_argument("--headless", type=str, default="True")
    parser.add_argument("--inference", action="store_true", help="run in inference mode when this flag is set")
    parser.add_argument("--ckpt_path", type=str, default=None)
    custom_args = parser.parse_args()

    if not custom_args.inference:
        train(custom_args)
    else:
        inference(custom_args)
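For orientation, the example is driven entirely by these flags. Assuming the script is saved as something like example_D4RL_RofuncRL.py (the filename is not visible in this diff), a typical run would be:

    python example_D4RL_RofuncRL.py --task Hopper --agent dtrans
    python example_D4RL_RofuncRL.py --task Hopper --agent dtrans --inference --ckpt_path /path/to/checkpoint.pth

The first command trains the Decision Transformer agent on the downloaded D4RL Hopper dataset; the second loads a checkpoint and runs inference.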
@@ -0,0 +1,2 @@
name: Hopper
rofunc/config/learning/rl/train/BaseTaskDTRANSRofuncRL.yaml (90 additions, 0 deletions)
@@ -0,0 +1,90 @@
# ========== Trainer parameters ==========
Trainer:
  experiment_name:  # Experiment name for logging.
  experiment_directory:  # Experiment directory for logging.
  write_interval: 100  # TensorBoard write interval for logging. (timesteps)
  checkpoint_interval: 1000  # Checkpoint interval for logging. (timesteps)
  wandb: False  # If true, log to Weights & Biases.
  wandb_kwargs:  # Weights & Biases kwargs. https://docs.wandb.ai/ref/python/init
    project:  # Weights & Biases project name.
  rofunc_logger_kwargs:  # Rofunc BeautyLogger kwargs.
    verbose: True  # If true, print to stdout.
  maximum_steps: 100000  # The maximum number of steps to run for.
  random_steps: 0  # The number of random exploration steps to take.
  start_learning_steps: 0  # The number of steps to take before starting network updates.
  seed: 42  # The random seed.
  rollouts: 16  # The number of rollouts before updating.
  eval_flag: False  # If true, run evaluation.
  eval_freq: 2500  # The frequency of evaluation. (timesteps)
  eval_steps: 1000  # The number of steps to run for evaluation.
  use_eval_thread: True  # If true, use a separate thread for evaluation.
  inference_steps: 1000  # The number of steps to run for inference.
  max_episode_steps: 1000  # The maximum number of steps per episode.

  dataset_type: medium  # medium, medium-replay, medium-expert, expert
  mode: normal  # normal for the standard setting, delayed for sparse rewards
  dataset_root_path: /home/ubuntu/Github/Rofunc/examples/data/D4RL
  env_targets: [ 3600, 1800 ]  # Return targets used for conditioning during evaluation.
  scale: 1000.  # Scale for reward and action.
  max_seq_length: 20  # Maximum length of the sequence fed into the GPT model.
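These entries follow the usual Decision Transformer recipe: returns are divided by scale, env_targets are the returns-to-go the policy is conditioned on at evaluation time, and max_seq_length caps the context window (in the delayed mode, sparse-reward variants commonly move the whole episode return to the final step). A minimal sketch of that standard return-to-go preprocessing, not taken from the RofuncRL code; discount_cumsum and the sample rewards below are purely illustrative:

    import numpy as np

    def discount_cumsum(rewards, gamma=1.0):
        """Return-to-go at each step: R_t = sum over t' >= t of gamma^(t'-t) * r_t'."""
        rtg = np.zeros_like(rewards, dtype=np.float64)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = rewards[t] + gamma * running
            rtg[t] = running
        return rtg

    # One made-up Hopper episode with per-step rewards
    rewards = np.array([1.2, 0.9, 1.1, 1.0])
    returns_to_go = discount_cumsum(rewards) / 1000.  # divided by `scale`
    # -> array([0.0042, 0.003, 0.0021, 0.001])
    # At evaluation time the sequence starts from a target instead, e.g. 3600 / scale = 3.6,
    # and is decremented by each (scaled) reward the policy actually collects.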

# ========== Agent parameters ==========
Agent:
  discount: 0.99  # The discount factor, gamma.
  td_lambda: 0.95  # TD(lambda) coefficient (lam) for computing returns and advantages.

  learning_epochs: 8  # The number of epochs to train for per update.
  batch_size: 1024  # Batch size for training.

  lr: 1e-4  # Learning rate for the actor.
  # lr_scheduler:  # Learning rate scheduler type.
  # lr_scheduler_kwargs:  # Learning rate scheduler kwargs.
  adam_eps: 1e-5  # Adam epsilon.
  weight_decay: 1e-4  # Weight decay.

  # If true, use the Generalized Advantage Estimator (GAE)
  # with a value function, see https://arxiv.org/pdf/1506.02438.pdf.
  use_gae: True

  entropy_loss_scale: 0.01  # Entropy loss scaling factor.
  value_loss_scale: 2.0  # Value loss scaling factor.

  grad_norm_clip: 1.0  # Clipping coefficient for the norm of the gradients.
  ratio_clip: 0.2  # Clipping coefficient for computing the clipped surrogate objective.
  value_clip: 0.2  # Clipping coefficient for computing the value loss (if clip_predicted_values is True).
  clip_predicted_values: True  # Clip predicted values during value loss computation.

  kl_threshold: 0  # Initial coefficient for KL divergence.

  # state_preprocessor:  # State preprocessor type.
  # state_preprocessor_kwargs:  # State preprocessor kwargs.
  # value_preprocessor:  # Value preprocessor type.
  # value_preprocessor_kwargs:  # Value preprocessor kwargs.
  # rewards_shaper:  # Rewards shaper type.


# ========== Model parameters ==========
Model:
  use_init: True
  use_action_clip: False  # If true, clip actions to the action space range.
  use_action_out_tanh: True  # If true, apply tanh to the output of the actor.
  action_clip: 1.0  # Clipping coefficient for the norm of the actions.
  action_scale: 1.0  # Scale actions from [-1, 1] after tanh to [-action_scale, action_scale].
  use_log_std_clip: True  # If true, clip log standard deviations to the range [-20, 2].
  log_std_clip_max: 2.0  # Upper bound for the log standard deviations.
  log_std_clip_min: -20  # Lower bound for the log standard deviations.

  actor:
    n_layer: 3
    n_head: 1
    n_embd: 128
    dropout: 0.1
    activation_function: relu
    max_episode_steps: ${train.Trainer.max_episode_steps}
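The actor block reads like the hyperparameters of a small GPT-style backbone (the Trainer section above also refers to "the GPT model"). Purely as an illustration of how such settings typically map onto a GPT-2 configuration, assuming a Hugging Face transformers backbone as in the original Decision Transformer code; whether RofuncRL builds its model this way is not shown in this diff:

    from transformers import GPT2Config

    # Hypothetical mapping of the Model.actor entries onto a GPT-2 style config
    config = GPT2Config(
        vocab_size=1,                 # unused: DTrans feeds continuous embeddings, not tokens
        n_layer=3,                    # Model.actor.n_layer
        n_head=1,                     # Model.actor.n_head
        n_embd=128,                   # Model.actor.n_embd
        activation_function="relu",   # Model.actor.activation_function
        resid_pdrop=0.1,              # Model.actor.dropout
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        n_positions=1024,             # must cover 3 * max_seq_length tokens (return, state, action)
    )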