diff --git a/rofunc/config/learning/rl/train/AntPPORofuncRL.yaml b/rofunc/config/learning/rl/train/AntPPORofuncRL.yaml index 5184799d2..b49bfa33d 100755 --- a/rofunc/config/learning/rl/train/AntPPORofuncRL.yaml +++ b/rofunc/config/learning/rl/train/AntPPORofuncRL.yaml @@ -16,7 +16,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/BaseTaskA2CRofuncRL.yaml b/rofunc/config/learning/rl/train/BaseTaskA2CRofuncRL.yaml index 5cb6f5060..a3533aaff 100644 --- a/rofunc/config/learning/rl/train/BaseTaskA2CRofuncRL.yaml +++ b/rofunc/config/learning/rl/train/BaseTaskA2CRofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/BaseTaskAMPRofuncRL.yaml b/rofunc/config/learning/rl/train/BaseTaskAMPRofuncRL.yaml index e70fbfddd..c91e77eac 100644 --- a/rofunc/config/learning/rl/train/BaseTaskAMPRofuncRL.yaml +++ b/rofunc/config/learning/rl/train/BaseTaskAMPRofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml b/rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml index b0e8cff05..d7c27d32c 100644 --- a/rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml +++ b/rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: True # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml b/rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml index 6a670ee5a..860c75ad3 100644 --- a/rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml +++ b/rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 1000 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/BaseTaskTD3RofuncRL.yaml b/rofunc/config/learning/rl/train/BaseTaskTD3RofuncRL.yaml index 63c93d5fa..8f77ec2c1 100644 --- a/rofunc/config/learning/rl/train/BaseTaskTD3RofuncRL.yaml +++ b/rofunc/config/learning/rl/train/BaseTaskTD3RofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/CURICabinetImagePPORofuncRL.yaml b/rofunc/config/learning/rl/train/CURICabinetImagePPORofuncRL.yaml index d36114e3b..9a822e31c 100644 --- a/rofunc/config/learning/rl/train/CURICabinetImagePPORofuncRL.yaml +++ b/rofunc/config/learning/rl/train/CURICabinetImagePPORofuncRL.yaml @@ -16,7 +16,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/CURICabinetPPORofuncRL.yaml b/rofunc/config/learning/rl/train/CURICabinetPPORofuncRL.yaml index caf430371..6b57d2d43 100644 --- a/rofunc/config/learning/rl/train/CURICabinetPPORofuncRL.yaml +++ b/rofunc/config/learning/rl/train/CURICabinetPPORofuncRL.yaml @@ -16,7 +16,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/CURICabinetSACRofuncRL.yaml b/rofunc/config/learning/rl/train/CURICabinetSACRofuncRL.yaml index 909f130d7..a9594fd6f 100644 --- a/rofunc/config/learning/rl/train/CURICabinetSACRofuncRL.yaml +++ b/rofunc/config/learning/rl/train/CURICabinetSACRofuncRL.yaml @@ -15,7 +15,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/FrankaCabinetPPORofuncRL.yaml b/rofunc/config/learning/rl/train/FrankaCabinetPPORofuncRL.yaml index f1a74bcf3..64bfd790d 100644 --- a/rofunc/config/learning/rl/train/FrankaCabinetPPORofuncRL.yaml +++ b/rofunc/config/learning/rl/train/FrankaCabinetPPORofuncRL.yaml @@ -16,7 +16,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 16 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASEGetupSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASEGetupSwordShieldASERofuncRL.yaml index 954288760..6ca85b158 100644 --- a/rofunc/config/learning/rl/train/HumanoidASEGetupSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASEGetupSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASEHeadingSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASEHeadingSwordShieldASERofuncRL.yaml index a84dcc673..b029e4245 100644 --- a/rofunc/config/learning/rl/train/HumanoidASEHeadingSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASEHeadingSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASELocationSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASELocationSwordShieldASERofuncRL.yaml index a84dcc673..b029e4245 100644 --- a/rofunc/config/learning/rl/train/HumanoidASELocationSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASELocationSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASEPerturbSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASEPerturbSwordShieldASERofuncRL.yaml index 954288760..6ca85b158 100644 --- a/rofunc/config/learning/rl/train/HumanoidASEPerturbSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASEPerturbSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASEReachSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASEReachSwordShieldASERofuncRL.yaml index a84dcc673..b029e4245 100644 --- a/rofunc/config/learning/rl/train/HumanoidASEReachSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASEReachSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode. diff --git a/rofunc/config/learning/rl/train/HumanoidASEStrikeSwordShieldASERofuncRL.yaml b/rofunc/config/learning/rl/train/HumanoidASEStrikeSwordShieldASERofuncRL.yaml index a84dcc673..b029e4245 100644 --- a/rofunc/config/learning/rl/train/HumanoidASEStrikeSwordShieldASERofuncRL.yaml +++ b/rofunc/config/learning/rl/train/HumanoidASEStrikeSwordShieldASERofuncRL.yaml @@ -14,7 +14,10 @@ Trainer: start_learning_steps: 0 # The number of steps to take before starting network updating. seed: 42 # The random seed. rollouts: 32 # The number of rollouts before updating. + eval_flag: False # If true, run evaluation. + eval_freq: 2500 # The frequency of evaluation. (timesteps) eval_steps: 1000 # The number of steps to run for evaluation. + use_eval_thread: True # If true, use a separate thread for evaluation. inference_steps: 1000 # The number of steps to run for inference. max_episode_steps: 200 # The maximum number of steps per episode.