-
Notifications
You must be signed in to change notification settings - Fork 62
/
callbacks.yaml
64 lines (63 loc) · 2.41 KB
/
callbacks.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Top-level run settings.
# NOTE(review): `wandb_entity` is presumably the Weights & Biases entity the
# run is logged under -- confirm against the consumer of this file.
wandb_entity: "openrl-lab"
experiment_name: "test_callbacks"
run_dir: "./wandb_run"

# Callbacks for the run. Each entry names a callback by `id` and may carry an
# `args` mapping for it.
callbacks:
# Entry with no `args`.
- id: "ProgressBarCallback"
# `args` is nested under the entry so it stays attached to this callback.
- id: "StopTrainingOnMaxEpisodes"
  args:
    max_episodes: 25  # the max number of episodes to run
    verbose: 1
# Periodic model checkpointing.
- id: "CheckpointCallback"
  args:
    save_freq: 500  # how often to save the model
    save_path: "./results/checkpoints/"  # where to save the model
    name_prefix: "ppo"  # the prefix of the saved model
    save_replay_buffer: true  # not working yet
    verbose: 2
# Periodic evaluation, with nested callbacks hooked to evaluation results.
- id: "EvalCallback"
  args:
    # the evaluation environment; `env_num` is how many envs to set up for
    # evaluation
    eval_env:
      id: "CartPole-v1"
      env_num: 5
    n_eval_episodes: 5  # how many episodes to run for each evaluation
    eval_freq: 500  # how often to run evaluation
    log_path: "./results/eval_log_path"  # where to save the evaluation results
    best_model_save_path: "./results/best_model/"  # where to save the best model
    deterministic: true  # whether to use deterministic actions
    render: false  # whether to render the env
    asynchronous: true  # whether to run evaluation asynchronously
    # the logic to stop training: "OR" means training stops when any one of
    # the conditions is met, "AND" means training stops when all conditions
    # are met
    stop_logic: "OR"
    callbacks_on_new_best:
    - id: "StopTrainingOnRewardThreshold"
      args:
        reward_threshold: 500  # the reward threshold to stop training
        verbose: 1
    callbacks_after_eval:
    - id: "StopTrainingOnNoModelImprovement"
      args:
        # maximum number of consecutive evaluations without a new best model
        max_no_improvement_evals: 10
        # number of evaluations to run before starting to count evaluations
        # without improvement
        min_evals: 2
# This is the same as "CheckpointCallback": a nested CheckpointCallback is
# wrapped in EveryNTimesteps with the `n_steps` value below.
- id: "EveryNTimesteps"
  args:
    n_steps: 5000
    callbacks:
    - id: "CheckpointCallback"
      args:
        save_freq: 1
        # where to save the model
        save_path: "./results/checkpoints_with_EveryNTimesteps/"
        name_prefix: "ppo"  # the prefix of the saved model
        save_replay_buffer: true  # not working yet
        verbose: 2