config_validation.py
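"""Pydantic models that validate the experiment configuration: async PPO
hyperparameters plus experiment, global, evaluation, and environment
settings, with cross-field defaults filled in by a validator on Experiment.
"""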
import multiprocessing
import os
from os.path import join
from typing import List, Optional

from pydantic import BaseModel, Extra, validator


class AsyncPPO(BaseModel, extra=Extra.forbid):
    experiment_summaries_interval: int = 20
    adam_eps: float = 1e-6
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    gae_lambda: float = 0.95
    rollout: int = 32
    num_workers: int = multiprocessing.cpu_count()
    recurrence: int = 32
    use_rnn: bool = True
    rnn_type: str = 'gru'
    rnn_num_layers: int = 1
    ppo_clip_ratio: float = 0.1
    ppo_clip_value: float = 1.0
    batch_size: int = 1024
    num_batches_per_iteration: int = 1
    ppo_epochs: int = 1
    num_minibatches_to_accumulate: int = -1
    max_grad_norm: float = 4.0
    exploration_loss_coeff: float = 0.003
    value_loss_coeff: float = 0.5
    kl_loss_coeff: float = 0.0
    exploration_loss: str = 'entropy'
    num_envs_per_worker: int = 32
    worker_num_splits: int = 2
    num_policies: int = 1
    policy_workers_per_policy: int = 1
    max_policy_lag: int = 10000
    traj_buffers_excess_ratio: int = 2
    decorrelate_experience_max_seconds: int = 10
    decorrelate_envs_on_one_worker: bool = True
    with_vtrace: bool = True
    vtrace_rho: float = 1.0
    vtrace_c: float = 1.0
    set_workers_cpu_affinity: bool = True
    force_envs_single_thread: bool = True
    reset_timeout_seconds: int = 120
    default_niceness: int = 0
    train_in_background_thread: bool = True
    learner_main_loop_num_cores: int = 1
    actor_worker_gpus: List[int] = []  # GPU indices assigned to actor workers
    with_pbt: bool = False
    pbt_mix_policies_in_one_env: bool = True
    # int literals instead of 5e6/2e7: pydantic v1 does not validate default
    # values, so a float default would survive despite the int annotation.
    pbt_period_env_steps: int = 5_000_000
    pbt_start_mutation: int = 20_000_000
    pbt_replace_fraction: float = 0.3
    pbt_mutation_rate: float = 0.15
    pbt_replace_reward_gap: float = 0.1
    pbt_replace_reward_gap_absolute: float = 1e-6
    pbt_optimize_batch_size: bool = False
    pbt_target_objective: str = 'true_reward'
    use_cpc: bool = False
    cpc_forward_steps: int = 8
    cpc_time_subsample: int = 6
    cpc_forward_subsample: int = 2
    benchmark: bool = False
    sampler_only: bool = False


class ExperimentSettings(BaseModel, extra=Extra.forbid):
    save_every_sec: int = 120
    keep_checkpoints: int = 1
    save_milestones_sec: int = -1
    stats_avg: int = 100
    learning_rate: float = 1e-4
    train_for_env_steps: int = 10_000_000_000
    train_for_seconds: int = 10_000_000_000
    obs_subtract_mean: float = 0.0
    obs_scale: float = 1.0
    gamma: float = 0.99
    reward_scale: float = 1.0
    reward_clip: float = 10.0
    encoder_type: Optional[str] = None
    encoder_custom: str = 'custom_env_encoder'
    encoder_subtype: str = 'resnet_impala'
    encoder_extra_fc_layers: int = 1
    hidden_size: int = 256
    nonlinearity: str = 'relu'
    policy_initialization: str = 'orthogonal'
    policy_init_gain: float = 1.0
    actor_critic_share_weights: bool = True
    use_spectral_norm: bool = False
    adaptive_stddev: bool = True
    initial_stddev: float = 1.0


class GlobalSettings(BaseModel, extra=Extra.forbid):
    algo: str = 'APPO'
    env: Optional[str] = None
    experiment: str = 'TreeChopBaseline-iglu'
    experiments_root: Optional[str] = None
    train_dir: str = 'train_dir/experiment'
    device: str = 'gpu'
    seed: Optional[int] = None
    cli_args: dict = {}
    use_wandb: bool = True
    with_wandb: bool = True


class Evaluation(BaseModel, extra=Extra.forbid):
    fps: int = 0
    render_action_repeat: Optional[int] = None
    no_render: bool = True
    policy_index: int = 0
    record_to: str = join(os.getcwd(), '..', 'recs')
    continuous_actions_sample: bool = True
    env_frameskip: Optional[int] = None


class Environment(BaseModel):
    name: str = 'IGLUSilentBuilder-v0'


class Experiment(BaseModel):
    name: Optional[str] = None
    environment: Environment = Environment()
    async_ppo: AsyncPPO = AsyncPPO()
    experiment_settings: ExperimentSettings = ExperimentSettings()
    global_settings: GlobalSettings = GlobalSettings()
    evaluation: Evaluation = Evaluation()

    # always=True makes the validator run even when global_settings is left at
    # its default; otherwise pydantic v1 skips validators for defaulted fields
    # and the env/experiment names would never be filled in.
    @validator('global_settings', always=True)
    def fill_global_settings(cls, v, values):
        if v.env is None:
            v.env = values['environment'].name
        if v.experiment is None:
            v.experiment = values['name']
        return v
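

if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original module): shows how
    # nested defaults, coercion of plain dicts, and the global_settings
    # validator behave. The override values below are hypothetical.
    experiment = Experiment(name='treechop-demo',
                            async_ppo={'batch_size': 2048})

    # The validator copied the environment name into global_settings.env,
    # since env defaults to None.
    print(experiment.global_settings.env)    # -> 'IGLUSilentBuilder-v0'
    print(experiment.async_ppo.batch_size)   # -> 2048

    # extra=Extra.forbid rejects unknown keys, so a typo in a config fails
    # fast instead of being silently ignored:
    # Experiment(async_ppo={'bacth_size': 2048})  # raises ValidationError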