constants.py
constants = {
'GAMMA': 0.99, # discount factor for rewards
'LAMBDA': 1.0, # lambda of Generalized Advantage Estimation: https://arxiv.org/abs/1506.02438
    'ENTROPY_BETA': 0.01, # entropy regularization constant.
    'ROLLOUT_MAXLEN': 20, # 20 is the number of 'local steps': the number of timesteps
                          # we run the policy before we update the parameters.
                          # The larger the number of local steps, the lower the variance of our
                          # policy gradient estimate; on the other hand, parameter updates become
                          # less frequent, which slows down learning. In this code, we found that
                          # making local steps much smaller than 20 makes the algorithm more
                          # difficult to tune and to get working.
'GRAD_NORM_CLIP': 40.0, # gradient norm clipping
'REWARD_CLIP': 1.0, # reward value clipping in [-x,x]
'MAX_GLOBAL_STEPS': 100000000, # total steps taken across all workers
'LEARNING_RATE': 1e-4, # learning rate for adam
'PREDICTION_BETA': 0.0005, # weight of prediction bonus
# set 0.5 for unsup=state
    'PREDICTION_LR_SCALE': 0.01, # scale of the predictor's learning rate relative to the policy network
# set 30-50 for unsup=state
    'FORWARD_LOSS_WT': 0.2, # should be in [0, 1]
# predloss = ( (1-FORWARD_LOSS_WT) * inv_loss + FORWARD_LOSS_WT * forward_loss) * PREDICTION_LR_SCALE
'POLICY_NO_BACKPROP_STEPS': 0, # number of global steps after which we start backpropagating to policy
'USE_VPC_SMOOTHING': False, # True if the smoothing function should be used
'SMOOTHING_VALUE': 1, # constant used in weighting function
'VPC_WT': 0.1, # weight for value prediction regularization term
}
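
# --- Minimal usage sketch (not part of this file) -------------------------------
# The two helpers below illustrate, under stated assumptions, how GAMMA and LAMBDA
# are typically combined in Generalized Advantage Estimation, and how the inverse
# and forward losses are weighted according to the predloss formula in the comment
# above. The names `rewards`, `values`, `bootstrap_value`, `inv_loss`, and
# `forward_loss` are illustrative assumptions, not symbols defined in this repo.
# Terminal-state masking is omitted for brevity.
import numpy as np


def gae_advantages(rewards, values, bootstrap_value,
                   gamma=constants['GAMMA'], lam=constants['LAMBDA']):
    """Compute GAE advantages for one rollout (length <= ROLLOUT_MAXLEN)."""
    values_ext = np.append(values, bootstrap_value)
    # one-step TD errors: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    deltas = rewards + gamma * values_ext[1:] - values_ext[:-1]
    advantages = np.zeros(len(rewards), dtype=np.float64)
    running = 0.0
    # accumulate discounted deltas backwards: A_t = delta_t + gamma * lambda * A_{t+1}
    for t in reversed(range(len(rewards))):
        running = deltas[t] + gamma * lam * running
        advantages[t] = running
    return advantages


def prediction_loss(inv_loss, forward_loss,
                    wt=constants['FORWARD_LOSS_WT'],
                    scale=constants['PREDICTION_LR_SCALE']):
    """Combine inverse and forward model losses as in the FORWARD_LOSS_WT comment."""
    return ((1.0 - wt) * inv_loss + wt * forward_loss) * scale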