Skip to content

Commit

Permalink
fix negative rewards bug by adding bump to all
Browse files Browse the repository at this point in the history
  • Loading branch information
kengz committed Apr 4, 2017
1 parent 13b58a3 commit fa70057
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
11 changes: 6 additions & 5 deletions rl/memory/prioritized_exp_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
self.prio_tree = SumTree(self.max_mem_len)
self.head = 0

# bump added in get_priority to offset negative reward terms;
# we cannot simply use abs(reward) because the priority is sign-sensitive
SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD']
if SOLVED_MEAN_REWARD > 0:
self.min_priority = 0
else:
self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
self.min_priority = abs(10 * SOLVED_MEAN_REWARD)

def get_priority(self, error):
    '''
    Convert an error term into a sampling priority.

    Computes (self.min_priority + error + self.e) ** self.alpha and
    returns it.

    Raises AssertionError when the result is nan or complex, i.e. when
    the shifted base is negative and alpha is fractional.
    '''
    # add min_priority (and e) so the base stays non-negative:
    # a fractional power of a negative number is not a real number
    base = self.min_priority + error + self.e
    p = base ** self.alpha
    # numpy floats yield nan for a negative base, builtin floats yield
    # a complex number; np.isnan alone would let the complex case through
    assert not (np.iscomplexobj(p) or np.isnan(p)), (
        'invalid priority {} from base {}'.format(p, base))
    return p

def add_exp(self, action, reward, next_state, terminal):
'''Round robin memory updating'''
Expand Down
4 changes: 2 additions & 2 deletions rl/spec/box2d_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,14 @@
"PreProcessor": "StackStates",
"param": {
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.005,
"gamma": 0.99,
"hidden_layers": [800, 400],
"hidden_layers_activation": "sigmoid",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 150,
"epi_change_lr": 200
"epi_change_lr": 200,
"max_mem_len": 20000
},
"param_range": {
"lr": [0.001, 0.005, 0.01],
Expand Down

0 comments on commit fa70057

Please sign in to comment.