Commit

Merge pull request #108 from kengz/schedule
fix PER sign issue
kengz authored Apr 4, 2017
2 parents 0ecafc0 + 1d707e7 commit 5fb91d3
Showing 4 changed files with 19 additions and 16 deletions.
11 changes: 6 additions & 5 deletions rl/memory/prioritized_exp_replay.py
@@ -27,15 +27,16 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
         self.prio_tree = SumTree(self.max_mem_len)
         self.head = 0

+        # bump to account for negative terms in reward get_priority
+        # and we cannot abs(reward) cuz it's sign sensitive
         SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD']
-        if SOLVED_MEAN_REWARD > 0:
-            self.min_priority = 0
-        else:
-            self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
+        self.min_priority = abs(10 * SOLVED_MEAN_REWARD)

     def get_priority(self, error):
         # add min_priority to prevent root of negative = complex
-        return (self.min_priority + error + self.e) ** self.alpha
+        p = (self.min_priority + error + self.e) ** self.alpha
+        assert not np.isnan(p)
+        return p

     def add_exp(self, action, reward, next_state, terminal):
         '''Round robin memory updating'''
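
For context on the fix above: with numpy floats, raising a negative base to the fractional power `alpha` yields `nan` (a plain Python float would come back as a complex number instead, hence the old comment "root of negative = complex"), which is exactly what the new `assert not np.isnan(p)` trips on. A minimal standalone sketch of the failure and the `min_priority` shift, with illustrative numbers not taken from the repo:

```python
import numpy as np

alpha = 0.6  # PER priority exponent
e = 0.01     # small constant keeping priorities nonzero

# On problems with negative rewards, the error term fed to get_priority
# can be negative, so the exponentiation base goes negative:
error = np.float64(-2.0)
print((error + e) ** alpha)  # nan -- what the new assert catches

# The fix shifts the base by min_priority so it stays positive:
min_priority = 10.0  # illustrative; the repo derives it from the problem spec
print((min_priority + error + e) ** alpha)  # well-defined positive priority
```
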
6 changes: 3 additions & 3 deletions rl/spec/box2d_experiment_specs.json
@@ -106,15 +106,15 @@
       "Policy": "DoubleDQNBoltzmannPolicy",
       "PreProcessor": "StackStates",
       "param": {
-        "train_per_n_new_exp": 5,
         "batch_size": 32,
+        "train_per_n_new_exp": 2,
         "lr": 0.005,
         "gamma": 0.99,
         "hidden_layers": [800, 400],
         "hidden_layers_activation": "sigmoid",
         "output_layer_activation": "linear",
         "exploration_anneal_episodes": 150,
-        "epi_change_lr": 200
+        "epi_change_lr": 200,
+        "max_mem_len": 30000
       },
       "param_range": {
         "lr": [0.001, 0.005, 0.01],
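
Reading the param changes above: lowering `train_per_n_new_exp` from 5 to 2 trains the network more often per collected experience, and the new `max_mem_len` caps the replay memory at 30000 entries. A hypothetical sketch of how keys like these are typically consumed (names mirror the spec keys, but the loop is illustrative, not the repo's agent code):

```python
from collections import deque
import random

# Values from the spec diff above; the loop structure is an assumption.
param = {"train_per_n_new_exp": 2, "batch_size": 32, "max_mem_len": 30000}

memory = deque(maxlen=param["max_mem_len"])  # oldest experiences roll off
new_exp_count = 0

def train_step(batch):
    pass  # placeholder for one gradient update

for t in range(1000):
    memory.append(("state", "action", "reward", "next_state"))  # stand-in exp
    new_exp_count += 1
    # train once per n newly collected experiences, not on every step
    if (new_exp_count >= param["train_per_n_new_exp"]
            and len(memory) >= param["batch_size"]):
        train_step(random.sample(memory, param["batch_size"]))
        new_exp_count = 0
```
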
16 changes: 9 additions & 7 deletions rl/spec/classic_experiment_specs.json
@@ -94,7 +94,7 @@
       "HyperOptimizer": "GridSearch",
       "Memory": "PrioritizedExperienceReplay",
       "Optimizer": "AdamOptimizer",
-      "Policy": "BoltzmannPolicy",
+      "Policy": "DoubleDQNBoltzmannPolicy",
       "PreProcessor": "NoPreProcessor",
       "param": {
         "lr": 0.02,
@@ -460,7 +460,7 @@
       "Policy": "DoubleDQNBoltzmannPolicy",
       "PreProcessor": "StackStates",
       "param": {
-        "train_per_n_new_exp": 4,
+        "train_per_n_new_exp": 1,
         "lr": 0.01,
         "gamma": 0.99,
         "hidden_layers": [32],
@@ -667,23 +667,25 @@
       "HyperOptimizer": "GridSearch",
       "Memory": "PrioritizedExperienceReplay",
       "Optimizer": "AdamOptimizer",
-      "Policy": "BoltzmannPolicy",
+      "Policy": "DoubleDQNBoltzmannPolicy",
       "PreProcessor": "NoPreProcessor",
       "param": {
         "train_per_n_new_exp": 5,
         "batch_size": 32,
         "lr": 0.001,
         "gamma": 0.99,
         "hidden_layers": [128, 64],
         "hidden_layers_activation": "sigmoid",
         "output_layer_activation": "linear",
-        "exploration_anneal_episodes": 400,
-        "epi_change_lr": 600
+        "exploration_anneal_episodes": 200,
+        "epi_change_lr": 500,
+        "max_mem_len": 20000
       },
       "param_range": {
-        "lr": [0.001, 0.01],
+        "lr": [0.01, 0.02, 0.03, 0.05],
         "gamma": [0.99, 0.999],
         "hidden_layers": [
           [400],
           [800],
           [400, 200]
         ]
       }
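
Both Boltzmann specs in this file are switched to `DoubleDQNBoltzmannPolicy`. For reference, the standard double-DQN target (van Hasselt et al.) decouples action selection (online network) from action evaluation (target network); a generic sketch, not the repo's implementation:

```python
import numpy as np

def double_dqn_target(q_online_next, q_target_next, reward, gamma, terminal):
    """Standard double-DQN bootstrap target for one transition."""
    a_star = int(np.argmax(q_online_next))  # online net selects the action
    bootstrap = q_target_next[a_star]       # target net evaluates it
    return reward if terminal else reward + gamma * bootstrap

# Toy numbers: online net prefers action 1, target net scores it 1.5
print(double_dqn_target(np.array([1.0, 2.0]), np.array([0.5, 1.5]),
                        reward=-1.0, gamma=0.99, terminal=False))
```
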
2 changes: 1 addition & 1 deletion rl/spec/problems.json
@@ -38,7 +38,7 @@
       "RENDER": true,
       "GYM_ENV_NAME": "MountainCar-v0",
       "SOLVED_MEAN_REWARD": -110.0,
-      "MAX_EPISODES": 2000,
+      "MAX_EPISODES": 1000,
       "REWARD_MEAN_LEN": 100
     },
     "MountainCarContinuous-v0": {
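
The MountainCar-v0 entry above is the kind of problem the memory fix targets: its `SOLVED_MEAN_REWARD` is negative, and after this commit `min_priority` is derived unconditionally from it. The resulting value for this spec:

```python
# min_priority as computed by the new prioritized_exp_replay.py code path
SOLVED_MEAN_REWARD = -110.0
min_priority = abs(10 * SOLVED_MEAN_REWARD)
assert min_priority == 1100.0
```
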
