Commit 2c02f06

Merge pull request #129 from kengz/schedule

Solution: Pendulum-v0 with DDPG LinearNoisePolicy

kengz authored Apr 24, 2017
2 parents 14e9fcd + 649f1a7

Showing 3 changed files with 266 additions and 17 deletions.
rl/memory/prioritized_exp_replay.py (2 changes: 1 addition & 1 deletion)
@@ -29,7 +29,7 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
 
         # bump to account for negative terms in reward get_priority
         # and we cannot abs(reward) cuz it's sign sensitive
-        SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD']
+        SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD'] or 10000
         self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
 
     def get_priority(self, error):
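Note: the added "or 10000" guards against specs whose SOLVED_MEAN_REWARD is unset (None), which would otherwise make abs(10 * SOLVED_MEAN_REWARD) raise a TypeError. The body of get_priority is not part of this diff; the snippet below is only a minimal sketch, assuming standard proportional prioritization with the e and alpha defaults from __init__, of how min_priority could keep priorities positive when errors are sign-sensitive:

    # Hypothetical sketch, not the repo's exact implementation.
    # Instead of abs(error), the error is shifted by min_priority
    # (= abs(10 * SOLVED_MEAN_REWARD)) so the base stays positive.
    def get_priority(error, min_priority, e=0.01, alpha=0.6):
        return (error + min_priority + e) ** alpha

    # Suppose SOLVED_MEAN_REWARD = -150 (a common Pendulum-v0 target),
    # giving min_priority = 1500; a negative TD error still maps to a
    # positive priority.
    print(get_priority(-20.0, 1500.0))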
rl/spec/box2d_experiment_specs.json (70 changes: 66 additions & 4 deletions)
@@ -257,12 +257,74 @@
       "hidden_layers_activation": "sigmoid"
     },
     "param_range": {
-      "lr": [0.005, 0.01, 0.05, 0.1],
-      "gamma": [0.97, 0.99, 0.999],
+      "lr": [0.001, 0.005, 0.01],
+      "gamma": [0.99, 0.999],
       "hidden_layers": [
-        [400, 200],
+        [400, 300],
         [800, 400],
-        [400, 200, 100]
+        [800, 600]
       ]
     }
   },
+  "lunar_ddpg_linearnoise": {
+    "problem": "LunarLanderContinuous-v2",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "LinearNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 100,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005, 0.01],
+      "gamma": [0.97, 0.99, 0.999],
+      "hidden_layers": [
+        [400, 300],
+        [600, 300],
+        [800, 400, 200]
+      ]
+    }
+  },
+  "walker_ddpg_linearnoise": {
+    "problem": "BipedalWalker-v2",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "LinearNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 100,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005, 0.01],
+      "gamma": [0.97, 0.99, 0.999],
+      "hidden_layers": [
+        [400, 300],
+        [600, 300],
+        [800, 400, 200]
+      ]
+    }
+  }
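Both new specs pair DDPG with LinearNoisePolicy and anneal exploration over exploration_anneal_episodes. The policy class itself is not in this diff; the sketch below shows the assumed behavior: an action-noise scale that decays linearly to zero over that many episodes, with actions clipped to the tanh output range.

    import numpy as np

    # Hypothetical sketch of linear noise annealing (assumed behavior of
    # LinearNoisePolicy; the class itself is not shown in this diff).
    def select_action(actor_output, episode, anneal_episodes=100, init_scale=1.0):
        # noise scale decays linearly from init_scale to 0, then stays at 0
        scale = init_scale * max(0.0, 1.0 - episode / anneal_episodes)
        noise = np.random.normal(0.0, scale, size=np.shape(actor_output))
        # the actor uses a tanh output layer, so clip to its [-1, 1] range
        return np.clip(actor_output + noise, -1.0, 1.0)

    print(select_action(np.array([0.3]), episode=0))    # noisy early on
    print(select_action(np.array([0.3]), episode=100))  # deterministic after annealing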
rl/spec/classic_experiment_specs.json (211 changes: 199 additions & 12 deletions)
@@ -826,6 +826,195 @@
       ]
     }
   },
+  "pendulum_ddpg_nonoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "NoNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 50,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [400, 300],
+        [800, 400, 200],
+        [800, 600, 400, 200]
+      ]
+    }
+  },
+  "pendulum_ddpg_per": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "PrioritizedExperienceReplay",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "NoNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.001,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 50,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh",
+      "max_mem_len": 30000
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [400, 300],
+        [800, 400, 200],
+        [800, 600, 400, 200]
+      ]
+    }
+  },
+  "pendulum_ddpg_per_linearnoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "PrioritizedExperienceReplay",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "LinearNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 100,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh",
+      "max_mem_len": 30000
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [200, 100],
+        [400, 200],
+        [400, 300],
+        [800, 400]
+      ]
+    }
+  },
+  "pendulum_ddpg_linearnoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "LinearNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.0001,
+      "critic_lr": 0.005,
+      "exploration_anneal_episodes": 50,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [400, 300],
+        [800, 400, 200],
+        [800, 600, 400, 200]
+      ]
+    }
+  },
+  "pendulum_ddpg_ounoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "NoNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 50,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [400, 300],
+        [800, 400, 200],
+        [800, 600, 400, 200]
+      ]
+    }
+  },
+  "pendulum_ddpg_gaussiannoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "LinearMemoryWithForgetting",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "GaussianWhiteNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 50,
+      "gamma": 0.97,
+      "hidden_layers": [400, 300],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh"
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005, 0.001],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [400, 300],
+        [800, 400, 200],
+        [800, 600, 400, 200]
+      ]
+    }
+  },
   "mountain_dqn": {
     "problem": "MountainCar-v0",
     "Agent": "DQN",
@@ -880,13 +1069,13 @@
       ]
     }
   },
-  "mountain_double_dqn_per": {
+  "mountain_dqn_per": {
     "problem": "MountainCar-v0",
-    "Agent": "DoubleDQN",
+    "Agent": "DQN",
     "HyperOptimizer": "GridSearch",
     "Memory": "PrioritizedExperienceReplay",
     "Optimizer": "AdamOptimizer",
-    "Policy": "DoubleDQNBoltzmannPolicy",
+    "Policy": "BoltzmannPolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
       "batch_size": 32,
@@ -895,17 +1084,16 @@
       "hidden_layers": [128, 64],
       "hidden_layers_activation": "sigmoid",
       "output_layer_activation": "linear",
-      "exploration_anneal_episodes": 200,
-      "epi_change_lr": 500,
-      "max_mem_len": 20000
+      "exploration_anneal_episodes": 50,
+      "epi_change_lr": 150,
+      "max_mem_len": 30000
     },
     "param_range": {
-      "lr": [0.01, 0.02, 0.03, 0.05],
+      "lr": [0.005, 0.01, 0.02, 0.05],
       "gamma": [0.99, 0.999],
       "hidden_layers": [
         [400],
-        [800],
-        [400, 200]
+        [800]
       ]
     }
   },
@@ -951,12 +1139,11 @@
       "hidden_layers_activation": "sigmoid"
     },
     "param_range": {
-      "lr": [0.005, 0.01, 0.05, 0.1],
+      "lr": [0.001, 0.005, 0.01],
       "gamma": [0.97, 0.99, 0.999],
       "hidden_layers": [
         [200],
-        [400, 200],
-        [400, 200, 100]
+        [400]
       ]
     }
   }
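Every spec in this commit declares GridSearch as its HyperOptimizer alongside a param_range block. Conceptually, grid search takes the Cartesian product of the listed values and overrides the base param dict once per combination; the sketch below (a hypothetical helper, not the repo's GridSearch code) shows that expansion for the pendulum_ddpg_nonoise ranges above:

    import itertools

    # Hypothetical expansion of a param_range into concrete trial params.
    base_param = {"batch_size": 64, "tau": 0.005, "lr": 0.001,
                  "critic_lr": 0.001, "gamma": 0.97}
    param_range = {
        "lr": [0.0001, 0.0005, 0.001],
        "critic_lr": [0.001, 0.005],
        "gamma": [0.95, 0.97, 0.99],
        "hidden_layers": [[400, 300], [800, 400, 200], [800, 600, 400, 200]],
    }

    keys = sorted(param_range)
    trials = [dict(base_param, **dict(zip(keys, combo)))
              for combo in itertools.product(*(param_range[k] for k in keys))]
    print(len(trials))  # 3 * 2 * 3 * 3 = 54 trial configurations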
