Skip to content

Commit

Permalink
fix all specs to pass component locks; debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
kengz committed Apr 10, 2017
1 parent a4c6657 commit eeb1da6
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 89 deletions.
2 changes: 1 addition & 1 deletion rl/spec/atari_experiment_specs.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"dev_conv_dqn": {
"problem": "Dev-Breakout-v0",
"problem": "DevBreakout-v0",
"Agent": "ConvDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
Expand Down
74 changes: 1 addition & 73 deletions rl/spec/box2d_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
},
"lunar_freeze": {
"problem": "LunarLander-v2",
"Agent": "DQNFreeze",
"Agent": "FreezeDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
Expand Down Expand Up @@ -241,77 +241,5 @@
[400, 200, 100]
]
}
},
"bipedal": {
"problem": "BipedalWalker-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
},
"bipedal_hardcore": {
"problem": "BipedalWalkerHardcore-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
}
}
24 changes: 14 additions & 10 deletions rl/spec/component_locks.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,37 @@
"details": "double_network agents need policies that invoke both networks properly",
"head": "Agent",
"Agent": [
"DoubleDQN",
"DoubleConvDQN"
"DoubleConvDQN",
"DoubleDQN"
],
"Policy": [
"DoubleDQNEpsilonGreedyPolicy",
"DoubleDQNBoltzmannPolicy"
"DoubleDQNBoltzmannPolicy",
"DoubleDQNEpsilonGreedyPolicy"
]
},
"discrete_action": {
"type": "subset",
"details": "discrete components cannot work in continuous action space",
"head": "problem",
"problem": [
"CartPole-v0",
"CartPole-v1",
"Acrobot-v1",
"MountainCar-v0",
"LunarLander-v2",
"AirRaid-v0",
"Alien-v0",
"Assault-v0",
"Breakout-v0",
"CartPole-v0",
"CartPole-v1",
"DevBreakout-v0",
"DevCartPole-v0",
"FlappyBird-v0",
"LunarLander-v2",
"MountainCar-v0",
"MsPacman-v0",
"Pong-v0",
"Qbert-v0",
"Snake-v0",
"SpaceInvader-v0",
"FlappyBird-v0",
"Snake-v0"
"TestPassCartPole-v0"
],
"Agent": [
"ConvDQN",
Expand All @@ -40,6 +43,7 @@
"DoubleConvDQN",
"DoubleDQN",
"DQN",
"Dummy",
"FreezeDQN",
"OffPolicySarsa",
"QTable"
Expand Down
2 changes: 1 addition & 1 deletion rl/spec/dev_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
"first_hidden_layer_size": 512,
"e": 0.01,
"alpha": 0.6,
"max_mem_len" : 7
"max_mem_len": 7
},
"param_range": {
"gamma": [0.97, 0.99],
Expand Down
2 changes: 1 addition & 1 deletion rl/spec/problems.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"MAX_EPISODES": 5000,
"REWARD_MEAN_LEN": 100
},
"Dev-Breakout-v0": {
"DevBreakout-v0": {
"GYM_ENV_NAME": "Breakout-v0",
"SOLVED_MEAN_REWARD": null,
"MAX_EPISODES": 1,
Expand Down
6 changes: 3 additions & 3 deletions rl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
LOCK_HEAD_REST_SIG = {
# signature list of [head, rest] in component lock
'mutex': [[0, 0], [1, 1]],
'subset': [[0, 0], [0, 1], [1, 1]],
'subset': [[0, 0], [1, 0], [1, 1]],
}


Expand Down Expand Up @@ -49,15 +49,15 @@ def check_lock(lock_name, lock, experiment_spec):
bin_head = (experiment_spec[lock_head] in lock[lock_head])
bin_rest_list = []
for k, v_list in lock.items():
if k in experiment_spec:
if k in experiment_spec and k != lock_head:
bin_rest_list.append(experiment_spec[k] in v_list)
# rest must all have the same signature
rest_equal = check_equal(bin_rest_list)
if not rest_equal:
raise ValueError(
'All components need to be of the same set, '
'check component lock "{}" and your spec "{}"'.format(
lock_name, experiment_spec['experiment_name']))
bin_rest_list, experiment_spec['experiment_name']))

bin_rest = bin_rest_list[0]
lock_sig = [bin_head, bin_rest]
Expand Down

0 comments on commit eeb1da6

Please sign in to comment.