Skip to content

Commit

Permalink
Merge pull request #120 from kengz/component-lock
Browse files Browse the repository at this point in the history
Component lock
  • Loading branch information
kengz authored Apr 10, 2017
2 parents ca0a3cf + eeb1da6 commit aadb4c4
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 76 deletions.
2 changes: 1 addition & 1 deletion rl/spec/atari_experiment_specs.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"dev_conv_dqn": {
"problem": "Dev-Breakout-v0",
"problem": "DevBreakout-v0",
"Agent": "ConvDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
Expand Down
74 changes: 1 addition & 73 deletions rl/spec/box2d_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
},
"lunar_freeze": {
"problem": "LunarLander-v2",
"Agent": "DQNFreeze",
"Agent": "FreezeDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
Expand Down Expand Up @@ -241,77 +241,5 @@
[400, 200, 100]
]
}
},
"bipedal": {
"problem": "BipedalWalker-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
},
"bipedal_hardcore": {
"problem": "BipedalWalkerHardcore-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
}
}
61 changes: 61 additions & 0 deletions rl/spec/component_locks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"double_network": {
"type": "mutex",
"details": "double_network agents need policies that invoke both networks properly",
"head": "Agent",
"Agent": [
"DoubleConvDQN",
"DoubleDQN"
],
"Policy": [
"DoubleDQNBoltzmannPolicy",
"DoubleDQNEpsilonGreedyPolicy"
]
},
"discrete_action": {
"type": "subset",
"details": "discrete components cannot work in continuous action space",
"head": "problem",
"problem": [
"Acrobot-v1",
"AirRaid-v0",
"Alien-v0",
"Assault-v0",
"Breakout-v0",
"CartPole-v0",
"CartPole-v1",
"DevBreakout-v0",
"DevCartPole-v0",
"FlappyBird-v0",
"LunarLander-v2",
"MountainCar-v0",
"MsPacman-v0",
"Pong-v0",
"Qbert-v0",
"Snake-v0",
"SpaceInvader-v0",
"TestPassCartPole-v0"
],
"Agent": [
"ConvDQN",
"DeepExpectedSarsa",
"DeepSarsa",
"DoubleConvDQN",
"DoubleDQN",
"DQN",
"Dummy",
"FreezeDQN",
"OffPolicySarsa",
"QTable"
],
"Policy": [
"BoltzmannPolicy",
"DecayingEpsilonGreedyPolicy",
"DoubleDQNBoltzmannPolicy",
"DoubleDQNEpsilonGreedyPolicy",
"EpsilonGreedyPolicy",
"OscillatingEpsilonGreedyPolicy",
"TargetedEpsilonGreedyPolicy"
]
}
}
2 changes: 1 addition & 1 deletion rl/spec/dev_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
"first_hidden_layer_size": 512,
"e": 0.01,
"alpha": 0.6,
"max_mem_len" : 7
"max_mem_len": 7
},
"param_range": {
"gamma": [0.97, 0.99],
Expand Down
2 changes: 1 addition & 1 deletion rl/spec/problems.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"MAX_EPISODES": 5000,
"REWARD_MEAN_LEN": 100
},
"Dev-Breakout-v0": {
"DevBreakout-v0": {
"GYM_ENV_NAME": "Breakout-v0",
"SOLVED_MEAN_REWARD": null,
"MAX_EPISODES": 1,
Expand Down
65 changes: 65 additions & 0 deletions rl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,70 @@
PARALLEL_PROCESS_NUM = mp.cpu_count()
TIMESTAMP_REGEX = r'(\d{4}_\d{2}_\d{2}_\d{6})'
SPEC_PATH = path.join(path.dirname(__file__), 'spec')
# load the component locks once at import time; the context manager closes
# the file handle right away instead of leaving it to garbage collection
with open(path.join(SPEC_PATH, 'component_locks.json')) as _lock_file:
    COMPONENT_LOCKS = json.load(_lock_file)
# valid [head, rest] signatures per lock type, as consumed by check_lock:
# each entry is 1 when the spec's component is listed in the lock, else 0
LOCK_HEAD_REST_SIG = {
    # head and rest must be both inside or both outside the lock's lists
    'mutex': [[0, 0], [1, 1]],
    # as mutex, but a listed head may also pair with unlisted rest
    'subset': [[0, 0], [1, 0], [1, 1]],
}


def check_equal(iterator):
    '''
    return True when every element of the iterable equals the first one;
    an empty iterable counts as all-equal
    '''
    remaining = iter(iterator)
    nothing = object()
    head = next(remaining, nothing)
    if head is nothing:
        # no elements at all, so nothing can differ
        return True
    for other in remaining:
        if not head == other:
            return False
    return True


def check_lock(lock_name, lock, experiment_spec):
    '''
    check a spec against one component lock using binary signatures
    refer to rl/spec/component_locks.json

    e.g. head = problem (discrete)
         rest = [Agent, Policy] (to be discrete too)
    first check that all rest components have the same signature,
    i.e. they are all inside or all outside the lock's lists,
    then check that the pair [bin_head, bin_rest] is in the
    valid signature list specified by the lock's type

    lock_name: key of the lock, used in error messages
    lock: dict with 'type', 'head' and one list per locked component
    experiment_spec: dict mapping component names to the chosen values

    returns True when the lock is respected, including the case where
    the spec contains none of the lock's non-head components
    raises ValueError when the rest components disagree with each other
    or when the [head, rest] signature is not valid for the lock type
    '''
    lock_type = lock['type']
    valid_lock_sig_list = LOCK_HEAD_REST_SIG[lock_type]
    lock_head = lock['head']
    bin_head = (experiment_spec[lock_head] in lock[lock_head])
    # descriptive keys of the lock itself, never component lists
    meta_keys = ('type', 'details', 'head')
    bin_rest_list = [
        experiment_spec[k] in v_list
        for k, v_list in lock.items()
        if k != lock_head and k not in meta_keys and k in experiment_spec
    ]
    # .get so a missing name cannot mask the real error with a KeyError
    spec_name = experiment_spec.get('experiment_name')
    if not bin_rest_list:
        # the spec has no component constrained by this lock
        return True

    # rest must all have the same signature
    rest_equal = check_equal(bin_rest_list)
    if not rest_equal:
        raise ValueError(
            'All components need to be of the same set, '
            'check component lock "{}" and your spec "{}"'.format(
                lock_name, spec_name))

    bin_rest = bin_rest_list[0]
    # bools compare equal to the 0/1 entries of the signature lists
    lock_sig = [bin_head, bin_rest]
    lock_valid = lock_sig in valid_lock_sig_list
    if not lock_valid:
        raise ValueError(
            'Component lock violated: "{}", spec: "{}"'.format(
                lock_name, spec_name))
    return lock_valid


def check_component_locks(experiment_spec):
    '''
    run every lock from rl/spec/component_locks.json against the spec;
    a violated lock surfaces as the error raised by check_lock
    '''
    for name in COMPONENT_LOCKS:
        check_lock(name, COMPONENT_LOCKS[name], experiment_spec)
    return None


# import and safeguard the PROBLEMS, EXPERIMENT_SPECS with checks
Expand Down Expand Up @@ -42,6 +106,7 @@ def import_guard_asset():
assert all(k in spec for k in REQUIRED_SPEC_KEYS), \
'{} needs all REQUIRED_SPEC_KEYS'.format(experiment_name)
EXPERIMENT_SPECS[experiment_name]['experiment_name'] = experiment_name
check_component_locks(spec) # check component_locks.json
if 'param_range' not in EXPERIMENT_SPECS[experiment_name]:
continue

Expand Down

0 comments on commit aadb4c4

Please sign in to comment.