Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Component lock #120

Merged
merged 2 commits into from
Apr 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rl/spec/atari_experiment_specs.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"dev_conv_dqn": {
"problem": "Dev-Breakout-v0",
"problem": "DevBreakout-v0",
"Agent": "ConvDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
Expand Down
74 changes: 1 addition & 73 deletions rl/spec/box2d_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
},
"lunar_freeze": {
"problem": "LunarLander-v2",
"Agent": "DQNFreeze",
"Agent": "FreezeDQN",
"HyperOptimizer": "GridSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
Expand Down Expand Up @@ -241,77 +241,5 @@
[400, 200, 100]
]
}
},
"bipedal": {
"problem": "BipedalWalker-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
},
"bipedal_hardcore": {
"problem": "BipedalWalkerHardcore-v2",
"Agent": "DQN",
"HyperOptimizer": "RandomSearch",
"Memory": "LinearMemoryWithForgetting",
"Optimizer": "AdamOptimizer",
"Policy": "EpsilonGreedyPolicy",
"PreProcessor": "NoPreProcessor",
"param": {
"max_evals": 50,
"train_per_n_new_exp": 5,
"batch_size": 32,
"lr": 0.001,
"gamma": 0.99,
"hidden_layers": [300, 150, 75],
"hidden_layers_activation": "relu",
"output_layer_activation": "linear",
"exploration_anneal_episodes": 4000,
"epi_change_lr": 5000
},
"param_range": {
"lr": {
"min": 0.0001,
"max": 0.005
},
"gamma": {
"min": 0.97,
"max": 0.999
},
"hidden_layers": [
[800, 400],
[400, 200, 100],
[400, 200, 100, 50]
]
}
}
}
61 changes: 61 additions & 0 deletions rl/spec/component_locks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"double_network": {
"type": "mutex",
"details": "double_network agents need policies that invoke both networks properly",
"head": "Agent",
"Agent": [
"DoubleConvDQN",
"DoubleDQN"
],
"Policy": [
"DoubleDQNBoltzmannPolicy",
"DoubleDQNEpsilonGreedyPolicy"
]
},
"discrete_action": {
"type": "subset",
"details": "discrete components cannot work in continuous action space",
"head": "problem",
"problem": [
"Acrobot-v1",
"AirRaid-v0",
"Alien-v0",
"Assault-v0",
"Breakout-v0",
"CartPole-v0",
"CartPole-v1",
"DevBreakout-v0",
"DevCartPole-v0",
"FlappyBird-v0",
"LunarLander-v2",
"MountainCar-v0",
"MsPacman-v0",
"Pong-v0",
"Qbert-v0",
"Snake-v0",
"SpaceInvader-v0",
"TestPassCartPole-v0"
],
"Agent": [
"ConvDQN",
"DeepExpectedSarsa",
"DeepSarsa",
"DoubleConvDQN",
"DoubleDQN",
"DQN",
"Dummy",
"FreezeDQN",
"OffPolicySarsa",
"QTable"
],
"Policy": [
"BoltzmannPolicy",
"DecayingEpsilonGreedyPolicy",
"DoubleDQNBoltzmannPolicy",
"DoubleDQNEpsilonGreedyPolicy",
"EpsilonGreedyPolicy",
"OscillatingEpsilonGreedyPolicy",
"TargetedEpsilonGreedyPolicy"
]
}
}
2 changes: 1 addition & 1 deletion rl/spec/dev_experiment_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
"first_hidden_layer_size": 512,
"e": 0.01,
"alpha": 0.6,
"max_mem_len" : 7
"max_mem_len": 7
},
"param_range": {
"gamma": [0.97, 0.99],
Expand Down
2 changes: 1 addition & 1 deletion rl/spec/problems.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"MAX_EPISODES": 5000,
"REWARD_MEAN_LEN": 100
},
"Dev-Breakout-v0": {
"DevBreakout-v0": {
"GYM_ENV_NAME": "Breakout-v0",
"SOLVED_MEAN_REWARD": null,
"MAX_EPISODES": 1,
Expand Down
65 changes: 65 additions & 0 deletions rl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,70 @@
PARALLEL_PROCESS_NUM = mp.cpu_count()
TIMESTAMP_REGEX = r'(\d{4}_\d{2}_\d{2}_\d{6})'
SPEC_PATH = path.join(path.dirname(__file__), 'spec')
# component locks are read once at import time from the spec directory;
# the `with` block guarantees the file handle is closed after parsing
with open(path.join(SPEC_PATH, 'component_locks.json')) as _lock_file:
    COMPONENT_LOCKS = json.load(_lock_file)
LOCK_HEAD_REST_SIG = {
    # signature list of [head, rest] in component lock
    # each entry is a valid pair of membership flags, where 1 means
    # "the spec's component is listed in the lock" and 0 means it is not
    # mutex: head and rest must both be in the lock, or both be outside
    'mutex': [[0, 0], [1, 1]],
    # subset: rest may be in the lock only if head is in the lock too
    'subset': [[0, 0], [1, 0], [1, 1]],
}


def check_equal(iterator):
    '''
    check if all elements of an iterable are equal to the first one
    an empty iterable counts as all-equal and returns True
    '''
    remaining = iter(iterator)
    for head in remaining:
        # grab the first element as the reference, leave the rest unconsumed
        break
    else:
        # nothing to compare
        return True
    for other in remaining:
        if not head == other:
            return False
    return True


def check_lock(lock_name, lock, experiment_spec):
    '''
    refer to rl/spec/component_locks.json
    check a spec's component lock using binary signatures
    e.g. head = problem (discrete)
    rest = [Agent, Policy] (to be discrete too)
    first check if rest all has the same signature, i.e. same set
    then check pair [bin_head, bin_rest] in valid_lock_sig_list
    as specified by the lock's type

    params:
    lock_name: key of the lock in COMPONENT_LOCKS, used in error messages
    lock: the lock dict, with 'type', 'head' and one list per component
    experiment_spec: the spec dict; must contain the lock's head component

    returns True if the lock is satisfied, or if the spec contains
    none of the lock's non-head components (nothing to constrain)
    raises ValueError if the rest components disagree on membership,
    or if the [head, rest] signature is not valid for the lock type
    '''
    lock_type = lock['type']
    valid_lock_sig_list = LOCK_HEAD_REST_SIG[lock_type]
    lock_head = lock['head']
    # .get so that a spec lacking a name still yields the intended ValueError
    spec_name = experiment_spec.get('experiment_name')
    bin_head = (experiment_spec[lock_head] in lock[lock_head])
    # metadata keys of a lock are not components and must not be
    # membership-tested, even if the spec has a key of the same name
    meta_keys = ('type', 'details', 'head')
    bin_rest_list = [
        experiment_spec[k] in v_list
        for k, v_list in lock.items()
        if k in experiment_spec and k != lock_head and k not in meta_keys
    ]
    if not bin_rest_list:
        # no rest component present in the spec: the lock is vacuously
        # satisfied (indexing [0] below would otherwise raise IndexError)
        return True

    # rest must all have the same signature
    if not check_equal(bin_rest_list):
        raise ValueError(
            'All components need to be of the same set, '
            'check component lock "{}" and your spec "{}"'.format(
                lock_name, spec_name))

    # bools compare equal to the 0/1 ints used in LOCK_HEAD_REST_SIG
    lock_sig = [bin_head, bin_rest_list[0]]
    if lock_sig not in valid_lock_sig_list:
        raise ValueError(
            'Component lock violated: "{}", spec: "{}"'.format(
                lock_name, spec_name))
    return True


def check_component_locks(experiment_spec):
    '''
    run every lock in COMPONENT_LOCKS against the given spec
    check_lock raises on the first violated lock; returns None otherwise
    refer to rl/spec/component_locks.json
    '''
    for name in COMPONENT_LOCKS:
        check_lock(name, COMPONENT_LOCKS[name], experiment_spec)


# import and safeguard the PROBLEMS, EXPERIMENT_SPECS with checks
Expand Down Expand Up @@ -42,6 +106,7 @@ def import_guard_asset():
assert all(k in spec for k in REQUIRED_SPEC_KEYS), \
'{} needs all REQUIRED_SPEC_KEYS'.format(experiment_name)
EXPERIMENT_SPECS[experiment_name]['experiment_name'] = experiment_name
check_component_locks(spec) # check component_locks.json
if 'param_range' not in EXPERIMENT_SPECS[experiment_name]:
continue

Expand Down