diff --git a/rl/spec/atari_experiment_specs.json b/rl/spec/atari_experiment_specs.json index ad2b41c..f7ad52c 100644 --- a/rl/spec/atari_experiment_specs.json +++ b/rl/spec/atari_experiment_specs.json @@ -1,6 +1,6 @@ { "dev_conv_dqn": { - "problem": "Dev-Breakout-v0", + "problem": "DevBreakout-v0", "Agent": "ConvDQN", "HyperOptimizer": "GridSearch", "Memory": "LinearMemoryWithForgetting", diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json index 7391fbe..37fdedc 100644 --- a/rl/spec/box2d_experiment_specs.json +++ b/rl/spec/box2d_experiment_specs.json @@ -156,7 +156,7 @@ }, "lunar_freeze": { "problem": "LunarLander-v2", - "Agent": "DQNFreeze", + "Agent": "FreezeDQN", "HyperOptimizer": "GridSearch", "Memory": "LinearMemoryWithForgetting", "Optimizer": "AdamOptimizer", @@ -241,77 +241,5 @@ [400, 200, 100] ] } - }, - "bipedal": { - "problem": "BipedalWalker-v2", - "Agent": "DQN", - "HyperOptimizer": "RandomSearch", - "Memory": "LinearMemoryWithForgetting", - "Optimizer": "AdamOptimizer", - "Policy": "EpsilonGreedyPolicy", - "PreProcessor": "NoPreProcessor", - "param": { - "max_evals": 50, - "train_per_n_new_exp": 5, - "batch_size": 32, - "lr": 0.001, - "gamma": 0.99, - "hidden_layers": [300, 150, 75], - "hidden_layers_activation": "relu", - "output_layer_activation": "linear", - "exploration_anneal_episodes": 4000, - "epi_change_lr": 5000 - }, - "param_range": { - "lr": { - "min": 0.0001, - "max": 0.005 - }, - "gamma": { - "min": 0.97, - "max": 0.999 - }, - "hidden_layers": [ - [800, 400], - [400, 200, 100], - [400, 200, 100, 50] - ] - } - }, - "bipedal_hardcore": { - "problem": "BipedalWalkerHardcore-v2", - "Agent": "DQN", - "HyperOptimizer": "RandomSearch", - "Memory": "LinearMemoryWithForgetting", - "Optimizer": "AdamOptimizer", - "Policy": "EpsilonGreedyPolicy", - "PreProcessor": "NoPreProcessor", - "param": { - "max_evals": 50, - "train_per_n_new_exp": 5, - "batch_size": 32, - "lr": 0.001, - "gamma": 0.99, - "hidden_layers": 
[300, 150, 75], - "hidden_layers_activation": "relu", - "output_layer_activation": "linear", - "exploration_anneal_episodes": 4000, - "epi_change_lr": 5000 - }, - "param_range": { - "lr": { - "min": 0.0001, - "max": 0.005 - }, - "gamma": { - "min": 0.97, - "max": 0.999 - }, - "hidden_layers": [ - [800, 400], - [400, 200, 100], - [400, 200, 100, 50] - ] - } } } diff --git a/rl/spec/component_locks.json b/rl/spec/component_locks.json new file mode 100644 index 0000000..c657604 --- /dev/null +++ b/rl/spec/component_locks.json @@ -0,0 +1,61 @@ +{ + "double_network": { + "type": "mutex", + "details": "double_network agents need policies that invokes both networks properly", + "head": "Agent", + "Agent": [ + "DoubleConvDQN", + "DoubleDQN" + ], + "Policy": [ + "DoubleDQNBoltzmannPolicy", + "DoubleDQNEpsilonGreedyPolicy" + ] + }, + "discrete_action": { + "type": "subset", + "details": "discrete components cannot work in continuous action space", + "head": "problem", + "problem": [ + "Acrobot-v1", + "AirRaid-v0", + "Alien-v0", + "Assault-v0", + "Breakout-v0", + "CartPole-v0", + "CartPole-v1", + "DevBreakout-v0", + "DevCartPole-v0", + "FlappyBird-v0", + "LunarLander-v2", + "MountainCar-v0", + "MsPacman-v0", + "Pong-v0", + "Qbert-v0", + "Snake-v0", + "SpaceInvader-v0", + "TestPassCartPole-v0" + ], + "Agent": [ + "ConvDQN", + "DeepExpectedSarsa", + "DeepSarsa", + "DoubleConvDQN", + "DoubleDQN", + "DQN", + "Dummy", + "FreezeDQN", + "OffPolicySarsa", + "QTable" + ], + "Policy": [ + "BoltzmannPolicy", + "DecayingEpsilonGreedyPolicy", + "DoubleDQNBoltzmannPolicy", + "DoubleDQNEpsilonGreedyPolicy", + "EpsilonGreedyPolicy", + "OscillatingEpsilonGreedyPolicy", + "TargetedEpsilonGreedyPolicy" + ] + } +} diff --git a/rl/spec/dev_experiment_specs.json b/rl/spec/dev_experiment_specs.json index 5067c85..be835e9 100644 --- a/rl/spec/dev_experiment_specs.json +++ b/rl/spec/dev_experiment_specs.json @@ -114,7 +114,7 @@ "first_hidden_layer_size": 512, "e": 0.01, "alpha": 0.6, - "max_mem_len" 
def _load_component_locks():
    '''
    read rl/spec/component_locks.json and return it as a dict of
    {lock_name: lock}
    the file is opened in a with-block so the handle is closed
    as soon as parsing is done
    '''
    with open(path.join(SPEC_PATH, 'component_locks.json')) as lock_file:
        return json.load(lock_file)


COMPONENT_LOCKS = _load_component_locks()
LOCK_HEAD_REST_SIG = {
    # valid [head, rest] signatures per lock type, where
    # 1 = the spec's component is listed in the lock, 0 = it is not
    # mutex: head and rest are either all listed or all unlisted
    'mutex': [[0, 0], [1, 1]],
    # subset: as mutex, plus a listed head with unlisted rest;
    # only an unlisted head with listed rest is rejected
    'subset': [[0, 0], [1, 0], [1, 1]],
}
# entries of a lock that describe the lock itself, not a component list
_LOCK_META_KEYS = ('type', 'details', 'head')


def check_equal(iterator):
    '''
    check if an iterable contains all the same elements
    an empty iterable has nothing that differs, so it returns True
    '''
    iterator = iter(iterator)
    try:
        first = next(iterator)
    except StopIteration:
        return True
    return all(first == rest for rest in iterator)


def check_lock(lock_name, lock, experiment_spec):
    '''
    refer to rl/spec/component_locks.json
    check a spec's component lock using binary signatures
    e.g. head = problem (discrete)
    rest = [Agent, Policy] (to be discrete too)
    first check if rest all has the same signature, i.e. same set
    then check pair [bin_head, bin_rest] in valid_lock_sig_list
    as specified by the lock's type

    lock_name: key of the lock in COMPONENT_LOCKS, used in messages
    lock: the lock entry, with 'type', 'head' and component lists
    experiment_spec: spec dict mapping component kind to its name

    returns True if the lock holds, also when the spec has none
    of the lock's non-head components (nothing to constrain)
    raises ValueError if the rest components are of mixed sets
    or if the [head, rest] signature is invalid for the lock type
    raises KeyError if the lock type is unknown or the spec
    lacks the head component
    '''
    valid_lock_sig_list = LOCK_HEAD_REST_SIG[lock['type']]
    lock_head = lock['head']
    # .get so that a spec without a name cannot mask the real error
    spec_name = experiment_spec.get('experiment_name')
    bin_head = experiment_spec[lock_head] in lock[lock_head]
    # skip the lock's descriptive entries: they hold strings, and
    # `in` on a string would be a substring test, not membership
    bin_rest_list = [
        experiment_spec[k] in v_list
        for k, v_list in lock.items()
        if k != lock_head and k not in _LOCK_META_KEYS
        and k in experiment_spec]
    if not bin_rest_list:
        # no rest component in this spec, the lock cannot be violated
        return True

    # rest must all have the same signature
    if not check_equal(bin_rest_list):
        raise ValueError(
            'All components need to be of the same set, '
            'check component lock "{}" and your spec "{}"'.format(
                lock_name, spec_name))

    # bools compare equal to the 0/1 ints in LOCK_HEAD_REST_SIG
    lock_sig = [bin_head, bin_rest_list[0]]
    if lock_sig not in valid_lock_sig_list:
        raise ValueError(
            'Component lock violated: "{}", spec: "{}"'.format(
                lock_name, spec_name))
    return True


def check_component_locks(experiment_spec):
    '''
    check the spec components for all locks
    to ensure no lock is violated
    refer to rl/spec/component_locks.json
    returns None; check_lock raises ValueError on the first violation
    '''
    for lock_name, lock in COMPONENT_LOCKS.items():
        check_lock(lock_name, lock, experiment_spec)