Merge pull request #120 from kengz/component-lock

Component lock
kengz · Apr 10, 2017 · aadb4c4 · aadb4c4
2 parents ca0a3cf + eeb1da6
commit aadb4c4
Show file tree

Hide file tree

Showing 6 changed files with 130 additions and 76 deletions.
diff --git a/rl/spec/atari_experiment_specs.json b/rl/spec/atari_experiment_specs.json
@@ -1,6 +1,6 @@
 {
   "dev_conv_dqn": {
-    "problem": "Dev-Breakout-v0",
+    "problem": "DevBreakout-v0",
     "Agent": "ConvDQN",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",

diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json
@@ -156,7 +156,7 @@
   },
   "lunar_freeze": {
     "problem": "LunarLander-v2",
-    "Agent": "DQNFreeze",
+    "Agent": "FreezeDQN",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
@@ -241,77 +241,5 @@
         [400, 200, 100]
       ]
     }
-  },
-  "bipedal": {
-    "problem": "BipedalWalker-v2",
-    "Agent": "DQN",
-    "HyperOptimizer": "RandomSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "EpsilonGreedyPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "max_evals": 50,
-      "train_per_n_new_exp": 5,
-      "batch_size": 32,
-      "lr": 0.001,
-      "gamma": 0.99,
-      "hidden_layers": [300, 150, 75],
-      "hidden_layers_activation": "relu",
-      "output_layer_activation": "linear",
-      "exploration_anneal_episodes": 4000,
-      "epi_change_lr": 5000
-    },
-    "param_range": {
-      "lr": {
-        "min": 0.0001,
-        "max": 0.005
-      },
-      "gamma": {
-        "min": 0.97,
-        "max": 0.999
-      },
-      "hidden_layers": [
-        [800, 400],
-        [400, 200, 100],
-        [400, 200, 100, 50]
-      ]
-    }
-  },
-  "bipedal_hardcore": {
-    "problem": "BipedalWalkerHardcore-v2",
-    "Agent": "DQN",
-    "HyperOptimizer": "RandomSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "EpsilonGreedyPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "max_evals": 50,
-      "train_per_n_new_exp": 5,
-      "batch_size": 32,
-      "lr": 0.001,
-      "gamma": 0.99,
-      "hidden_layers": [300, 150, 75],
-      "hidden_layers_activation": "relu",
-      "output_layer_activation": "linear",
-      "exploration_anneal_episodes": 4000,
-      "epi_change_lr": 5000
-    },
-    "param_range": {
-      "lr": {
-        "min": 0.0001,
-        "max": 0.005
-      },
-      "gamma": {
-        "min": 0.97,
-        "max": 0.999
-      },
-      "hidden_layers": [
-        [800, 400],
-        [400, 200, 100],
-        [400, 200, 100, 50]
-      ]
-    }
   }
 }
diff --git a/rl/spec/component_locks.json b/rl/spec/component_locks.json
@@ -0,0 +1,61 @@
+{
+  "double_network": {
+    "type": "mutex",
+    "details": "double_network agents need policies that invoke both networks properly",
+    "head": "Agent",
+    "Agent": [
+      "DoubleConvDQN",
+      "DoubleDQN"
+    ],
+    "Policy": [
+      "DoubleDQNBoltzmannPolicy",
+      "DoubleDQNEpsilonGreedyPolicy"
+    ]
+  },
+  "discrete_action": {
+    "type": "subset",
+    "details": "discrete components cannot work in continuous action space",
+    "head": "problem",
+    "problem": [
+      "Acrobot-v1",
+      "AirRaid-v0",
+      "Alien-v0",
+      "Assault-v0",
+      "Breakout-v0",
+      "CartPole-v0",
+      "CartPole-v1",
+      "DevBreakout-v0",
+      "DevCartPole-v0",
+      "FlappyBird-v0",
+      "LunarLander-v2",
+      "MountainCar-v0",
+      "MsPacman-v0",
+      "Pong-v0",
+      "Qbert-v0",
+      "Snake-v0",
+      "SpaceInvader-v0",
+      "TestPassCartPole-v0"
+    ],
+    "Agent": [
+      "ConvDQN",
+      "DeepExpectedSarsa",
+      "DeepSarsa",
+      "DoubleConvDQN",
+      "DoubleDQN",
+      "DQN",
+      "Dummy",
+      "FreezeDQN",
+      "OffPolicySarsa",
+      "QTable"
+    ],
+    "Policy": [
+      "BoltzmannPolicy",
+      "DecayingEpsilonGreedyPolicy",
+      "DoubleDQNBoltzmannPolicy",
+      "DoubleDQNEpsilonGreedyPolicy",
+      "EpsilonGreedyPolicy",
+      "OscillatingEpsilonGreedyPolicy",
+      "TargetedEpsilonGreedyPolicy"
+    ]
+  }
+}
diff --git a/rl/spec/dev_experiment_specs.json b/rl/spec/dev_experiment_specs.json
@@ -114,7 +114,7 @@
       "first_hidden_layer_size": 512,
       "e": 0.01,
       "alpha": 0.6,
-      "max_mem_len" : 7
+      "max_mem_len": 7
     },
     "param_range": {
       "gamma": [0.97, 0.99],

diff --git a/rl/spec/problems.json b/rl/spec/problems.json
@@ -95,7 +95,7 @@
     "MAX_EPISODES": 5000,
     "REWARD_MEAN_LEN": 100
   },
-  "Dev-Breakout-v0": {
+  "DevBreakout-v0": {
     "GYM_ENV_NAME": "Breakout-v0",
     "SOLVED_MEAN_REWARD": null,
     "MAX_EPISODES": 1,

diff --git a/rl/util.py b/rl/util.py
@@ -14,6 +14,70 @@
 PARALLEL_PROCESS_NUM = mp.cpu_count()
 TIMESTAMP_REGEX = r'(\d{4}_\d{2}_\d{2}_\d{6})'
 SPEC_PATH = path.join(path.dirname(__file__), 'spec')
+# component locks, read once at import time from the spec directory;
+# the with-block closes the file handle as soon as parsing is done
+with open(path.join(SPEC_PATH, 'component_locks.json')) as _lock_file:
+    COMPONENT_LOCKS = json.load(_lock_file)
+LOCK_HEAD_REST_SIG = {
+    # per lock type: the list of valid [head, rest] signature pairs,
+    # looked up by check_lock via the lock's 'type' field
+    'mutex': [[0, 0], [1, 1]],
+    'subset': [[0, 0], [1, 0], [1, 1]],
+}
+
+
+def check_equal(iterator):
+    '''
+    return True if every element of the iterable equals the first one
+    an empty iterable counts as all-equal
+    stops consuming at the first element that differs
+    '''
+    values = iter(iterator)
+    nothing = object()
+    head = next(values, nothing)
+    if head is nothing:
+        return True
+    for other in values:
+        if not head == other:
+            return False
+    return True
+
+
+def check_lock(lock_name, lock, experiment_spec):
+    '''
+    check one component lock against a spec using binary signatures
+    refer to rl/spec/component_locks.json
+
+    the head component (named by lock['head']) and every other lock key
+    that also appears in the spec are each reduced to a boolean:
+    "is the spec's value listed under that key in the lock?"
+    e.g. head = problem (discrete)
+    rest = [Agent, Policy] (to be discrete too)
+    first check that the rest all share the same signature, i.e. same set
+    then check the pair [bin_head, bin_rest] is in valid_lock_sig_list
+    as specified by the lock's type in LOCK_HEAD_REST_SIG
+
+    lock_name: name of the lock, used in error messages
+    lock: one lock entry with 'type', 'head' and component lists
+    experiment_spec: the spec to check; 'experiment_name' is read
+        when building an error message
+
+    returns True when the lock is satisfied, or when the spec has none
+    of the lock's non-head components (nothing to constrain)
+    raises ValueError when the rest components disagree with each other
+    or when the [head, rest] pair is not valid for the lock type
+    '''
+    valid_lock_sig_list = LOCK_HEAD_REST_SIG[lock['type']]
+    lock_head = lock['head']
+    bin_head = (experiment_spec[lock_head] in lock[lock_head])
+    bin_rest_list = [
+        experiment_spec[k] in v_list
+        for k, v_list in lock.items()
+        if k in experiment_spec and k != lock_head]
+    if not bin_rest_list:
+        # the spec shares no non-head component with this lock,
+        # so there is no rest signature to compare against
+        return True
+    # rest must all have the same signature
+    if not check_equal(bin_rest_list):
+        raise ValueError(
+            'All components need to be of the same set, '
+            'check component lock "{}" and your spec "{}"'.format(
+                lock_name, experiment_spec['experiment_name']))
+
+    # booleans compare equal to the 0/1 entries of the signature table
+    lock_sig = [bin_head, bin_rest_list[0]]
+    if lock_sig not in valid_lock_sig_list:
+        raise ValueError(
+            'Component lock violated: "{}", spec: "{}"'.format(
+                lock_name, experiment_spec['experiment_name']))
+    return True
+
+
+def check_component_locks(experiment_spec):
+    '''
+    run check_lock on the spec for every entry in COMPONENT_LOCKS
+    so that a violated lock surfaces as the error check_lock raises
+    refer to rl/spec/component_locks.json
+    returns None
+    '''
+    for lock_name in COMPONENT_LOCKS:
+        check_lock(lock_name, COMPONENT_LOCKS[lock_name], experiment_spec)
 
 
 # import and safeguard the PROBLEMS, EXPERIMENT_SPECS with checks
@@ -42,6 +106,7 @@ def import_guard_asset():
         assert all(k in spec for k in REQUIRED_SPEC_KEYS), \
             '{} needs all REQUIRED_SPEC_KEYS'.format(experiment_name)
         EXPERIMENT_SPECS[experiment_name]['experiment_name'] = experiment_name
+        check_component_locks(spec)  # check component_locks.json
         if 'param_range' not in EXPERIMENT_SPECS[experiment_name]:
             continue