fix all specs to pass component locks; debug

kengz · Apr 10, 2017 · eeb1da6
1 parent a4c6657
commit eeb1da6
Show file tree

Hide file tree

Showing 6 changed files with 21 additions and 89 deletions.
diff --git a/rl/spec/atari_experiment_specs.json b/rl/spec/atari_experiment_specs.json
@@ -1,6 +1,6 @@
 {
   "dev_conv_dqn": {
-    "problem": "Dev-Breakout-v0",
+    "problem": "DevBreakout-v0",
     "Agent": "ConvDQN",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",

diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json
@@ -156,7 +156,7 @@
   },
   "lunar_freeze": {
     "problem": "LunarLander-v2",
-    "Agent": "DQNFreeze",
+    "Agent": "FreezeDQN",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
@@ -241,77 +241,5 @@
         [400, 200, 100]
       ]
     }
-  },
-  "bipedal": {
-    "problem": "BipedalWalker-v2",
-    "Agent": "DQN",
-    "HyperOptimizer": "RandomSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "EpsilonGreedyPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "max_evals": 50,
-      "train_per_n_new_exp": 5,
-      "batch_size": 32,
-      "lr": 0.001,
-      "gamma": 0.99,
-      "hidden_layers": [300, 150, 75],
-      "hidden_layers_activation": "relu",
-      "output_layer_activation": "linear",
-      "exploration_anneal_episodes": 4000,
-      "epi_change_lr": 5000
-    },
-    "param_range": {
-      "lr": {
-        "min": 0.0001,
-        "max": 0.005
-      },
-      "gamma": {
-        "min": 0.97,
-        "max": 0.999
-      },
-      "hidden_layers": [
-        [800, 400],
-        [400, 200, 100],
-        [400, 200, 100, 50]
-      ]
-    }
-  },
-  "bipedal_hardcore": {
-    "problem": "BipedalWalkerHardcore-v2",
-    "Agent": "DQN",
-    "HyperOptimizer": "RandomSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "EpsilonGreedyPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "max_evals": 50,
-      "train_per_n_new_exp": 5,
-      "batch_size": 32,
-      "lr": 0.001,
-      "gamma": 0.99,
-      "hidden_layers": [300, 150, 75],
-      "hidden_layers_activation": "relu",
-      "output_layer_activation": "linear",
-      "exploration_anneal_episodes": 4000,
-      "epi_change_lr": 5000
-    },
-    "param_range": {
-      "lr": {
-        "min": 0.0001,
-        "max": 0.005
-      },
-      "gamma": {
-        "min": 0.97,
-        "max": 0.999
-      },
-      "hidden_layers": [
-        [800, 400],
-        [400, 200, 100],
-        [400, 200, 100, 50]
-      ]
-    }
   }
 }
diff --git a/rl/spec/component_locks.json b/rl/spec/component_locks.json
@@ -4,34 +4,37 @@
     "details": "double_network agents need policies that invokes both networks properly",
     "head": "Agent",
     "Agent": [
-      "DoubleDQN",
-      "DoubleConvDQN"
+      "DoubleConvDQN",
+      "DoubleDQN"
     ],
     "Policy": [
-      "DoubleDQNEpsilonGreedyPolicy",
-      "DoubleDQNBoltzmannPolicy"
+      "DoubleDQNBoltzmannPolicy",
+      "DoubleDQNEpsilonGreedyPolicy"
     ]
   },
   "discrete_action": {
     "type": "subset",
     "details": "discrete components cannot work in continuous action space",
     "head": "problem",
     "problem": [
-      "CartPole-v0",
-      "CartPole-v1",
       "Acrobot-v1",
-      "MountainCar-v0",
-      "LunarLander-v2",
       "AirRaid-v0",
       "Alien-v0",
       "Assault-v0",
       "Breakout-v0",
+      "CartPole-v0",
+      "CartPole-v1",
+      "DevBreakout-v0",
+      "DevCartPole-v0",
+      "FlappyBird-v0",
+      "LunarLander-v2",
+      "MountainCar-v0",
       "MsPacman-v0",
       "Pong-v0",
       "Qbert-v0",
+      "Snake-v0",
       "SpaceInvader-v0",
-      "FlappyBird-v0",
-      "Snake-v0"
+      "TestPassCartPole-v0"
     ],
     "Agent": [
       "ConvDQN",
@@ -40,6 +43,7 @@
       "DoubleConvDQN",
       "DoubleDQN",
       "DQN",
+      "Dummy",
       "FreezeDQN",
       "OffPolicySarsa",
       "QTable"

diff --git a/rl/spec/dev_experiment_specs.json b/rl/spec/dev_experiment_specs.json
@@ -114,7 +114,7 @@
       "first_hidden_layer_size": 512,
       "e": 0.01,
       "alpha": 0.6,
-      "max_mem_len" : 7
+      "max_mem_len": 7
     },
     "param_range": {
       "gamma": [0.97, 0.99],

diff --git a/rl/spec/problems.json b/rl/spec/problems.json
@@ -95,7 +95,7 @@
     "MAX_EPISODES": 5000,
     "REWARD_MEAN_LEN": 100
   },
-  "Dev-Breakout-v0": {
+  "DevBreakout-v0": {
     "GYM_ENV_NAME": "Breakout-v0",
     "SOLVED_MEAN_REWARD": null,
     "MAX_EPISODES": 1,

diff --git a/rl/util.py b/rl/util.py
@@ -19,7 +19,7 @@
 LOCK_HEAD_REST_SIG = {
     # signature list of [head, rest] in component lock
     'mutex': [[0, 0], [1, 1]],
-    'subset': [[0, 0], [0, 1], [1, 1]],
+    'subset': [[0, 0], [1, 0], [1, 1]],
 }
 
 
@@ -49,15 +49,15 @@ def check_lock(lock_name, lock, experiment_spec):
     bin_head = (experiment_spec[lock_head] in lock[lock_head])
     bin_rest_list = []
     for k, v_list in lock.items():
-        if k in experiment_spec:
+        if k in experiment_spec and k != lock_head:
             bin_rest_list.append(experiment_spec[k] in v_list)
     # rest must all have the same signature
     rest_equal = check_equal(bin_rest_list)
     if not rest_equal:
         raise ValueError(
             'All components need to be of the same set, '
             'check component lock "{}" and your spec "{}"'.format(
-                lock_name, experiment_spec['experiment_name']))
+                bin_rest_list, experiment_spec['experiment_name']))
 
     bin_rest = bin_rest_list[0]
     lock_sig = [bin_head, bin_rest]