diff --git a/rl/agent/actor_critic.py b/rl/agent/actor_critic.py
index 945f2c9..7c3ae86 100644
--- a/rl/agent/actor_critic.py
+++ b/rl/agent/actor_critic.py
@@ -114,7 +114,11 @@ def train_critic(self, minibatch):
         actor_delta = Q_next_vals - Q_vals
         loss = self.critic.train_on_batch(minibatch['states'], Q_targets)
 
+        # update memory, needed for PER
         errors = abs(np.sum(Q_vals - Q_targets, axis=1))
+        # Q size is only 1, from critic
+        assert Q_targets.shape == (self.batch_size, 1)
+        assert errors.shape == (self.batch_size, )
         self.memory.update(errors)
         return loss, actor_delta
diff --git a/rl/agent/ddpg.py b/rl/agent/ddpg.py
index cb9efb8..eece44e 100644
--- a/rl/agent/ddpg.py
+++ b/rl/agent/ddpg.py
@@ -242,6 +242,7 @@ def train_an_epoch(self):
         # train critic
         mu_prime = self.actor.target_predict(minibatch['next_states'])
+        q_val = self.critic.target_predict(minibatch['states'], mu_prime)
         q_prime = self.critic.target_predict(
             minibatch['next_states'], mu_prime)
         # reshape for element-wise multiplication
@@ -250,6 +251,13 @@
             (1 - minibatch['terminals']) * np.reshape(q_prime, (-1))
         y = np.reshape(y, (-1, 1))
 
+        # update memory, needed for PER
+        errors = abs(np.sum(q_val - y, axis=1))
+        # Q size is only 1, from critic
+        assert y.shape == (self.batch_size, 1)
+        assert errors.shape == (self.batch_size, )
+        self.memory.update(errors)
+
         _, _, critic_loss = self.critic.train_tf(
             minibatch['states'], minibatch['actions'], y)
diff --git a/rl/agent/deep_sarsa.py b/rl/agent/deep_sarsa.py
index a535050..f6ddd09 100644
--- a/rl/agent/deep_sarsa.py
+++ b/rl/agent/deep_sarsa.py
@@ -1,3 +1,4 @@
+import numpy as np
 from rl.agent.dqn import DQN
 
 
@@ -30,4 +31,10 @@ def train_an_epoch(self):
         Q_targets = self.compute_Q_targets(
             minibatch, Q_states, Q_next_states_selected)
         loss = self.model.train_on_batch(minibatch['states'], Q_targets)
+
+        errors = abs(np.sum(Q_states - Q_targets, axis=1))
+        assert Q_targets.shape == (
+            self.batch_size, self.env_spec['action_dim'])
+        assert errors.shape == (self.batch_size, )
+        self.memory.update(errors)
         return loss
diff --git a/rl/agent/dqn.py b/rl/agent/dqn.py
index 26e3ae4..4810e9c 100644
--- a/rl/agent/dqn.py
+++ b/rl/agent/dqn.py
@@ -190,10 +190,12 @@ def train_an_epoch(self):
             minibatch)
         Q_targets = self.compute_Q_targets(
             minibatch, Q_states, Q_next_states_max)
-
         loss = self.model.train_on_batch(minibatch['states'], Q_targets)
 
         errors = abs(np.sum(Q_states - Q_targets, axis=1))
+        assert Q_targets.shape == (
+            self.batch_size, self.env_spec['action_dim'])
+        assert errors.shape == (self.batch_size, )
         self.memory.update(errors)
         return loss
diff --git a/rl/analytics.py b/rl/analytics.py
index 57d654a..1be9044 100644
--- a/rl/analytics.py
+++ b/rl/analytics.py
@@ -317,9 +317,10 @@ def compose_data(trial):
     }
 
     # param variables for independent vars of trials
+    default_param = trial.experiment_spec['param']
     param_variables = {
-        pv: trial.experiment_spec['param'][pv] for
-        pv in trial.param_variables}
+        pv: default_param[pv] for
+        pv in trial.param_variables if pv in default_param}
     trial.data['metrics'].update(metrics)
     trial.data['param_variables'] = param_variables
 
@@ -459,7 +460,7 @@ def analyze_data(experiment_data_or_experiment_id):
     data_df.sort_values(
         ['fitness_score'], ascending=False, inplace=True)
-    data_df.reset_index(inplace=True)
+    data_df.reset_index(drop=True, inplace=True)
 
     trial_id = experiment_data[0]['trial_id']
     save_experiment_data(data_df, trial_id)
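Note: the agent hunks above all share one pattern: right after the batch update, the absolute per-transition TD error is recomputed and pushed back into memory so PrioritizedExperienceReplay can re-weight the transitions it just served, and the new asserts pin the expected shapes (one error per sample). A minimal sketch of that pattern follows; it assumes a Keras-style model interface and integer action indices, and the inline target computation stands in for the repo's own compute_Q_targets, which this diff does not show.

import numpy as np

def train_and_update_priorities(model, memory, minibatch, gamma=0.99):
    # model.predict / model.train_on_batch follow the Keras-style interface
    # used by the agents above; memory.update is the PER hook from this diff
    Q_states = model.predict(minibatch['states'])        # (batch, action_dim)
    Q_next = model.predict(minibatch['next_states'])     # (batch, action_dim)
    batch_size, action_dim = Q_states.shape

    # DQN-style targets: only the taken action's entry changes, so the row
    # sum of (Q_states - Q_targets) below reduces to one TD error per sample
    Q_targets = Q_states.copy()
    q_max = np.amax(Q_next, axis=1)
    target = minibatch['rewards'] + gamma * (1 - minibatch['terminals']) * q_max
    Q_targets[np.arange(batch_size), minibatch['actions'].astype(int)] = target

    loss = model.train_on_batch(minibatch['states'], Q_targets)

    errors = np.abs(np.sum(Q_states - Q_targets, axis=1))
    assert Q_targets.shape == (batch_size, action_dim)
    assert errors.shape == (batch_size, )
    memory.update(errors)  # re-prioritize exactly the transitions just sampled
    return loss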
diff --git a/rl/memory/prioritized_exp_replay.py b/rl/memory/prioritized_exp_replay.py
index d6e8320..476bbe0 100644
--- a/rl/memory/prioritized_exp_replay.py
+++ b/rl/memory/prioritized_exp_replay.py
@@ -12,8 +12,13 @@ class PrioritizedExperienceReplay(LinearMemoryWithForgetting):
     memory unit
     '''
 
-    def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
+    def __init__(self, env_spec, max_mem_len=None, e=0.01, alpha=0.6,
                  **kwargs):
+        if max_mem_len is None:  # auto calculate mem len
+            max_timestep = env_spec['timestep_limit']
+            max_epis = env_spec['problem']['MAX_EPISODES']
+            memory_epi = np.ceil(max_epis / 3.).astype(int)
+            max_mem_len = max(10**6, max_timestep * memory_epi)
         super(PrioritizedExperienceReplay, self).__init__(
             env_spec, max_mem_len)
         self.exp_keys.append('error')
@@ -27,21 +32,18 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
         self.prio_tree = SumTree(self.max_mem_len)
         self.head = 0
 
-        # bump to account for negative terms in reward get_priority
-        # and we cannot abs(reward) cuz it's sign sensitive
-        SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD'] or 10000
-        self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
-
     def get_priority(self, error):
         # add min_priority to prevent root of negative = complex
-        p = (self.min_priority + error + self.e) ** self.alpha
-        assert not np.isnan(p)
+        p = (error + self.e) ** self.alpha
+        assert np.isfinite(p)
         return p
 
     def add_exp(self, action, reward, next_state, terminal):
         '''Round robin memory updating'''
-        # roughly the error between estimated Q and true q is the reward
-        error = reward
+        # init error to reward first, update later
+        error = abs(reward)
+        p = self.get_priority(error)
+
         if self.size() < self.max_mem_len:  # add as usual
             super(PrioritizedExperienceReplay, self).add_exp(
                 action, reward, next_state, terminal)
@@ -59,7 +61,6 @@ def add_exp(self, action, reward, next_state, terminal):
         if self.head >= self.max_mem_len:
             self.head = 0  # reset for round robin
 
-        p = self.get_priority(error)
         self.prio_tree.add(p)
 
         assert self.head == self.prio_tree.head, 'prio_tree head is wrong'
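Note: with the min_priority bump removed, get_priority now only sees non-negative errors (the callers take abs() first), so (error + e) ** alpha stays real and the assert tightens from not-NaN to isfinite. The sketch below shows how such priorities drive sampling; the repo's SumTree does this in O(log n), and the cumulative-sum search here is only a short-demo substitute. Note that alpha=0 degenerates to uniform sampling, which is why the new spec grids sweep alpha.

import numpy as np

def get_priority(error, e=0.01, alpha=0.6):
    # mirrors the updated get_priority: error is expected to be >= 0 already
    p = (error + e) ** alpha
    assert np.isfinite(p)
    return p

def sample_indices(priorities, batch_size, rng=np.random):
    # proportional sampling; equivalent in distribution to a SumTree lookup
    cum = np.cumsum(priorities)
    picks = rng.uniform(0.0, cum[-1], size=batch_size)
    return np.searchsorted(cum, picks)

priorities = np.array([get_priority(abs(err)) for err in (-0.5, 0.1, 2.0, 0.0)])
print(sample_indices(priorities, batch_size=3))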
diff --git a/rl/policy/actor_critic.py b/rl/policy/actor_critic.py
index 900ac7d..89f15be 100644
--- a/rl/policy/actor_critic.py
+++ b/rl/policy/actor_critic.py
@@ -39,7 +39,7 @@ class SoftmaxPolicy(Policy):
     def __init__(self, env_spec, **kwargs):  # absorb generic param without breaking
         super(SoftmaxPolicy, self).__init__(env_spec)
-        self.clip_val = 500
+        self.clip_val = 500.
         log_self(self)
 
     def select_action(self, state):
@@ -47,11 +47,10 @@ def select_action(self, state):
         state = np.expand_dims(state, axis=0)
         A_score = agent.actor.predict(state)[0]  # extract from batch predict
         assert A_score.ndim == 1
-        A_score = A_score.astype('float32')  # fix precision nan issue
-        A_score = A_score - np.amax(A_score)  # prevent overflow
+        A_score = A_score.astype('float64')  # fix precision overflow
         exp_values = np.exp(
             np.clip(A_score, -self.clip_val, self.clip_val))
-        assert not np.isnan(exp_values).any()
+        assert np.isfinite(exp_values).all()
         probs = np.array(exp_values / np.sum(exp_values))
         probs /= probs.sum()  # renormalize to prevent floating pt error
         action = np.random.choice(agent.env_spec['actions'], p=probs)
@@ -83,6 +82,9 @@ def select_action(self, state):
         a_mean = agent.actor.predict(state)[0]  # extract from batch predict
         action = a_mean + np.random.normal(
             loc=0.0, scale=self.variance, size=a_mean.shape)
+        action = np.clip(action,
+                         self.env_spec['action_bound_low'],
+                         self.env_spec['action_bound_high'])
         return action
 
     def update(self, sys_vars):
diff --git a/rl/policy/boltzmann.py b/rl/policy/boltzmann.py
index ef9069b..0fdd53f 100644
--- a/rl/policy/boltzmann.py
+++ b/rl/policy/boltzmann.py
@@ -18,7 +18,7 @@ def __init__(self, env_spec,
         self.final_tau = final_tau
         self.tau = self.init_tau
         self.exploration_anneal_episodes = exploration_anneal_episodes
-        self.clip_val = 500
+        self.clip_val = 500.
         log_self(self)
 
     def select_action(self, state):
@@ -26,11 +26,10 @@ def select_action(self, state):
         state = np.expand_dims(state, axis=0)
         Q_state = agent.model.predict(state)[0]  # extract from batch predict
         assert Q_state.ndim == 1
-        Q_state = Q_state.astype('float32')  # fix precision nan issue
-        Q_state = Q_state - np.amax(Q_state)  # prevent overflow
+        Q_state = Q_state.astype('float64')  # fix precision overflow
         exp_values = np.exp(
             np.clip(Q_state / self.tau, -self.clip_val, self.clip_val))
-        assert not np.isnan(exp_values).any()
+        assert np.isfinite(exp_values).all()
         probs = np.array(exp_values / np.sum(exp_values))
         probs /= probs.sum()  # renormalize to prevent floating pt error
         action = np.random.choice(agent.env_spec['actions'], p=probs)
@@ -66,11 +65,10 @@ def select_action(self, state):
         Q_state2 = agent.model_2.predict(state)[0]
         Q_state = Q_state1 + Q_state2
         assert Q_state.ndim == 1
-        Q_state = Q_state.astype('float32')  # fix precision nan issue
-        Q_state = Q_state - np.amax(Q_state)  # prevent overflow
+        Q_state = Q_state.astype('float64')  # fix precision overflow
         exp_values = np.exp(
             np.clip(Q_state / self.tau, -self.clip_val, self.clip_val))
-        assert not np.isnan(exp_values).any()
+        assert np.isfinite(exp_values).all()
         probs = np.array(exp_values / np.sum(exp_values))
         probs /= probs.sum()  # renormalize to prevent floating pt error
         action = np.random.choice(agent.env_spec['actions'], p=probs)
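Note: the softmax/Boltzmann hunks drop the float32 cast and the max-subtraction in favour of a float64 cast plus clipping at ±500. float32 overflows around exp(88) while float64 is safe up to roughly exp(709), so the clipped scores stay finite; subtracting np.amax remains the more conventional guard, and the clip means any two scores above clip_val collapse to equal probability. A small self-contained check (the function name is illustrative, not the library's API):

import numpy as np

def softmax_probs(scores, tau=1.0, clip_val=500.):
    scores = np.asarray(scores, dtype='float64')
    exp_values = np.exp(np.clip(scores / tau, -clip_val, clip_val))
    assert np.isfinite(exp_values).all()
    probs = exp_values / np.sum(exp_values)
    probs /= probs.sum()  # renormalize to absorb floating point error
    return probs

print(softmax_probs([600., 10., -5.]))   # finite thanks to float64 + clipping
print(np.exp(np.float32(88.8)))          # inf: the float32 overflow being avoided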
diff --git a/rl/policy/noise.py b/rl/policy/noise.py
index fec9507..e1b695b 100644
--- a/rl/policy/noise.py
+++ b/rl/policy/noise.py
@@ -1,6 +1,7 @@
 import numpy as np
 from rl.util import log_self
 from rl.policy.base_policy import Policy
+from rl.policy.epsilon_greedy import EpsilonGreedyPolicy
 
 
 class NoNoisePolicy(Policy):
@@ -25,6 +26,9 @@ def select_action(self, state):
         state = np.expand_dims(state, axis=0)
         if self.env_spec['actions'] == 'continuous':
             action = agent.actor.predict(state)[0] + self.sample()
+            action = np.clip(action,
+                             self.env_spec['action_bound_low'],
+                             self.env_spec['action_bound_high'])
         else:
             Q_state = agent.actor.predict(state)[0]
             assert Q_state.ndim == 1
@@ -60,6 +64,26 @@ def update(self, sys_vars):
         self.n_step = sys_vars['epi']
 
 
+class EpsilonGreedyNoisePolicy(EpsilonGreedyPolicy, NoNoisePolicy):
+
+    '''
+    akin to epsilon greedy decay,
+    but return random sample instead
+    '''
+
+    def sample(self):
+        if self.e > np.random.rand():
+            noise = np.random.uniform(
+                0.5 * self.env_spec['action_bound_low'],
+                0.5 * self.env_spec['action_bound_high'])
+        else:
+            noise = 0
+        return noise
+
+    def select_action(self, state):
+        return NoNoisePolicy.select_action(self, state)
+
+
 class AnnealedGaussianPolicy(LinearNoisePolicy):
 
     '''
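Note: EpsilonGreedyNoisePolicy combines EpsilonGreedyPolicy (which supplies self.e and its per-episode decay) with NoNoisePolicy (whose select_action adds self.sample() and now clips to the action bounds). A hedged sketch of the intended behaviour for continuous actions; the helper name and the standalone bound handling are illustrative rather than the repo's API:

import numpy as np

def epsilon_greedy_noisy_action(a_mean, e, low, high, rng=np.random):
    # a_mean: deterministic action from the actor; e: current epsilon,
    # decayed per episode by the EpsilonGreedyPolicy side of the MRO
    if e > rng.rand():
        noise = rng.uniform(0.5 * low, 0.5 * high)
    else:
        noise = 0.0
    return np.clip(a_mean + noise, low, high)

print(epsilon_greedy_noisy_action(np.array([0.3]), e=0.5, low=-2.0, high=2.0))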
"hidden_layers_activation": "sigmoid", - "exploration_anneal_episodes": 10, - "e": 0.01, - "alpha": 0.5, - "max_mem_len": 20000 - }, - "param_range": { - "lr": [0.001, 0.005, 0.01, 0.02], - "gamma": [0.97, 0.99, 0.999], - "hidden_layers": [ - [16], - [32], - [64], - [32, 16] - ] - } - }, "sarsa": { "problem": "CartPole-v0", "Agent": "DeepSarsa", @@ -222,68 +190,6 @@ ] } }, - "sarsa_epsilon": { - "problem": "CartPole-v0", - "Agent": "DeepSarsa", - "HyperOptimizer": "GridSearch", - "Memory": "LinearMemoryWithForgetting", - "Optimizer": "AdamOptimizer", - "Policy": "EpsilonGreedyPolicy", - "PreProcessor": "NoPreProcessor", - "param": { - "lr": 0.02, - "gamma": 0.99, - "hidden_layers": [64], - "hidden_layers_activation": "sigmoid", - "exploration_anneal_episodes": 50, - "max_mem_len": 50000 - }, - "param_range": { - "lr": [0.005, 0.01, 0.02, 0.05], - "gamma": [0.97, 0.99, 0.999], - "hidden_layers": [ - [16], - [32], - [64], - [16, 8], - [200, 100] - ] - } - }, - "rand_sarsa": { - "problem": "CartPole-v0", - "Agent": "DeepSarsa", - "HyperOptimizer": "RandomSearch", - "Memory": "LinearMemoryWithForgetting", - "Optimizer": "AdamOptimizer", - "Policy": "BoltzmannPolicy", - "PreProcessor": "NoPreProcessor", - "param": { - "max_evals": 50, - "lr": 0.01, - "gamma": 0.99, - "hidden_layers": [32], - "hidden_layers_activation": "sigmoid", - "exploration_anneal_episodes": 10 - }, - "param_range": { - "lr": { - "min": 0.0005, - "max": 0.05 - }, - "gamma": { - "min": 0.95, - "max": 0.999 - }, - "hidden_layers": [ - [16], - [32], - [64], - [16, 8], - [32, 16] - ] - } - }, "exp_sarsa": { "problem": "CartPole-v0", "Agent": "DeepExpectedSarsa", @@ -410,6 +316,31 @@ ] } }, + "dqn_per_v1": { + "problem": "CartPole-v1", + "Agent": "DQN", + "HyperOptimizer": "GridSearch", + "Memory": "PrioritizedExperienceReplay", + "Optimizer": "AdamOptimizer", + "Policy": "BoltzmannPolicy", + "PreProcessor": "NoPreProcessor", + "param": { + "exploration_anneal_episodes": 10, + "gamma": 0.999, + "hidden_layers": [128], + "hidden_layers_activation": "sigmoid", + "lr": 0.005 + }, + "param_range": { + "max_mem_len": [5000, 10000, 20000], + "alpha": [0.0, 0.6, 0.8, 1.0], + "gamma": [0.99, 0.999], + "hidden_layers": [ + [64], + [128] + ] + } + }, "rand_dqn_v1": { "problem": "CartPole-v1", "Agent": "DQN", @@ -472,32 +403,6 @@ ] } }, - "double_dqn_per_v1": { - "problem": "CartPole-v1", - "Agent": "DoubleDQN", - "HyperOptimizer": "GridSearch", - "Memory": "PrioritizedExperienceReplay", - "Optimizer": "AdamOptimizer", - "Policy": "DoubleDQNBoltzmannPolicy", - "PreProcessor": "NoPreProcessor", - "param": { - "lr": 0.02, - "gamma": 0.999, - "hidden_layers": [64], - "hidden_layers_activation": "sigmoid", - "exploration_anneal_episodes": 10 - }, - "param_range": { - "lr": [0.001, 0.005, 0.01, 0.02], - "gamma": [0.97, 0.99, 0.999], - "hidden_layers": [ - [16], - [32], - [64], - [32, 16] - ] - } - }, "offpol_sarsa_v1": { "problem": "CartPole-v1", "Agent": "OffPolicySarsa", @@ -602,32 +507,6 @@ ] } }, - "acrobot_double_dqn_per": { - "problem": "Acrobot-v1", - "Agent": "DoubleDQN", - "HyperOptimizer": "GridSearch", - "Memory": "PrioritizedExperienceReplay", - "Optimizer": "AdamOptimizer", - "Policy": "DoubleDQNBoltzmannPolicy", - "PreProcessor": "StackStates", - "param": { - "train_per_n_new_exp": 1, - "lr": 0.01, - "gamma": 0.99, - "hidden_layers": [32], - "hidden_layers_activation": "sigmoid", - "exploration_anneal_episodes": 200 - }, - "param_range": { - "lr": [0.001, 0.005, 0.01], - "gamma": [0.97, 0.99, 0.999], - "hidden_layers": [ - [200], 
diff --git a/rl/spec/classic_experiment_specs.json b/rl/spec/classic_experiment_specs.json
index e007b88..65e012f 100644
--- a/rl/spec/classic_experiment_specs.json
+++ b/rl/spec/classic_experiment_specs.json
@@ -92,14 +92,11 @@
       "exploration_anneal_episodes": 10
     },
     "param_range": {
-      "lr": [0.001, 0.005, 0.01, 0.02],
-      "gamma": [0.95, 0.97, 0.99, 0.999],
+      "max_mem_len": [2000, 5000, 10000],
+      "alpha": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
       "hidden_layers": [
-        [16],
-        [32],
         [64],
-        [16, 8],
-        [32, 16]
+        [128]
       ]
     }
   },
@@ -165,35 +162,6 @@
       ]
     }
   },
-  "double_dqn_per": {
-    "problem": "CartPole-v0",
-    "Agent": "DoubleDQN",
-    "HyperOptimizer": "GridSearch",
-    "Memory": "PrioritizedExperienceReplay",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "DoubleDQNBoltzmannPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "lr": 0.02,
-      "gamma": 0.99,
-      "hidden_layers": [64],
-      "hidden_layers_activation": "sigmoid",
-      "exploration_anneal_episodes": 10,
-      "e": 0.01,
-      "alpha": 0.5,
-      "max_mem_len": 20000
-    },
-    "param_range": {
-      "lr": [0.001, 0.005, 0.01, 0.02],
-      "gamma": [0.97, 0.99, 0.999],
-      "hidden_layers": [
-        [16],
-        [32],
-        [64],
-        [32, 16]
-      ]
-    }
-  },
   "sarsa": {
     "problem": "CartPole-v0",
     "Agent": "DeepSarsa",
@@ -222,68 +190,6 @@
       ]
     }
   },
-  "sarsa_epsilon": {
-    "problem": "CartPole-v0",
-    "Agent": "DeepSarsa",
-    "HyperOptimizer": "GridSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "EpsilonGreedyPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "lr": 0.02,
-      "gamma": 0.99,
-      "hidden_layers": [64],
-      "hidden_layers_activation": "sigmoid",
-      "exploration_anneal_episodes": 50,
-      "max_mem_len": 50000
-    },
-    "param_range": {
-      "lr": [0.005, 0.01, 0.02, 0.05],
-      "gamma": [0.97, 0.99, 0.999],
-      "hidden_layers": [
-        [16],
-        [32],
-        [64],
-        [16, 8],
-        [200, 100]
-      ]
-    }
-  },
-  "rand_sarsa": {
-    "problem": "CartPole-v0",
-    "Agent": "DeepSarsa",
-    "HyperOptimizer": "RandomSearch",
-    "Memory": "LinearMemoryWithForgetting",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "BoltzmannPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "max_evals": 50,
-      "lr": 0.01,
-      "gamma": 0.99,
-      "hidden_layers": [32],
-      "hidden_layers_activation": "sigmoid",
-      "exploration_anneal_episodes": 10
-    },
-    "param_range": {
-      "lr": {
-        "min": 0.0005,
-        "max": 0.05
-      },
-      "gamma": {
-        "min": 0.95,
-        "max": 0.999
-      },
-      "hidden_layers": [
-        [16],
-        [32],
-        [64],
-        [16, 8],
-        [32, 16]
-      ]
-    }
-  },
   "exp_sarsa": {
     "problem": "CartPole-v0",
     "Agent": "DeepExpectedSarsa",
@@ -410,6 +316,31 @@
       ]
     }
   },
+  "dqn_per_v1": {
+    "problem": "CartPole-v1",
+    "Agent": "DQN",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "PrioritizedExperienceReplay",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "BoltzmannPolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "exploration_anneal_episodes": 10,
+      "gamma": 0.999,
+      "hidden_layers": [128],
+      "hidden_layers_activation": "sigmoid",
+      "lr": 0.005
+    },
+    "param_range": {
+      "max_mem_len": [5000, 10000, 20000],
+      "alpha": [0.0, 0.6, 0.8, 1.0],
+      "gamma": [0.99, 0.999],
+      "hidden_layers": [
+        [64],
+        [128]
+      ]
+    }
+  },
   "rand_dqn_v1": {
     "problem": "CartPole-v1",
     "Agent": "DQN",
@@ -472,32 +403,6 @@
       ]
     }
   },
-  "double_dqn_per_v1": {
-    "problem": "CartPole-v1",
-    "Agent": "DoubleDQN",
-    "HyperOptimizer": "GridSearch",
-    "Memory": "PrioritizedExperienceReplay",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "DoubleDQNBoltzmannPolicy",
-    "PreProcessor": "NoPreProcessor",
-    "param": {
-      "lr": 0.02,
-      "gamma": 0.999,
-      "hidden_layers": [64],
-      "hidden_layers_activation": "sigmoid",
-      "exploration_anneal_episodes": 10
-    },
-    "param_range": {
-      "lr": [0.001, 0.005, 0.01, 0.02],
-      "gamma": [0.97, 0.99, 0.999],
-      "hidden_layers": [
-        [16],
-        [32],
-        [64],
-        [32, 16]
-      ]
-    }
-  },
   "offpol_sarsa_v1": {
     "problem": "CartPole-v1",
     "Agent": "OffPolicySarsa",
@@ -602,32 +507,6 @@
       ]
     }
   },
-  "acrobot_double_dqn_per": {
-    "problem": "Acrobot-v1",
-    "Agent": "DoubleDQN",
-    "HyperOptimizer": "GridSearch",
-    "Memory": "PrioritizedExperienceReplay",
-    "Optimizer": "AdamOptimizer",
-    "Policy": "DoubleDQNBoltzmannPolicy",
-    "PreProcessor": "StackStates",
-    "param": {
-      "train_per_n_new_exp": 1,
-      "lr": 0.01,
-      "gamma": 0.99,
-      "hidden_layers": [32],
-      "hidden_layers_activation": "sigmoid",
-      "exploration_anneal_episodes": 200
-    },
-    "param_range": {
-      "lr": [0.001, 0.005, 0.01],
-      "gamma": [0.97, 0.99, 0.999],
-      "hidden_layers": [
-        [200],
-        [200, 100],
-        [400, 200, 100]
-      ]
-    }
-  },
   "acrobot_offpol_sarsa": {
     "problem": "Acrobot-v1",
     "Agent": "OffPolicySarsa",
@@ -857,26 +736,25 @@
       ]
     }
   },
-  "pendulum_ddpg_per": {
+  "pendulum_ddpg_epsilonnoise": {
     "problem": "Pendulum-v0",
     "Agent": "DDPG",
     "HyperOptimizer": "GridSearch",
-    "Memory": "PrioritizedExperienceReplay",
+    "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
-    "Policy": "NoNoisePolicy",
+    "Policy": "EpsilonGreedyNoisePolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
       "batch_size": 64,
       "n_epoch": 1,
-      "tau": 0.001,
+      "tau": 0.005,
       "lr": 0.001,
       "critic_lr": 0.001,
       "exploration_anneal_episodes": 50,
       "gamma": 0.97,
       "hidden_layers": [400, 300],
       "hidden_layers_activation": "relu",
-      "output_layer_activation": "tanh",
-      "max_mem_len": 30000
+      "output_layer_activation": "tanh"
     },
     "param_range": {
       "lr": [0.0001, 0.0005, 0.001],
@@ -889,11 +767,11 @@
       ]
     }
   },
-  "pendulum_ddpg_per_linearnoise": {
+  "pendulum_ddpg_linearnoise": {
     "problem": "Pendulum-v0",
     "Agent": "DDPG",
     "HyperOptimizer": "GridSearch",
-    "Memory": "PrioritizedExperienceReplay",
+    "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
     "Policy": "LinearNoisePolicy",
     "PreProcessor": "NoPreProcessor",
@@ -901,41 +779,39 @@
       "batch_size": 64,
       "n_epoch": 1,
       "tau": 0.005,
-      "lr": 0.0005,
-      "critic_lr": 0.001,
-      "exploration_anneal_episodes": 100,
+      "lr": 0.0001,
+      "critic_lr": 0.005,
+      "exploration_anneal_episodes": 50,
       "gamma": 0.97,
-      "hidden_layers": [400, 200],
+      "hidden_layers": [400, 300],
       "hidden_layers_activation": "relu",
-      "output_layer_activation": "tanh",
-      "max_mem_len": 30000
+      "output_layer_activation": "tanh"
     },
     "param_range": {
-      "lr": [0.0001, 0.0005],
+      "lr": [0.0001, 0.0005, 0.001],
       "critic_lr": [0.001, 0.005],
       "gamma": [0.95, 0.97, 0.99],
       "hidden_layers": [
-        [200, 100],
-        [400, 200],
         [400, 300],
-        [800, 400]
+        [800, 400, 200],
+        [800, 600, 400, 200]
       ]
     }
   },
-  "pendulum_ddpg_linearnoise": {
+  "pendulum_ddpg_ounoise": {
     "problem": "Pendulum-v0",
     "Agent": "DDPG",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
-    "Policy": "LinearNoisePolicy",
+    "Policy": "NoNoisePolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
       "batch_size": 64,
       "n_epoch": 1,
       "tau": 0.005,
-      "lr": 0.0001,
-      "critic_lr": 0.005,
+      "lr": 0.001,
+      "critic_lr": 0.001,
       "exploration_anneal_episodes": 50,
       "gamma": 0.97,
       "hidden_layers": [400, 300],
@@ -953,13 +829,13 @@
       ]
     }
   },
-  "pendulum_ddpg_ounoise": {
+  "pendulum_ddpg_gaussiannoise": {
     "problem": "Pendulum-v0",
     "Agent": "DDPG",
     "HyperOptimizer": "GridSearch",
     "Memory": "LinearMemoryWithForgetting",
     "Optimizer": "AdamOptimizer",
-    "Policy": "NoNoisePolicy",
+    "Policy": "GaussianWhiteNoisePolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
       "batch_size": 64,
@@ -984,25 +860,26 @@
       ]
     }
   },
-  "pendulum_ddpg_gaussiannoise": {
+  "pendulum_ddpg_per": {
     "problem": "Pendulum-v0",
     "Agent": "DDPG",
     "HyperOptimizer": "GridSearch",
-    "Memory": "LinearMemoryWithForgetting",
+    "Memory": "PrioritizedExperienceReplay",
    "Optimizer": "AdamOptimizer",
-    "Policy": "GaussianWhiteNoisePolicy",
+    "Policy": "NoNoisePolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
       "batch_size": 64,
       "n_epoch": 1,
-      "tau": 0.005,
+      "tau": 0.001,
       "lr": 0.001,
       "critic_lr": 0.001,
       "exploration_anneal_episodes": 50,
       "gamma": 0.97,
       "hidden_layers": [400, 300],
       "hidden_layers_activation": "relu",
-      "output_layer_activation": "tanh"
+      "output_layer_activation": "tanh",
+      "max_mem_len": 30000
     },
     "param_range": {
       "lr": [0.0001, 0.0005, 0.001],
@@ -1015,6 +892,39 @@
       ]
     }
   },
+  "pendulum_ddpg_per_linearnoise": {
+    "problem": "Pendulum-v0",
+    "Agent": "DDPG",
+    "HyperOptimizer": "GridSearch",
+    "Memory": "PrioritizedExperienceReplay",
+    "Optimizer": "AdamOptimizer",
+    "Policy": "LinearNoisePolicy",
+    "PreProcessor": "NoPreProcessor",
+    "param": {
+      "batch_size": 64,
+      "n_epoch": 1,
+      "tau": 0.005,
+      "lr": 0.0005,
+      "critic_lr": 0.001,
+      "exploration_anneal_episodes": 100,
+      "gamma": 0.97,
+      "hidden_layers": [400, 200],
+      "hidden_layers_activation": "relu",
+      "output_layer_activation": "tanh",
+      "max_mem_len": 30000
+    },
+    "param_range": {
+      "lr": [0.0001, 0.0005],
+      "critic_lr": [0.001, 0.005],
+      "gamma": [0.95, 0.97, 0.99],
+      "hidden_layers": [
+        [200, 100],
+        [400, 200],
+        [400, 300],
+        [800, 400]
+      ]
+    }
+  },
   "mountain_dqn": {
     "problem": "MountainCar-v0",
     "Agent": "DQN",
@@ -1078,22 +988,22 @@
     "Policy": "BoltzmannPolicy",
     "PreProcessor": "NoPreProcessor",
     "param": {
-      "batch_size": 32,
+      "batch_size": 64,
       "lr": 0.001,
       "gamma": 0.99,
       "hidden_layers": [128, 64],
       "hidden_layers_activation": "sigmoid",
       "output_layer_activation": "linear",
       "exploration_anneal_episodes": 50,
-      "epi_change_lr": 150,
-      "max_mem_len": 30000
+      "epi_change_lr": 150
     },
     "param_range": {
-      "lr": [0.005, 0.01, 0.02, 0.05],
+      "lr": [0.001, 0.005, 0.01],
       "gamma": [0.99, 0.999],
       "hidden_layers": [
         [400],
-        [800]
+        [800],
+        [1200]
       ]
     }
   },
@@ -1128,7 +1038,7 @@
     "problem": "MountainCar-v0",
     "Agent": "ActorCritic",
     "HyperOptimizer": "GridSearch",
-    "Memory": "LinearMemoryWithForgetting",
+    "Memory": "PrioritizedExperienceReplay",
     "Optimizer": "AdamOptimizer",
     "Policy": "SoftmaxPolicy",
     "PreProcessor": "NoPreProcessor",
@@ -1136,7 +1046,8 @@
       "lr": 0.02,
       "gamma": 0.99,
       "hidden_layers": [64],
-      "hidden_layers_activation": "sigmoid"
+      "hidden_layers_activation": "relu",
+      "max_mem_len": 50000
     },
     "param_range": {
       "lr": [0.001, 0.005, 0.01],
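Note: the CartPole/Pendulum/MountainCar spec changes move the sweeps onto memory hyperparameters (max_mem_len, alpha) rather than only lr/gamma. That only works if the spec's "param" dict reaches the memory constructor; the sketch below assumes the experiment factory forwards it as keyword arguments, which matches the **kwargs-absorbing signature of PrioritizedExperienceReplay above but is otherwise an assumption about wiring not shown in this diff.

spec_param = {
    "lr": 0.005,
    "gamma": 0.999,
    "hidden_layers": [128],
    "hidden_layers_activation": "sigmoid",
    "max_mem_len": 10000,
    "alpha": 0.6,
}

class DemoPER:
    # same keyword surface as PrioritizedExperienceReplay above; generic
    # keys like lr or hidden_layers simply land in **kwargs and are ignored
    def __init__(self, env_spec, max_mem_len=None, e=0.01, alpha=0.6, **kwargs):
        self.max_mem_len = max_mem_len
        self.e = e
        self.alpha = alpha

memory = DemoPER(env_spec={}, **spec_param)
print(memory.max_mem_len, memory.alpha)  # 10000 0.6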
diff --git a/test/test_atari.py b/test/test_atari.py
new file mode 100644
index 0000000..0888757
--- /dev/null
+++ b/test/test_atari.py
@@ -0,0 +1,21 @@
+import unittest
+import pytest
+from os import environ
+from rl.experiment import run
+from . import conftest
+import pandas as pd
+
+
+class AtariTest(unittest.TestCase):
+
+    @unittest.skipIf(environ.get('CI'), "Delay CI test until dev stable")
+    @classmethod
+    def test_breakout_dqn(cls):
+        data_df = run('breakout_dqn')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @unittest.skipIf(environ.get('CI'), "Delay CI test until dev stable")
+    @classmethod
+    def test_breakout_double_dqn(cls):
+        data_df = run('breakout_double_dqn')
+        assert isinstance(data_df, pd.DataFrame)
diff --git a/test/test_box2d.py b/test/test_box2d.py
new file mode 100644
index 0000000..db0ea41
--- /dev/null
+++ b/test/test_box2d.py
@@ -0,0 +1,29 @@
+import unittest
+import pytest
+from os import environ
+from rl.experiment import run
+from . import conftest
+import pandas as pd
+
+
+class Box2DTest(unittest.TestCase):
+
+    @classmethod
+    def test_lunar_dqn(cls):
+        data_df = run('lunar_dqn')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_lunar_double_dqn(cls):
+        data_df = run('lunar_double_dqn')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_lunar_freeze(cls):
+        data_df = run('lunar_freeze')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_walker_ddpg_linearnoise(cls):
+        data_df = run('walker_ddpg_linearnoise')
+        assert isinstance(data_df, pd.DataFrame)
diff --git a/test/test_advanced.py b/test/test_classic.py
similarity index 61%
rename from test/test_advanced.py
rename to test/test_classic.py
index 7407e56..b395ac9 100644
--- a/test/test_advanced.py
+++ b/test/test_classic.py
@@ -6,50 +6,46 @@
 import pandas as pd
 
 
-class AdvancedTest(unittest.TestCase):
+class ClassicTest(unittest.TestCase):
 
     @classmethod
-    def test_sarsa(cls):
-        data_df = run('rand_sarsa')
+    def test_quickstart_dqn(cls):
+        data_df = run('quickstart_dqn')
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_exp_sarsa(cls):
-        data_df = run('exp_sarsa')
+    def test_dqn_epsilon(cls):
+        data_df = run('dqn_epsilon')
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_offpol_sarsa(cls):
-        data_df = run('offpol_sarsa')
+    def test_dqn(cls):
+        data_df = run('dqn')
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_acrobot(cls):
-        data_df = run('acrobot')
+    def test_dqn_per(cls):
+        data_df = run('dqn_per')
        assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_mountain_dqn(cls):
-        data_df = run('mountain_dqn')
+    def test_double_dqn(cls):
+        data_df = run('double_dqn')
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_lunar_dqn(cls):
-        data_df = run('lunar_dqn')
+    def test_sarsa(cls):
+        data_df = run('sarsa')
         assert isinstance(data_df, pd.DataFrame)
 
-    @unittest.skipIf(environ.get('CI'),
-                     "Delay CI test until dev stable")
     @classmethod
-    def test_breakout_dqn(cls):
-        data_df = run('breakout_dqn')
+    def test_exp_sarsa(cls):
+        data_df = run('exp_sarsa')
         assert isinstance(data_df, pd.DataFrame)
 
-    @unittest.skipIf(environ.get('CI'),
-                     "Delay CI test until dev stable")
     @classmethod
-    def test_breakout_double_dqn(cls):
-        data_df = run('breakout_double_dqn')
+    def test_offpol_sarsa(cls):
+        data_df = run('offpol_sarsa')
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
@@ -58,6 +54,21 @@ def test_cartpole_ac_argmax(cls):
         assert isinstance(data_df, pd.DataFrame)
 
     @classmethod
-    def test_pendulum_ddpg(cls):
-        data_df = run('pendulum_ddpg')
+    def test_dqn_v1(cls):
+        data_df = run('dqn_v1')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_acrobot(cls):
+        data_df = run('acrobot')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_pendulum_ddpg_linearnoise(cls):
+        data_df = run('pendulum_ddpg_linearnoise')
+        assert isinstance(data_df, pd.DataFrame)
+
+    @classmethod
+    def test_mountain_dqn(cls):
+        data_df = run('mountain_dqn')
         assert isinstance(data_df, pd.DataFrame)
diff --git a/test/test_basic.py b/test/test_dev.py
similarity index 78%
rename from test/test_basic.py
rename to test/test_dev.py
index 4cac079..0d577d2 100644
--- a/test/test_basic.py
+++ b/test/test_dev.py
@@ -6,7 +6,7 @@
 import pandas as pd
 
 
-class BasicTest(unittest.TestCase):
+class DevTest(unittest.TestCase):
 
     @classmethod
     def test_clean_import(cls):
@@ -46,18 +46,3 @@ def test_dqn_pass(cls):
     # def test_dqn_random_search(cls):
     #     data_df = run('test_dqn_random_search', param_selection=True)
     #     assert isinstance(data_df, pd.DataFrame)
-
-    @classmethod
-    def test_dqn(cls):
-        data_df = run('dqn')
-        assert isinstance(data_df, pd.DataFrame)
-
-    @classmethod
-    def test_dqn(cls):
-        data_df = run('double_dqn_per')
-        assert isinstance(data_df, pd.DataFrame)
-
-    @classmethod
-    def test_double_dqn(cls):
-        data_df = run('double_dqn')
-        assert isinstance(data_df, pd.DataFrame)
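Note: taken together, the memory and agent hunks define one PER lifecycle: add_exp seeds a transition's priority from abs(reward), rand_minibatch samples proportionally to priority, and the agent's train step calls memory.update(errors) to overwrite those priorities with fresh TD errors. A toy stand-in for reference (plain Python list instead of the SumTree; method names mirror the diff but the internals are simplified assumptions):

import numpy as np

class ToyPER:
    '''Toy stand-in for PrioritizedExperienceReplay: a list instead of a
    SumTree, but the same add -> sample -> update lifecycle as the diff.'''

    def __init__(self, e=0.01, alpha=0.6):
        self.e, self.alpha = e, alpha
        self.exps, self.priorities = [], []
        self.last_indices = None

    def get_priority(self, error):
        return (abs(error) + self.e) ** self.alpha

    def add_exp(self, exp, reward):
        # new experiences start with a priority seeded from |reward|,
        # refined later once the agent has a real TD error for them
        self.exps.append(exp)
        self.priorities.append(self.get_priority(abs(reward)))

    def rand_minibatch(self, batch_size):
        p = np.array(self.priorities)
        probs = p / p.sum()
        self.last_indices = np.random.choice(len(self.exps), batch_size, p=probs)
        return [self.exps[i] for i in self.last_indices]

    def update(self, errors):
        # called by the agents right after train_on_batch / train_tf
        for i, err in zip(self.last_indices, errors):
            self.priorities[i] = self.get_priority(err)

On this toy, a loop of add_exp, rand_minibatch and update reproduces the ordering the agents rely on: priorities exist before the first sample and are refreshed immediately after each training batch.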