diff --git a/rl/memory/prioritized_exp_replay.py b/rl/memory/prioritized_exp_replay.py
index 90c2ee0..9d1a239 100644
--- a/rl/memory/prioritized_exp_replay.py
+++ b/rl/memory/prioritized_exp_replay.py
@@ -27,15 +27,16 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,
         self.prio_tree = SumTree(self.max_mem_len)
         self.head = 0
+        # bump to account for negative reward terms in get_priority;
+        # we cannot just use abs(reward) since its sign matters
         SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD']
-        if SOLVED_MEAN_REWARD > 0:
-            self.min_priority = 0
-        else:
-            self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
+        self.min_priority = abs(10 * SOLVED_MEAN_REWARD)
 
     def get_priority(self, error):
         # add min_priority to prevent root of negative = complex
-        return (self.min_priority + error + self.e) ** self.alpha
+        p = (self.min_priority + error + self.e) ** self.alpha
+        assert not np.isnan(p)
+        return p
 
     def add_exp(self, action, reward, next_state, terminal):
         '''Round robin memory updating'''
diff --git a/rl/spec/box2d_experiment_specs.json b/rl/spec/box2d_experiment_specs.json
index c8b6971..c379ec4 100644
--- a/rl/spec/box2d_experiment_specs.json
+++ b/rl/spec/box2d_experiment_specs.json
@@ -107,14 +107,14 @@
     "PreProcessor": "StackStates",
     "param": {
       "train_per_n_new_exp": 5,
-      "batch_size": 32,
       "lr": 0.005,
       "gamma": 0.99,
       "hidden_layers": [800, 400],
       "hidden_layers_activation": "sigmoid",
       "output_layer_activation": "linear",
       "exploration_anneal_episodes": 150,
-      "epi_change_lr": 200
+      "epi_change_lr": 200,
+      "max_mem_len": 20000
     },
     "param_range": {
       "lr": [0.001, 0.005, 0.01],
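
For context, a minimal standalone sketch (not the repo's code; the values of `SOLVED_MEAN_REWARD`, `e`, `alpha`, and `error` below are hypothetical) of why the unconditional `min_priority` bump matters: with `alpha < 1`, raising a negative base to a fractional power yields a complex number in pure Python and NaN under NumPy, which is what the new `assert not np.isnan(p)` guards against.

```python
import numpy as np

# Illustrative values only; the real ones come from the env spec / agent params.
e, alpha = 0.01, 0.6
SOLVED_MEAN_REWARD = -110.0                  # hypothetical, e.g. a problem solved at negative mean reward
min_priority = abs(10 * SOLVED_MEAN_REWARD)  # 1100.0, always non-negative

error = -50.0                                # errors can be negative when rewards are negative

# Fractional power of a negative base: NaN under NumPy (with a RuntimeWarning).
without_offset = np.float64(error + e) ** alpha

# Shifting by min_priority keeps the base positive, so the priority is well-defined.
with_offset = (min_priority + error + e) ** alpha

print(without_offset, with_offset)  # nan  vs. a positive priority
```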