spragunr · kmader · Mar 21, 2016
diff --git a/deep_q_rl/q_network.py b/deep_q_rl/q_network.py
@@ -155,13 +155,22 @@ def build_network(self, network_type, input_width, input_height,
         if network_type == "nature_cuda":
             return self.build_nature_network(input_width, input_height,
                                              output_dim, num_frames, batch_size)
+        if network_type == "nature_cpu":
+            return self.build_nature_network_conv2d(input_width, input_height,
+                                                 output_dim, num_frames,
+                                                 batch_size)
         if network_type == "nature_dnn":
             return self.build_nature_network_dnn(input_width, input_height,
                                                  output_dim, num_frames,
                                                  batch_size)
         elif network_type == "nips_cuda":
             return self.build_nips_network(input_width, input_height,
                                            output_dim, num_frames, batch_size)
+        elif network_type == "nips_cpu":
+            return self.build_nips_network_conv2d(input_width, input_height,
+                                               output_dim, num_frames,
+                                               batch_size)
+
         elif network_type == "nips_dnn":
             return self.build_nips_network_dnn(input_width, input_height,
                                                output_dim, num_frames,
@@ -341,6 +350,64 @@ def build_nature_network_dnn(self, input_width, input_height, output_dim,
 
         return l_out
 
+    def build_nature_network_conv2d(self, input_width, input_height, output_dim,
+                                 num_frames, batch_size):
+        """
+        Build a large network consistent with the DeepMind Nature paper.
+        """
+        from lasagne.layers import Conv2DLayer
+
+        l_in = lasagne.layers.InputLayer(
+            shape=(batch_size, num_frames, input_width, input_height)
+        )
+
+        l_conv1 = Conv2DLayer(
+            l_in,
+            num_filters=32,
+            filter_size=(8, 8),
+            stride=(4, 4),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv2 = Conv2DLayer(
+            l_conv1,
+            num_filters=64,
+            filter_size=(4, 4),
+            stride=(2, 2),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv3 = Conv2DLayer(
+            l_conv2,
+            num_filters=64,
+            filter_size=(3, 3),
+            stride=(1, 1),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_hidden1 = lasagne.layers.DenseLayer(
+            l_conv3,
+            num_units=512,
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_out = lasagne.layers.DenseLayer(
+            l_hidden1,
+            num_units=output_dim,
+            nonlinearity=None,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        return l_out
 
 
     def build_nips_network(self, input_width, input_height, output_dim,
@@ -397,7 +464,61 @@ def build_nips_network(self, input_width, input_height, output_dim,
 
         return l_out
 
+    def build_nips_network_conv2d(self, input_width, input_height, output_dim,
+                               num_frames, batch_size):
+        """
+        Build a network consistent with the 2013 NIPS paper.
+        """
+        # Use Conv2DLayer (not completely compatible, but fine as long as we don't resume).
+        from lasagne.layers import Conv2DLayer
+
+        l_in = lasagne.layers.InputLayer(
+            shape=(batch_size, num_frames, input_width, input_height)
+        )
+
 
+        l_conv1 = Conv2DLayer(
+            l_in,
+            num_filters=16,
+            filter_size=(8, 8),
+            stride=(4, 4),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv2 = Conv2DLayer(
+            l_conv1,
+            num_filters=32,
+            filter_size=(4, 4),
+            stride=(2, 2),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_hidden1 = lasagne.layers.DenseLayer(
+            l_conv2,
+            num_units=256,
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_out = lasagne.layers.DenseLayer(
+            l_hidden1,
+            num_units=output_dim,
+            nonlinearity=None,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        return l_out
+
     def build_nips_network_dnn(self, input_width, input_height, output_dim,
                                num_frames, batch_size):
         """

diff --git a/deep_q_rl/run_nature_cpu.py b/deep_q_rl/run_nature_cpu.py
@@ -0,0 +1,65 @@
+#! /usr/bin/env python
+"""
+Execute a training run of deep-Q-Learning with parameters that
+are consistent with:
+
+Human-level control through deep reinforcement learning.
+Nature, 518(7540):529-533, February 2015
+
+"""
+
+import launcher
+import sys
+
+class Defaults:
+    # ----------------------
+    # Experiment Parameters
+    # ----------------------
+    STEPS_PER_EPOCH = 250000
+    EPOCHS = 200
+    STEPS_PER_TEST = 125000
+
+    # ----------------------
+    # ALE Parameters
+    # ----------------------
+    BASE_ROM_PATH = "../roms/"
+    ROM = 'breakout.bin'
+    FRAME_SKIP = 4
+    REPEAT_ACTION_PROBABILITY = 0
+
+    # ----------------------
+    # Agent/Network parameters:
+    # ----------------------
+    UPDATE_RULE = 'deepmind_rmsprop'
+    BATCH_ACCUMULATOR = 'sum'
+    LEARNING_RATE = .00025
+    DISCOUNT = .99
+    RMS_DECAY = .95 # (Rho)
+    RMS_EPSILON = .01
+    MOMENTUM = 0 # Note that the "momentum" value mentioned in the Nature
+                 # paper is not used in the same way as a traditional momentum
+                 # term.  It is used to track gradient for the purpose of
+                 # estimating the standard deviation. This package uses
+                 # rho/RMS_DECAY to track both the history of the gradient
+                 # and the squared gradient.
+    CLIP_DELTA = 1.0
+    EPSILON_START = 1.0
+    EPSILON_MIN = .1
+    EPSILON_DECAY = 1000000
+    PHI_LENGTH = 4
+    UPDATE_FREQUENCY = 4
+    REPLAY_MEMORY_SIZE = 1000000
+    BATCH_SIZE = 32
+    NETWORK_TYPE = "nature_cpu"
+    FREEZE_INTERVAL = 10000
+    REPLAY_START_SIZE = 50000
+    RESIZE_METHOD = 'scale'
+    RESIZED_WIDTH = 84
+    RESIZED_HEIGHT = 84
+    DEATH_ENDS_EPISODE = 'true'
+    MAX_START_NULLOPS = 30
+    DETERMINISTIC = True
+    CUDNN_DETERMINISTIC = False
+
+if __name__ == "__main__":
+    launcher.launch(sys.argv[1:], Defaults, __doc__)
diff --git a/deep_q_rl/run_nips_cpu.py b/deep_q_rl/run_nips_cpu.py
@@ -0,0 +1,60 @@
+#! /usr/bin/env python
+"""
+Execute a training run of deep-Q-Learning with parameters that
+are consistent with:
+
+Playing Atari with Deep Reinforcement Learning
+NIPS Deep Learning Workshop 2013
+
+"""
+
+import launcher
+import sys
+
+class Defaults:
+    # ----------------------
+    # Experiment Parameters
+    # ----------------------
+    STEPS_PER_EPOCH = 50000
+    EPOCHS = 100
+    STEPS_PER_TEST = 10000
+
+    # ----------------------
+    # ALE Parameters
+    # ----------------------
+    BASE_ROM_PATH = "../roms/"
+    ROM = 'breakout.bin'
+    FRAME_SKIP = 4
+    REPEAT_ACTION_PROBABILITY = 0
+
+    # ----------------------
+    # Agent/Network parameters:
+    # ----------------------
+    UPDATE_RULE = 'rmsprop'
+    BATCH_ACCUMULATOR = 'mean'
+    LEARNING_RATE = .0002
+    DISCOUNT = .95
+    RMS_DECAY = .99 # (Rho)
+    RMS_EPSILON = 1e-6
+    MOMENTUM = 0
+    CLIP_DELTA = 0
+    EPSILON_START = 1.0
+    EPSILON_MIN = .1
+    EPSILON_DECAY = 1000000
+    PHI_LENGTH = 4
+    UPDATE_FREQUENCY = 1
+    REPLAY_MEMORY_SIZE = 1000000
+    BATCH_SIZE = 32
+    NETWORK_TYPE = "nips_cpu"
+    FREEZE_INTERVAL = -1
+    REPLAY_START_SIZE = 100
+    RESIZE_METHOD = 'crop'
+    RESIZED_WIDTH = 84
+    RESIZED_HEIGHT = 84
+    DEATH_ENDS_EPISODE = 'false'
+    MAX_START_NULLOPS = 0
+    DETERMINISTIC = True
+    CUDNN_DETERMINISTIC = False
+
+if __name__ == "__main__":
+    launcher.launch(sys.argv[1:], Defaults, __doc__)