diff --git a/deep_q_rl/q_network.py b/deep_q_rl/q_network.py
index 0fa360b..5b95089 100644
--- a/deep_q_rl/q_network.py
+++ b/deep_q_rl/q_network.py
@@ -155,6 +155,10 @@ def build_network(self, network_type, input_width, input_height,
         if network_type == "nature_cuda":
             return self.build_nature_network(input_width, input_height,
                                              output_dim, num_frames, batch_size)
+        if network_type == "nature_cpu":
+            return self.build_nature_network_conv2d(input_width, input_height,
+                                                    output_dim, num_frames,
+                                                    batch_size)
         if network_type == "nature_dnn":
             return self.build_nature_network_dnn(input_width, input_height,
                                                  output_dim, num_frames,
@@ -162,6 +166,11 @@ def build_network(self, network_type, input_width, input_height,
         elif network_type == "nips_cuda":
             return self.build_nips_network(input_width, input_height,
                                            output_dim, num_frames, batch_size)
+        elif network_type == "nips_cpu":
+            return self.build_nips_network_conv2d(input_width, input_height,
+                                                  output_dim, num_frames,
+                                                  batch_size)
+
         elif network_type == "nips_dnn":
             return self.build_nips_network_dnn(input_width, input_height,
                                                output_dim, num_frames,
@@ -341,6 +350,64 @@ def build_nature_network_dnn(self, input_width, input_height, output_dim,
 
         return l_out
 
+    def build_nature_network_conv2d(self, input_width, input_height, output_dim,
+                                    num_frames, batch_size):
+        """
+        Build a large network consistent with the DeepMind Nature paper.
+        """
+        from lasagne.layers import Conv2DLayer
+
+        l_in = lasagne.layers.InputLayer(
+            shape=(batch_size, num_frames, input_width, input_height)
+        )
+
+        l_conv1 = Conv2DLayer(
+            l_in,
+            num_filters=32,
+            filter_size=(8, 8),
+            stride=(4, 4),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv2 = Conv2DLayer(
+            l_conv1,
+            num_filters=64,
+            filter_size=(4, 4),
+            stride=(2, 2),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv3 = Conv2DLayer(
+            l_conv2,
+            num_filters=64,
+            filter_size=(3, 3),
+            stride=(1, 1),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_hidden1 = lasagne.layers.DenseLayer(
+            l_conv3,
+            num_units=512,
+            nonlinearity=lasagne.nonlinearities.rectify,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_out = lasagne.layers.DenseLayer(
+            l_hidden1,
+            num_units=output_dim,
+            nonlinearity=None,
+            W=lasagne.init.HeUniform(),
+            b=lasagne.init.Constant(.1)
+        )
+
+        return l_out
 
     def build_nips_network(self, input_width, input_height, output_dim,
@@ -397,7 +464,61 @@ def build_nips_network(self, input_width, input_height, output_dim,
 
         return l_out
 
+    def build_nips_network_conv2d(self, input_width, input_height, output_dim,
+                                  num_frames, batch_size):
+        """
+        Build a network consistent with the 2013 NIPS paper.
+        """
+        # Uses Conv2DLayer; its saved-parameter layout is not fully compatible
+        # with the cuda/dnn builders, but that only matters when resuming a run.
+        from lasagne.layers import Conv2DLayer
+
+        l_in = lasagne.layers.InputLayer(
+            shape=(batch_size, num_frames, input_width, input_height)
+        )
+
+        l_conv1 = Conv2DLayer(
+            l_in,
+            num_filters=16,
+            filter_size=(8, 8),
+            stride=(4, 4),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_conv2 = Conv2DLayer(
+            l_conv1,
+            num_filters=32,
+            filter_size=(4, 4),
+            stride=(2, 2),
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_hidden1 = lasagne.layers.DenseLayer(
+            l_conv2,
+            num_units=256,
+            nonlinearity=lasagne.nonlinearities.rectify,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        l_out = lasagne.layers.DenseLayer(
+            l_hidden1,
+            num_units=output_dim,
+            nonlinearity=None,
+            #W=lasagne.init.HeUniform(),
+            W=lasagne.init.Normal(.01),
+            b=lasagne.init.Constant(.1)
+        )
+
+        return l_out
+
     def build_nips_network_dnn(self, input_width, input_height, output_dim,
                                num_frames, batch_size):
         """
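The new builders can be sanity-checked standalone by inspecting the feature-map
shapes they produce. A minimal sketch, assuming only that Lasagne is importable;
it mirrors the hyperparameters from the hunks above and is not part of the patch:

    from lasagne.layers import InputLayer, Conv2DLayer, get_output_shape

    # Same input layout as the builders: (batch, frames, width, height).
    l_in = InputLayer(shape=(32, 4, 84, 84))

    # Nature stack: 8x8 stride 4 -> 4x4 stride 2 -> 3x3 stride 1.
    l1 = Conv2DLayer(l_in, num_filters=32, filter_size=(8, 8), stride=(4, 4))
    l2 = Conv2DLayer(l1, num_filters=64, filter_size=(4, 4), stride=(2, 2))
    l3 = Conv2DLayer(l2, num_filters=64, filter_size=(3, 3), stride=(1, 1))

    print(get_output_shape(l1))  # (32, 32, 20, 20)
    print(get_output_shape(l2))  # (32, 64, 9, 9)
    print(get_output_shape(l3))  # (32, 64, 7, 7)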
+ """ + # use conv2d (not completely compatible but as long as we dont resume it's fine) + from lasagne.layers import Conv2DLayer + + l_in = lasagne.layers.InputLayer( + shape=(batch_size, num_frames, input_width, input_height) + ) + + l_conv1 = Conv2DLayer( + l_in, + num_filters=16, + filter_size=(8, 8), + stride=(4, 4), + nonlinearity=lasagne.nonlinearities.rectify, + #W=lasagne.init.HeUniform(), + W=lasagne.init.Normal(.01), + b=lasagne.init.Constant(.1) + ) + + l_conv2 = Conv2DLayer( + l_conv1, + num_filters=32, + filter_size=(4, 4), + stride=(2, 2), + nonlinearity=lasagne.nonlinearities.rectify, + #W=lasagne.init.HeUniform(), + W=lasagne.init.Normal(.01), + b=lasagne.init.Constant(.1) + ) + + l_hidden1 = lasagne.layers.DenseLayer( + l_conv2, + num_units=256, + nonlinearity=lasagne.nonlinearities.rectify, + #W=lasagne.init.HeUniform(), + W=lasagne.init.Normal(.01), + b=lasagne.init.Constant(.1) + ) + + l_out = lasagne.layers.DenseLayer( + l_hidden1, + num_units=output_dim, + nonlinearity=None, + #W=lasagne.init.HeUniform(), + W=lasagne.init.Normal(.01), + b=lasagne.init.Constant(.1) + ) + + return l_out + def build_nips_network_dnn(self, input_width, input_height, output_dim, num_frames, batch_size): """ diff --git a/deep_q_rl/run_nature_cpu.py b/deep_q_rl/run_nature_cpu.py new file mode 100755 index 0000000..851de36 --- /dev/null +++ b/deep_q_rl/run_nature_cpu.py @@ -0,0 +1,65 @@ +#! /usr/bin/env python +""" +Execute a training run of deep-Q-Leaning with parameters that +are consistent with: + +Human-level control through deep reinforcement learning. +Nature, 518(7540):529-533, February 2015 + +""" + +import launcher +import sys + +class Defaults: + # ---------------------- + # Experiment Parameters + # ---------------------- + STEPS_PER_EPOCH = 250000 + EPOCHS = 200 + STEPS_PER_TEST = 125000 + + # ---------------------- + # ALE Parameters + # ---------------------- + BASE_ROM_PATH = "../roms/" + ROM = 'breakout.bin' + FRAME_SKIP = 4 + REPEAT_ACTION_PROBABILITY = 0 + + # ---------------------- + # Agent/Network parameters: + # ---------------------- + UPDATE_RULE = 'deepmind_rmsprop' + BATCH_ACCUMULATOR = 'sum' + LEARNING_RATE = .00025 + DISCOUNT = .99 + RMS_DECAY = .95 # (Rho) + RMS_EPSILON = .01 + MOMENTUM = 0 # Note that the "momentum" value mentioned in the Nature + # paper is not used in the same way as a traditional momentum + # term. It is used to track gradient for the purpose of + # estimating the standard deviation. This package uses + # rho/RMS_DECAY to track both the history of the gradient + # and the squared gradient. + CLIP_DELTA = 1.0 + EPSILON_START = 1.0 + EPSILON_MIN = .1 + EPSILON_DECAY = 1000000 + PHI_LENGTH = 4 + UPDATE_FREQUENCY = 4 + REPLAY_MEMORY_SIZE = 1000000 + BATCH_SIZE = 32 + NETWORK_TYPE = "nature_cpu" + FREEZE_INTERVAL = 10000 + REPLAY_START_SIZE = 50000 + RESIZE_METHOD = 'scale' + RESIZED_WIDTH = 84 + RESIZED_HEIGHT = 84 + DEATH_ENDS_EPISODE = 'true' + MAX_START_NULLOPS = 30 + DETERMINISTIC = True + CUDNN_DETERMINISTIC = False + +if __name__ == "__main__": + launcher.launch(sys.argv[1:], Defaults, __doc__) diff --git a/deep_q_rl/run_nips_cpu.py b/deep_q_rl/run_nips_cpu.py new file mode 100755 index 0000000..dddcd46 --- /dev/null +++ b/deep_q_rl/run_nips_cpu.py @@ -0,0 +1,60 @@ +#! 
diff --git a/deep_q_rl/run_nips_cpu.py b/deep_q_rl/run_nips_cpu.py
new file mode 100755
index 0000000..dddcd46
--- /dev/null
+++ b/deep_q_rl/run_nips_cpu.py
@@ -0,0 +1,60 @@
+#! /usr/bin/env python
+"""
+Execute a training run of deep Q-learning with parameters that
+are consistent with:
+
+Playing Atari with Deep Reinforcement Learning
+NIPS Deep Learning Workshop 2013
+
+"""
+
+import launcher
+import sys
+
+class Defaults:
+    # ----------------------
+    # Experiment Parameters
+    # ----------------------
+    STEPS_PER_EPOCH = 50000
+    EPOCHS = 100
+    STEPS_PER_TEST = 10000
+
+    # ----------------------
+    # ALE Parameters
+    # ----------------------
+    BASE_ROM_PATH = "../roms/"
+    ROM = 'breakout.bin'
+    FRAME_SKIP = 4
+    REPEAT_ACTION_PROBABILITY = 0
+
+    # ----------------------
+    # Agent/Network parameters:
+    # ----------------------
+    UPDATE_RULE = 'rmsprop'
+    BATCH_ACCUMULATOR = 'mean'
+    LEARNING_RATE = .0002
+    DISCOUNT = .95
+    RMS_DECAY = .99 # (Rho)
+    RMS_EPSILON = 1e-6
+    MOMENTUM = 0
+    CLIP_DELTA = 0
+    EPSILON_START = 1.0
+    EPSILON_MIN = .1
+    EPSILON_DECAY = 1000000
+    PHI_LENGTH = 4
+    UPDATE_FREQUENCY = 1
+    REPLAY_MEMORY_SIZE = 1000000
+    BATCH_SIZE = 32
+    NETWORK_TYPE = "nips_cpu"
+    FREEZE_INTERVAL = -1
+    REPLAY_START_SIZE = 100
+    RESIZE_METHOD = 'crop'
+    RESIZED_WIDTH = 84
+    RESIZED_HEIGHT = 84
+    DEATH_ENDS_EPISODE = 'false'
+    MAX_START_NULLOPS = 0
+    DETERMINISTIC = True
+    CUDNN_DETERMINISTIC = False
+
+if __name__ == "__main__":
+    launcher.launch(sys.argv[1:], Defaults, __doc__)
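For a rough sense of the two workloads, the networks can be built standalone
and their parameters counted. A sketch under the same assumptions as the
snippet above; the 4-unit output layer is a stand-in for Breakout's minimal
action set:

    import lasagne
    from lasagne.layers import InputLayer, Conv2DLayer, DenseLayer, count_params

    l_in = InputLayer(shape=(32, 4, 84, 84))
    l_conv1 = Conv2DLayer(l_in, num_filters=16, filter_size=(8, 8),
                          stride=(4, 4), W=lasagne.init.Normal(.01))
    l_conv2 = Conv2DLayer(l_conv1, num_filters=32, filter_size=(4, 4),
                          stride=(2, 2), W=lasagne.init.Normal(.01))
    l_hidden1 = DenseLayer(l_conv2, num_units=256)
    l_out = DenseLayer(l_hidden1, num_units=4, nonlinearity=None)

    print(count_params(l_out))  # ~0.7M parameters; the Nature stack is ~1.7M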