bibliography.bib

% Journal article in Nature (BibSonomy export, see biburl). The fields
% added-at, biburl, description, interhash, intrahash, keywords and timestamp
% are export metadata that standard styles ignore.
% The DOI is stored bare in `doi`; `url` points at the current doi.org resolver.
@article{mnih2015humanlevel,
  added-at = {2017-11-15T22:03:15.000+0100},
  author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
  biburl = {https://www.bibsonomy.org/bibtex/2fb15f4471c81dc2b9edf2304cb2f7083/lukasw},
  description = {Human-level control through deep reinforcement learning - nature14236.pdf},
  doi = {10.1038/nature14236},
  interhash = {eac59980357d99db87b341b61ef6645f},
  intrahash = {fb15f4471c81dc2b9edf2304cb2f7083},
  issn = {0028-0836},
  journal = {Nature},
  keywords = {atari final},
  month = feb,
  number = 7540,
  pages = {529--533},
  publisher = {Nature Publishing Group},
  timestamp = {2017-11-15T22:03:15.000+0100},
  title = {Human-level control through deep reinforcement learning},
  url = {https://doi.org/10.1038/nature14236},
  volume = 518,
  year = 2015
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% Authors are written in the unambiguous "Last, First" form.
@article{DBLP:journals/corr/MnihBMGLHSK16,
  author        = {Mnih, Volodymyr and
                   Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                   Mirza, Mehdi and
                   Graves, Alex and
                   Lillicrap, Timothy P. and
                   Harley, Tim and
                   Silver, David and
                   Kavukcuoglu, Koray},
  title         = {Asynchronous Methods for Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1602.01783},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1602.01783},
  url           = {http://arxiv.org/abs/1602.01783},
  timestamp     = {Wed, 07 Jun 2017 14:43:09 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/MnihBMGLHSK16},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% Authors are written in "von Last, First" form so the particle in
% "de Freitas" is parsed without relying on letter case.
@article{DBLP:journals/corr/WangBHMMKF16,
  author        = {Wang, Ziyu and
                   Bapst, Victor and
                   Heess, Nicolas and
                   Mnih, Volodymyr and
                   Munos, R{\'{e}}mi and
                   Kavukcuoglu, Koray and
                   de Freitas, Nando},
  title         = {Sample Efficient Actor-Critic with Experience Replay},
  journal       = {CoRR},
  volume        = {abs/1611.01224},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1611.01224},
  url           = {http://arxiv.org/abs/1611.01224},
  timestamp     = {Tue, 08 Aug 2017 15:06:57 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/WangBHMMKF16},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% NOTE(review): the DBLP export listed only the three authors of the first
% arXiv version; the author list below follows the current arXiv record for
% 1511.06581 -- confirm against the arXiv abstract page.
% The citation key is kept unchanged so existing \cite commands still resolve.
@article{DBLP:journals/corr/WangFL15,
  author        = {Wang, Ziyu and
                   Schaul, Tom and
                   Hessel, Matteo and
                   van Hasselt, Hado and
                   Lanctot, Marc and
                   de Freitas, Nando},
  title         = {Dueling Network Architectures for Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1511.06581},
  year          = {2015},
  archivePrefix = {arXiv},
  eprint        = {1511.06581},
  url           = {http://arxiv.org/abs/1511.06581},
  timestamp     = {Tue, 08 Aug 2017 15:06:57 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/WangFL15},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% Conference paper, exported from DBLP (see biburl / bibsource).
% Inherits missing fields from the proceedings entry named in `crossref`;
% classic BibTeX requires that parent entry to appear later in the file.
% The day range in `booktitle` uses "--" so it is typeset as an en-dash.
@inproceedings{DBLP:conf/amcc/DegrisPS12,
  author    = {Degris, Thomas and
               Pilarski, Patrick M. and
               Sutton, Richard S.},
  title     = {Model-Free reinforcement learning with continuous action in practice},
  booktitle = {American Control Conference, {ACC} 2012, Montreal, QC, Canada, June
               27--29, 2012},
  pages     = {2177--2182},
  year      = {2012},
  crossref  = {DBLP:conf/amcc/2012},
  url       = {http://ieeexplore.ieee.org/document/6315022/},
  timestamp = {Sun, 30 Apr 2017 09:41:50 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/amcc/DegrisPS12},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Proceedings volume, exported from DBLP (see biburl / bibsource).
% Referenced through `crossref` by DBLP:conf/amcc/DegrisPS12, so it must stay
% after that entry for classic BibTeX.
% The day range in `title` uses "--" so it is typeset as an en-dash.
@proceedings{DBLP:conf/amcc/2012,
  title     = {American Control Conference, {ACC} 2012, Montreal, QC, Canada, June
               27--29, 2012},
  publisher = {{IEEE}},
  year      = {2012},
  url       = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6297579},
  isbn      = {978-1-4577-1095-7},
  timestamp = {Fri, 20 Feb 2015 09:36:34 +0100},
  biburl    = {http://dblp.org/rec/bib/conf/amcc/2012},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% Authors are written in the unambiguous "Last, First" form.
@article{DBLP:journals/corr/PlappertHDSCCAA17,
  author        = {Plappert, Matthias and
                   Houthooft, Rein and
                   Dhariwal, Prafulla and
                   Sidor, Szymon and
                   Chen, Richard Y. and
                   Chen, Xi and
                   Asfour, Tamim and
                   Abbeel, Pieter and
                   Andrychowicz, Marcin},
  title         = {Parameter Space Noise for Exploration},
  journal       = {CoRR},
  volume        = {abs/1706.01905},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1706.01905},
  url           = {http://arxiv.org/abs/1706.01905},
  timestamp     = {Mon, 03 Jul 2017 13:29:02 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/PlappertHDSCCAA17},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% "Q-learning" is brace-protected so sentence-casing styles do not turn it
% into "q-learning".
@article{DBLP:journals/corr/HasseltGS15,
  author        = {van Hasselt, Hado and
                   Guez, Arthur and
                   Silver, David},
  title         = {Deep Reinforcement Learning with Double {Q-learning}},
  journal       = {CoRR},
  volume        = {abs/1509.06461},
  year          = {2015},
  archivePrefix = {arXiv},
  eprint        = {1509.06461},
  url           = {http://arxiv.org/abs/1509.06461},
  timestamp     = {Wed, 07 Jun 2017 14:40:43 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/HasseltGS15},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% Authors are written in the unambiguous "Last, First" form.
@article{DBLP:journals/corr/SchaulQAS15,
  author        = {Schaul, Tom and
                   Quan, John and
                   Antonoglou, Ioannis and
                   Silver, David},
  title         = {Prioritized Experience Replay},
  journal       = {CoRR},
  volume        = {abs/1511.05952},
  year          = {2015},
  archivePrefix = {arXiv},
  eprint        = {1511.05952},
  url           = {http://arxiv.org/abs/1511.05952},
  timestamp     = {Wed, 07 Jun 2017 14:42:32 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/SchaulQAS15},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% arXiv preprint, exported from DBLP (see biburl / bibsource).
% "Arcade Learning Environment" is the name of the platform, so it is
% brace-protected to keep its capitals under sentence-casing styles.
@article{DBLP:journals/corr/abs-1207-4708,
  author        = {Bellemare, Marc G. and
                   Naddaf, Yavar and
                   Veness, Joel and
                   Bowling, Michael},
  title         = {The {Arcade Learning Environment}: An Evaluation Platform for General
                   Agents},
  journal       = {CoRR},
  volume        = {abs/1207.4708},
  year          = {2012},
  archivePrefix = {arXiv},
  eprint        = {1207.4708},
  url           = {http://arxiv.org/abs/1207.4708},
  timestamp     = {Wed, 07 Jun 2017 14:43:03 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1207-4708},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint. `journal` is a required field for an article entry (BibTeX
% warns when it is empty), so it is given in the same CoRR / "abs/<id>" form
% used by the other arXiv entries in this file.
@article{LayerNorm,
  author        = {Ba, Jimmy Lei and
                   Kiros, Jamie Ryan and
                   Hinton, Geoffrey},
  title         = {Layer Normalization},
  journal       = {CoRR},
  volume        = {abs/1607.06450},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1607.06450},
  url           = {https://arxiv.org/abs/1607.06450}
}


% Project proposal hosted on GitHub, cited as a web page via howpublished.
% The accent is written as a BibTeX "special character" ({\'e}) so the name
% sorts and labels as a single letter rather than as a raw control sequence.
@misc{capstone-proposal,
  author       = {Carvalho, Andr{\'e}},
  title        = {Proposal - Learning to play video games with Machine Learning},
  year         = {2017},
  note         = {Last access in 14 January 2018},
  howpublished = {\url{https://github.com/andretadeu/capstone-proposal/blob/master/capstone_proposal.md}}
}

% Software repository, cited as a web page via howpublished.
% "OpenAI" and "Baselines" are brace-protected: standard styles sentence-case
% misc titles and would otherwise print "Openai baselines".
% `publisher` and `journal` are not used by standard styles for misc entries;
% they are kept as recorded metadata.
@misc{baselines,
  author       = {Dhariwal, Prafulla and
                  Hesse, Christopher and
                  Klimov, Oleg and
                  Nichol, Alex and
                  Plappert, Matthias and
                  Radford, Alec and
                  Schulman, John and
                  Sidor, Szymon and
                  Wu, Yuhuai},
  title        = {{OpenAI} {Baselines}},
  year         = {2017},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/openai/baselines}},
}

% Wikipedia article, cited as a web page via howpublished; the access date
% is recorded in `note`.
@misc{huber_loss,
  title        = {Huber Loss},
  author       = {Wikipedia},
  howpublished = {\url{https://en.wikipedia.org/wiki/Huber_loss}},
  year         = {2018},
  note         = {Last access in 14 January 2018}
}

% Wikipedia article (URL anchors the RMSProp section), cited as a web page
% via howpublished; the access date is recorded in `note`.
@misc{rmsprop,
  title        = {Stochastic Gradient Descent},
  author       = {Wikipedia},
  howpublished = {\url{https://en.wikipedia.org/wiki/Stochastic_gradient_descent#RMSProp}},
  year         = {2018},
  note         = {Last access in 10 March 2018}
}

% Online course page, cited as a web page via howpublished.
% A literal "|" typesets as an em-dash under LaTeX's default OT1 font
% encoding, so the bar in the title is written as {\textbar}. "Coursera" is
% brace-protected so sentence-casing styles keep its capital.
@misc{rmsprop-coursera,
  author       = {Coursera and
                  Hinton, Geoffrey and
                  Srivastava, Nitish and
                  Swersky, Kevin},
  title        = {Neural Networks for Machine Learning {\textbar} {Coursera}},
  year         = {2018},
  note         = {Last access in 10 March 2018},
  howpublished = {\url{https://www.coursera.org/learn/neural-networks}}
}

% Blog article, cited as a web page via howpublished.
% The acronyms "RL" and "A2C" are brace-protected; standard styles
% sentence-case misc titles and would otherwise print "rl" and "a2c".
@misc{intuitive_a2c,
  author       = {Gilman, Rudy},
  title        = {Intuitive {RL}: Intro to Advantage-Actor-Critic ({A2C})},
  year         = {2018},
  note         = {Last access in 31 March 2018},
  howpublished = {\url{https://hackernoon.com/intuitive-rl-intro-to-advantage-actor-critic-a2c-4ff545978752}}
}

% Course notes, cited as a web page via howpublished.
% BibTeX separates names with " and " (commas between names trigger a
% "too many commas" error) and uses the literal "and others" for et al.
% The course code "CS231n" is brace-protected against sentence casing.
@misc{cnn,
  author       = {Karpathy, Andrej and
                  Johnson, Justin and
                  others},
  title        = {{CS231n} Convolutional Neural Networks for Visual Recognition},
  year         = {2018},
  note         = {Last access in 31 March 2018},
  howpublished = {\url{http://cs231n.github.io/convolutional-networks/}}
}

% Software repository, cited as a web page via howpublished.
@misc{openai-gym,
  title        = {Gym},
  author       = {OpenAI},
  howpublished = {\url{https://github.com/openai/gym}},
  year         = {2018}
}