
Commit 96b771f

ndormann and araffin authored
Implement DQN (#28)
* Created DQN template according to the paper. Next steps: create policy, complete training, debug
* Changed base class
* Refactored save to be consistent with overriding the excluded_save_params function; do not try to exclude the parameters twice
* Added simple DQN policy
* Finished learn and train function - missing correct loss computation
* Changed collect_rollouts to work with discrete spaces
* Moved discrete-space collect_rollouts to DQN
* Basic DQN working
* Deleted SDE-related code
* Added gradient clipping and moved greedy policy to policy
* Changed policy to implement target network and added soft update (in fact standard tau is 1, so hard update)
* Fixed policy setup
* Rebased target_update_interval on _n_updates
* Adapted all tests; all tests passing
* Move to stable-baselines3
* Fixes for DQN
* Fix tests + add CnnPolicy
* Allow any optimizer for DQN
* Added some util functions to create an arbitrary linear schedule, fixed pickle problem with old exploration schedule
* More documentation
* Changed buffer dtype
* Refactor and document
* Added Sphinx documentation, updated changelog.rst
* Removed custom collect_rollouts as it is no longer necessary
* Implemented suggestions to clean code and documentation
* Extracted some functions in tests to reduce duplicated code
* Added support for exploration_fraction
* Fixed exploration_fraction
* Added documentation
* Fixed get_linear_fn -> proper progress scaling
* Merged master
* Added Nature reference
* Changed default parameters to https://www.nature.com/articles/nature14236/tables/1
* Fixed n_updates to be incremented correctly
* Correct train_freq
* Doc update
* Added special parameter for DQN in tests
* Different fix for test_discrete
* Update docs/modules/dqn.rst (Co-authored-by: Antonin RAFFIN <[email protected]>)
* Update docs/modules/dqn.rst (Co-authored-by: Antonin RAFFIN <[email protected]>)
* Update docs/modules/dqn.rst (Co-authored-by: Antonin RAFFIN <[email protected]>)
* Added RMSProp in optimizer_kwargs, as described in the Nature paper
* Exploration fraction is the inverse of 50,000,000 (total frames) / 1,000,000 (frames with linear schedule) according to the Nature paper
* Changelog update for buffer dtype
* Standard exclude parameters should always be excluded to ensure proper saving, unless intentionally included via the ``include`` parameter
* Slightly more iterations on test_discrete to pass the test
* Added param use_rms_prop instead of mutable default argument
* Forgot alpha
* Using Huber loss, Adam and learning rate 1e-4
* Account for train_freq in update_target_network
* Added memory check for both buffers
* Doc updated for buffer allocation
* Added psutil requirement
* Adapted test_identity.py
* Fixes with new SB3 version
* Fix for tensorboard name
* Convert assert to warning and fix tests
* Refactor off-policy algorithms
* Fixes
* test: remove next_obs in replay buffer
* Update changelog
* Fix tests and use tmp_path where possible
* Fix sampling bug in buffer
* Do not store next obs on episode termination
* Fix replay buffer sampling
* Update comment
* Moved epsilon from policy to model
* Update predict method
* Update Atari wrappers to match SB2
* Minor edit in the buffers
* Update changelog
* Merge branch 'master' into dqn
* Update DQN to new structure
* Fix tests and remove hardcoded path
* Fix for DQN
* Disable memory-efficient replay buffer by default
* Fix docstring
* Add tests for memory-efficient buffer
* Update changelog
* Split collect rollout
* Move target update outside `train()` for DQN
* Update changelog
* Update linear schedule doc
* Cleanup DQN code
* Minor edit
* Update version and docker images

Co-authored-by: Antonin RAFFIN <[email protected]>
1 parent e47da42 commit 96b771f

32 files changed (+1280, -275 lines)
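The commit history above mentions the linear exploration schedule several times (``exploration_fraction``, ``get_linear_fn -> proper progress scaling``). As a rough illustration of the idea only, a standalone sketch with assumed names (``linear_schedule``, ``progress``), not the SB3 implementation, might look like this: epsilon decays linearly from a start value to an end value over the first ``end_fraction`` of training and then stays constant.

# Illustrative sketch (assumed names, not the SB3 source): a linear schedule
# mapping training progress to an exploration rate.

def linear_schedule(start: float, end: float, end_fraction: float):
    """Return a function mapping progress in [0, 1] (fraction of total
    timesteps elapsed) to an epsilon value that decays linearly from
    `start` to `end` over the first `end_fraction` of training."""
    def epsilon(progress: float) -> float:
        if progress > end_fraction:
            return end
        return start + progress * (end - start) / end_fraction
    return epsilon


# Example: decay epsilon from 1.0 to 0.05 over the first 10% of training.
schedule = linear_schedule(start=1.0, end=0.05, end_fraction=0.1)
assert abs(schedule(0.0) - 1.0) < 1e-8    # start of training
assert abs(schedule(0.05) - 0.525) < 1e-8  # halfway through the decay
assert schedule(0.5) == 0.05               # constant after the decay phase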

.gitlab-ci.yml (+1, -1)

@@ -1,4 +1,4 @@
-image: stablebaselines/stable-baselines3-cpu:0.6.0
+image: stablebaselines/stable-baselines3-cpu:0.8.0a1
 
 type-check:
   script:

README.md (-1)

@@ -40,7 +40,6 @@ These algorithms will make it easier for the research community and industry to
 Please look at the issue for more details.
 Planned features:
 
-- [ ] DQN (almost ready, currently in testing phase)
 - [ ] DDPG (you can use its successor TD3 for now)
 - [ ] HER

docs/guide/algos.rst (+1)

@@ -12,6 +12,7 @@ A2C ✔️ ✔️ ✔️ ✔️
 PPO ✔️ ✔️ ✔️ ✔️ ✔️
 SAC ✔️ ❌ ❌ ❌ ❌
 TD3 ✔️ ❌ ❌ ❌ ❌
+DQN ❌ ✔️ ❌ ❌ ❌
 ============ =========== ============ ================= =============== ================
 

docs/guide/examples.rst (+6, -6)

@@ -33,7 +33,7 @@ notebooks:
 Basic Usage: Training, Saving, Loading
 --------------------------------------
 
-In the following example, we will train, save and load a A2C model on the Lunar Lander environment.
+In the following example, we will train, save and load a DQN model on the Lunar Lander environment.
 
 .. image:: ../_static/img/colab-badge.svg
   :target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/saving_loading_dqn.ipynb
@@ -57,31 +57,31 @@ In the following example, we will train, save and load a A2C model on the Lunar
 
   import gym
 
-  from stable_baselines3 import A2C
+  from stable_baselines3 import DQN
   from stable_baselines3.common.evaluation import evaluate_policy
 
 
   # Create environment
   env = gym.make('LunarLander-v2')
 
   # Instantiate the agent
-  model = A2C('MlpPolicy', env, verbose=1)
+  model = DQN('MlpPolicy', env, verbose=1)
   # Train the agent
   model.learn(total_timesteps=int(2e5))
   # Save the agent
-  model.save("a2c_lunar")
+  model.save("dqn_lunar")
   del model  # delete trained model to demonstrate loading
 
   # Load the trained agent
-  model = A2C.load("a2c_lunar")
+  model = DQN.load("dqn_lunar")
 
   # Evaluate the agent
   mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
 
   # Enjoy trained agent
   obs = env.reset()
   for i in range(1000):
-      action, _states = model.predict(obs)
+      action, _states = model.predict(obs, deterministic=True)
       obs, rewards, dones, info = env.step(action)
       env.render()

docs/index.rst (+1)

@@ -58,6 +58,7 @@ Main Features
    modules/ppo
    modules/sac
    modules/td3
+   modules/dqn
 
 .. toctree::
   :maxdepth: 1

docs/misc/changelog.rst (+11, -1)

@@ -3,15 +3,20 @@
 Changelog
 ==========
 
-Pre-Release 0.8.0a0 (WIP)
+Pre-Release 0.8.0a1 (WIP)
 ------------------------------
 
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
+- ``AtariWrapper`` and other Atari wrappers were updated to match SB2 ones
 - ``save_replay_buffer`` now receives as argument the file path instead of the folder path (@tirafesi)
 
 New Features:
 ^^^^^^^^^^^^^
+- Added ``DQN`` Algorithm (@Artemis-Skade)
+- Buffer dtype is now set according to action and observation spaces for ``ReplayBuffer``
+- Added warning when allocation of a buffer may exceed the available memory of the system
+  when ``psutil`` is available
 
 Bug Fixes:
 ^^^^^^^^^^
@@ -22,13 +27,18 @@ Deprecations:
 
 Others:
 ^^^^^^^
+- Refactored off-policy algorithms to share the same ``.learn()`` method
+- Split the ``collect_rollout()`` method for off-policy algorithms
+- Added ``_on_step()`` for off-policy base class
+- Optimized replay buffer size by removing the need of ``next_observations`` numpy array
 
 Documentation:
 ^^^^^^^^^^^^^^
 - Updated notebook links
 - Fixed a typo in the section of Enjoy a Trained Agent, in RL Baselines3 Zoo README. (@blurLake)
 
 
+
 Pre-Release 0.7.0 (2020-06-10)
 ------------------------------
 
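The changelog entry about warning when a buffer allocation may exceed the available system memory (when ``psutil`` is available) can be illustrated with a small sketch. The function name ``check_buffer_memory`` and the byte accounting below are assumptions for illustration, not the actual ``ReplayBuffer`` code.

# Illustrative sketch only: warn before allocating a replay buffer that may
# not fit in memory, using psutil when it is available.
import warnings

import numpy as np

try:
    import psutil
except ImportError:
    psutil = None  # the check is simply skipped without psutil


def check_buffer_memory(buffer_size: int, obs_shape: tuple, action_dim: int,
                        obs_dtype=np.float32) -> None:
    if psutil is None:
        return  # cannot check without psutil
    # Rough upper bound: observations + actions + rewards + dones.
    obs_bytes = buffer_size * int(np.prod(obs_shape)) * np.dtype(obs_dtype).itemsize
    act_bytes = buffer_size * action_dim * np.dtype(np.float32).itemsize
    misc_bytes = buffer_size * 2 * np.dtype(np.float32).itemsize
    total_bytes = obs_bytes + act_bytes + misc_bytes
    available = psutil.virtual_memory().available
    if total_bytes > available:
        warnings.warn(
            f"Replay buffer may need {total_bytes / 1e9:.2f} GB "
            f"but only {available / 1e9:.2f} GB are available."
        )


# Example: a 1M-transition buffer of 84x84x4 uint8 Atari frames.
check_buffer_memory(1_000_000, (84, 84, 4), action_dim=1, obs_dtype=np.uint8)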

docs/modules/dqn.rst (+94, new file)

@@ -0,0 +1,94 @@
+.. _dqn:
+
+.. automodule:: stable_baselines3.dqn
+
+
+DQN
+===
+
+`Deep Q Network (DQN) <https://arxiv.org/abs/1312.5602>`_
+
+.. rubric:: Available Policies
+
+.. autosummary::
+    :nosignatures:
+
+    MlpPolicy
+    CnnPolicy
+
+
+Notes
+-----
+
+- Original paper: https://arxiv.org/abs/1312.5602
+- Further reference: https://www.nature.com/articles/nature14236
+
+.. note::
+    This implementation provides only vanilla Deep Q-Learning and has no extensions such as Double-DQN, Dueling-DQN or Prioritized Experience Replay.
+
+
+Can I use?
+----------
+
+- Recurrent policies: ❌
+- Multi processing: ❌
+- Gym spaces:
+
+============= ====== ===========
+Space         Action Observation
+============= ====== ===========
+Discrete      ✔      ✔
+Box           ❌      ✔
+MultiDiscrete ❌      ✔
+MultiBinary   ❌      ✔
+============= ====== ===========
+
+
+Example
+-------
+
+.. code-block:: python
+
+  import gym
+  import numpy as np
+
+  from stable_baselines3 import DQN
+  from stable_baselines3.dqn import MlpPolicy
+
+  env = gym.make('CartPole-v1')
+
+  model = DQN(MlpPolicy, env, verbose=1)
+  model.learn(total_timesteps=10000, log_interval=4)
+  model.save("dqn_cartpole")
+
+  del model  # remove to demonstrate saving and loading
+
+  model = DQN.load("dqn_cartpole")
+
+  obs = env.reset()
+  while True:
+      action, _states = model.predict(obs, deterministic=True)
+      obs, reward, done, info = env.step(action)
+      env.render()
+      if done:
+          obs = env.reset()
+
+Parameters
+----------
+
+.. autoclass:: DQN
+  :members:
+  :inherited-members:
+
+.. _dqn_policies:
+
+DQN Policies
+------------
+
+.. autoclass:: MlpPolicy
+  :members:
+  :inherited-members:
+
+.. autoclass:: CnnPolicy
+  :members:
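The module documentation above describes vanilla DQN with a separate target network, and the commit history notes that the soft update reduces to a hard copy because the default tau is 1. A minimal PyTorch sketch of that update, with illustrative names only (``update_target``, ``q_net``, ``q_net_target``) and not the SB3 source, could look like:

# Minimal sketch (assumed names, not the SB3 code): a polyak/soft target
# update that becomes a hard copy when tau == 1.
import torch as th
import torch.nn as nn


def update_target(q_net: nn.Module, q_net_target: nn.Module, tau: float = 1.0) -> None:
    """Polyak-average the online network into the target network.

    With tau = 1.0 this is a plain hard copy, matching the DQN default
    mentioned in the commit message.
    """
    with th.no_grad():
        for param, target_param in zip(q_net.parameters(), q_net_target.parameters()):
            target_param.data.mul_(1.0 - tau)
            target_param.data.add_(tau * param.data)


# Example: two small Q-networks for a 4-dim observation / 2-action task.
q_net = nn.Linear(4, 2)
q_net_target = nn.Linear(4, 2)
update_target(q_net, q_net_target, tau=1.0)  # hard update: target now equals online net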

setup.cfg (+1)

@@ -27,6 +27,7 @@ per-file-ignores =
     ./stable_baselines3/__init__.py:F401
     ./stable_baselines3/common/__init__.py:F401
     ./stable_baselines3/a2c/__init__.py:F401
+    ./stable_baselines3/dqn/__init__.py:F401
     ./stable_baselines3/ppo/__init__.py:F401
     ./stable_baselines3/sac/__init__.py:F401
     ./stable_baselines3/td3/__init__.py:F401

setup.py (+3, -1)

@@ -108,7 +108,9 @@
         # For atari games,
         'atari_py~=0.2.0', 'pillow',
         # Tensorboard support
-        'tensorboard'
+        'tensorboard',
+        # Checking memory taken by replay buffer
+        'psutil'
     ]
 },
 description='Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms.',

stable_baselines3/__init__.py (+1)

@@ -4,6 +4,7 @@
 from stable_baselines3.ppo import PPO
 from stable_baselines3.sac import SAC
 from stable_baselines3.td3 import TD3
+from stable_baselines3.dqn import DQN
 
 # Read version from file
 version_file = os.path.join(os.path.dirname(__file__), 'version.txt')
