DLR-RM
diff --git a/‎.github/ISSUE_TEMPLATE/custom_env.yml
+5-4 b/‎.github/ISSUE_TEMPLATE/custom_env.yml
+5-4
diff --git a/‎.github/workflows/ci.yml
+2 b/‎.github/workflows/ci.yml
+2
diff --git a/‎Dockerfile
+11-27 b/‎Dockerfile
+11-27
diff --git a/‎Makefile
+6 b/‎Makefile
+6
diff --git a/‎docs/conda_env.yml
+3-3 b/‎docs/conda_env.yml
+3-3
diff --git a/‎docs/guide/callbacks.rst
+5-5 b/‎docs/guide/callbacks.rst
+5-5
diff --git a/‎docs/guide/checking_nan.rst
+3-3 b/‎docs/guide/checking_nan.rst
+3-3
diff --git a/‎docs/guide/custom_env.rst
+4-4 b/‎docs/guide/custom_env.rst
+4-4
diff --git a/‎docs/guide/custom_policy.rst
+4-4 b/‎docs/guide/custom_policy.rst
+4-4
@@ -49,15 +49,16 @@ body:
                 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(14,))
                 self.action_space = spaces.Box(low=-1, high=1, shape=(6,))
 
-            def reset(self):
-                return self.observation_space.sample()
+            def reset(self, seed=None):
+                return self.observation_space.sample(), {}
 
             def step(self, action):
                 obs = self.observation_space.sample()
                 reward = 1.0
-                done = False
+                terminated = False
+                truncated = False
                 info = {}
-                return obs, reward, done, info
+                return obs, reward, terminated, truncated, info
 
         env = CustomEnv()
         check_env(env)
 
@@ -55,6 +55,8 @@ jobs:
     - name: Type check
       run: |
         make type
+      # Skip the mypy type check on Python 3.7 (its result differs from all other versions)
+      if: "!(matrix.python-version == '3.7')"
     - name: Test with pytest
       run: |
         make pytest
@@ -1,41 +1,25 @@
 ARG PARENT_IMAGE
 FROM $PARENT_IMAGE
 ARG PYTORCH_DEPS=cpuonly
-ARG PYTHON_VERSION=3.7
+ARG PYTHON_VERSION=3.8
+ARG MAMBA_DOCKERFILE_ACTIVATE=1  # (otherwise python will not be found)
 
-RUN apt-get update && apt-get install -y --no-install-recommends \
-         build-essential \
-         cmake \
-         git \
-         curl \
-         ca-certificates \
-         libjpeg-dev \
-         libpng-dev \
-         libglib2.0-0 && \
-     rm -rf /var/lib/apt/lists/*
+# Install micromamba env and dependencies
+RUN micromamba install -n base -y python=$PYTHON_VERSION \
+    pytorch $PYTORCH_DEPS -c conda-forge -c pytorch -c nvidia && \
+    micromamba clean --all --yes
 
-# Install Anaconda and dependencies
-RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-     chmod +x ~/miniconda.sh && \
-     ~/miniconda.sh -b -p /opt/conda && \
-     rm ~/miniconda.sh && \
-     /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include && \
-     /opt/conda/bin/conda install -y pytorch $PYTORCH_DEPS -c pytorch && \
-     /opt/conda/bin/conda clean -ya
-ENV PATH /opt/conda/bin:$PATH
-
-ENV CODE_DIR /root/code
+ENV CODE_DIR /home/$MAMBA_USER
 
 # Copy setup file only to install dependencies
-COPY ./setup.py ${CODE_DIR}/stable-baselines3/setup.py
-COPY ./stable_baselines3/version.txt ${CODE_DIR}/stable-baselines3/stable_baselines3/version.txt
+COPY --chown=$MAMBA_USER:$MAMBA_USER ./setup.py ${CODE_DIR}/stable-baselines3/setup.py
+COPY --chown=$MAMBA_USER:$MAMBA_USER ./stable_baselines3/version.txt ${CODE_DIR}/stable-baselines3/stable_baselines3/version.txt
 
-RUN \
-    cd ${CODE_DIR}/stable-baselines3 3&& \
+RUN cd ${CODE_DIR}/stable-baselines3 && \
     pip install -e .[extra,tests,docs] && \
     # Use headless version for docker
     pip uninstall -y opencv-python && \
     pip install opencv-python-headless && \
-    rm -rf $HOME/.cache/pip
+    pip cache purge
 
 CMD /bin/bash
@@ -10,6 +10,12 @@ pytype:
 mypy:
 	mypy ${LINT_PATHS}
 
+missing-annotations:
+	mypy --disallow-untyped-calls --disallow-untyped-defs --ignore-missing-imports stable_baselines3
+
+# To report functions that are missing docstrings, run manually:
+# pylint -d R,C,W,E -e C0116 stable_baselines3 -j 4
+
 type: pytype mypy
 
 lint:
 
@@ -4,11 +4,11 @@ channels:
   - defaults
 dependencies:
   - cpuonly=1.0=0
-  - pip=21.1
+  - pip=22.3.1
   - python=3.7
-  - pytorch=1.11=py3.7_cpu_0
+  - pytorch=1.11.0=py3.7_cpu_0
   - pip:
-    - gym==0.21
+    - gymnasium
     - cloudpickle
     - opencv-python-headless
     - pandas
 
@@ -210,7 +210,7 @@ It will save the best model if ``best_model_save_path`` folder is specified and
 
 .. code-block:: python
 
-    import gym
+    import gymnasium as gym
 
     from stable_baselines3 import SAC
     from stable_baselines3.common.callbacks import EvalCallback
@@ -260,7 +260,7 @@ Alternatively, you can pass directly a list of callbacks to the ``learn()`` meth
 
 .. code-block:: python
 
-    import gym
+    import gymnasium as gym
 
     from stable_baselines3 import SAC
     from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
@@ -290,7 +290,7 @@ It must be used with the :ref:`EvalCallback` and use the event triggered by a ne
 
 .. code-block:: python
 
-    import gym
+    import gymnasium as gym
 
     from stable_baselines3 import SAC
     from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
@@ -322,7 +322,7 @@ An :ref:`EventCallback` that will trigger its child callback every ``n_steps`` t
 
 .. code-block:: python
 
-  import gym
+  import gymnasium as gym
 
   from stable_baselines3 import PPO
   from stable_baselines3.common.callbacks import CheckpointCallback, EveryNTimesteps
@@ -379,7 +379,7 @@ It must be used with the :ref:`EvalCallback` and use the event triggered after e
 
 .. code-block:: python
 
-    import gym
+    import gymnasium as gym
 
     from stable_baselines3 import SAC
     from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement
 
@@ -100,8 +100,8 @@ It will monitor the actions, observations, and rewards, indicating what action o
 
 .. code-block:: python
 
-  import gym
-  from gym import spaces
+  import gymnasium as gym
+  from gymnasium import spaces
   import numpy as np
 
   from stable_baselines3 import PPO
@@ -129,7 +129,7 @@ It will monitor the actions, observations, and rewards, indicating what action o
       def reset(self):
           return [0.0]
 
-      def render(self, mode="human", close=False):
+      def render(self, close=False):
           pass
 
   # Create environment
 
@@ -26,9 +26,9 @@ That is to say, your environment must implement the following methods (and inher
 
 .. code-block:: python
 
-  import gym
+  import gymnasium as gym
   import numpy as np
-  from gym import spaces
+  from gymnasium import spaces
 
 
   class CustomEnv(gym.Env):
@@ -54,7 +54,7 @@ That is to say, your environment must implement the following methods (and inher
           ...
           return observation  # reward, done, info can't be included
 
-      def render(self, mode="human"):
+      def render(self):
           ...
 
       def close(self):
@@ -91,7 +91,7 @@ Optionally, you can also register the environment with gym, that will allow you
 
 .. code-block:: python
 
-	from gym.envs.registration import register
+	from gymnasium.envs.registration import register
 	# Example for the CartPole environment
 	register(
 	    # unique identifier for the env `name-version`
 
@@ -101,7 +101,7 @@ using ``policy_kwargs`` parameter:
 
 .. code-block:: python
 
-  import gym
+  import gymnasium as gym
   import torch as th
 
   from stable_baselines3 import PPO
@@ -143,7 +143,7 @@ that derives from ``BaseFeaturesExtractor`` and then pass it to the model when t
 
   import torch as th
   import torch.nn as nn
-  from gym import spaces
+  from gymnasium import spaces
 
   from stable_baselines3 import PPO
   from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
@@ -208,7 +208,7 @@ downsampling and "vector" with a single linear layer.
 
 .. code-block:: python
 
-  import gym
+  import gymnasium as gym
   import torch as th
   from torch import nn
 
@@ -308,7 +308,7 @@ If your task requires even more granular control over the policy/value architect
 
   from typing import Callable, Dict, List, Optional, Tuple, Type, Union
 
-  from gym import spaces
+  from gymnasium import spaces
   import torch as th
   from torch import nn