From c390a6bc0452bc99b498a918e42a350cfc4524ae Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Wed, 8 Jun 2022 17:19:50 +0100
Subject: [PATCH 01/22] Allow a new RNG to be generated with seed=-1 and
 update env_checker to fix a bug if the environment doesn't use np_random in reset

---
 gym/core.py              | 4 +++-
 gym/utils/env_checker.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gym/core.py b/gym/core.py
index c3d06cd8801..8bf926c970e 100644
--- a/gym/core.py
+++ b/gym/core.py
@@ -178,7 +178,9 @@ def reset(
                 the ``info`` returned by :meth:`step`.
         """
         # Initialize the RNG if the seed is manually passed
-        if seed is not None:
+        if seed == -1:
+            self._np_random, seed = seeding.np_random(None)
+        elif seed is not None:
             self._np_random, seed = seeding.np_random(seed)
 
     # TODO: remove kwarg mode with gym 1.0
diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py
index 9fe621ffaf9..c52959e670b 100644
--- a/gym/utils/env_checker.py
+++ b/gym/utils/env_checker.py
@@ -77,7 +77,7 @@ def check_reset_seed(env: gym.Env):
             seed_123_rng = deepcopy(env.unwrapped.np_random)
 
             # Note: for some environment, they may initialise at the same state, therefore we cannot check the obs_1 != obs_3
-            obs_4 = env.reset(seed=None)
+            obs_4 = env.reset(seed=-1)
             assert obs_4 in env.observation_space
 
             assert (

From 654476ee08b9f6ccd765f4d6dec66ea26db84766 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Wed, 8 Jun 2022 17:56:43 +0100
Subject: [PATCH 02/22] Revert "fixed `gym.vector.make` where the checker was
 being applied in the opposite case than was intended to (#2871)"

This reverts commit 519dfd9117e98e4f52d38064d2b0f79974fb676d.
---
 gym/vector/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gym/vector/__init__.py b/gym/vector/__init__.py
index b71ec11940d..89c2a80b229 100644
--- a/gym/vector/__init__.py
+++ b/gym/vector/__init__.py
@@ -63,6 +63,7 @@ def _make_env():
         return _make_env
 
     env_fns = [
-        create_env(disable_env_checker or env_num > 0) for env_num in range(num_envs)
+        create_env(env_num == 0 and disable_env_checker is False)
+        for env_num in range(num_envs)
     ]
     return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns)

From 2e5dc9c9a9fa42adbca908e664b185613eb4c98f Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Mon, 13 Jun 2022 12:29:06 +0100
Subject: [PATCH 03/22] Remove bad pushed commits

---
 gym/core.py              | 4 +---
 gym/utils/env_checker.py | 2 +-
 gym/vector/__init__.py   | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/gym/core.py b/gym/core.py
index 8bf926c970e..c3d06cd8801 100644
--- a/gym/core.py
+++ b/gym/core.py
@@ -178,9 +178,7 @@ def reset(
                 the ``info`` returned by :meth:`step`.
         """
         # Initialize the RNG if the seed is manually passed
-        if seed == -1:
-            self._np_random, seed = seeding.np_random(None)
-        elif seed is not None:
+        if seed is not None:
             self._np_random, seed = seeding.np_random(seed)
 
     # TODO: remove kwarg mode with gym 1.0
diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py
index c52959e670b..9fe621ffaf9 100644
--- a/gym/utils/env_checker.py
+++ b/gym/utils/env_checker.py
@@ -77,7 +77,7 @@ def check_reset_seed(env: gym.Env):
             seed_123_rng = deepcopy(env.unwrapped.np_random)
 
             # Note: for some environment, they may initialise at the same state, therefore we cannot check the obs_1 != obs_3
-            obs_4 = env.reset(seed=-1)
+            obs_4 = env.reset(seed=None)
             assert obs_4 in env.observation_space
 
             assert (
diff --git a/gym/vector/__init__.py b/gym/vector/__init__.py
index 89c2a80b229..b71ec11940d 100644
--- a/gym/vector/__init__.py
+++ b/gym/vector/__init__.py
@@ -63,7 +63,6 @@ def _make_env():
         return _make_env
 
     env_fns = [
-        create_env(env_num == 0 and disable_env_checker is False)
-        for env_num in range(num_envs)
+        create_env(disable_env_checker or env_num > 0) for env_num in range(num_envs)
     ]
     return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns)

From 400b1e9f0277f8b80a1add71764c4c94c45a7c12 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 11:27:27 +0100
Subject: [PATCH 04/22] Fixed spelling in core.py

---
 gym/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gym/core.py b/gym/core.py
index 2b507322e45..931770d8cbe 100644
--- a/gym/core.py
+++ b/gym/core.py
@@ -231,7 +231,7 @@ def seed(self, seed=None):
             there aren't accidental correlations between multiple generators.
 
         Args:
-            seed(Optional int): The seed value for the random number geneartor
+            seed(Optional int): The seed value for the random number generator
 
         Returns:
             seeds (List[int]): Returns the list of seeds used in this environment's random

From 4281c760af05bd08d3ef337700f309d8baf26219 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 11:39:33 +0100
Subject: [PATCH 05/22] Pins pytest to the last py 3.6 version

---
 py.Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py.Dockerfile b/py.Dockerfile
index 0d4c290cb75..e7db5e8fe91 100644
--- a/py.Dockerfile
+++ b/py.Dockerfile
@@ -14,6 +14,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin
 COPY . /usr/local/gym/
 WORKDIR /usr/local/gym/
 
-RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest mock ; else pip install .[testing] ; fi
+RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest==6.2.5 ; else pip install .[testing] ; fi
 
 ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]

From 5dee690aac30be083f2582b24b837cf85a334859 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 14:19:58 +0100
Subject: [PATCH 06/22] Add support for action masking in
 Space.sample(mask=...)

---
 gym/envs/toy_text/taxi.py    | 25 +++++++++++++++++++++++--
 gym/spaces/box.py            |  5 ++++-
 gym/spaces/dict.py           | 14 +++++++++++++-
 gym/spaces/discrete.py       | 11 ++++++++++-
 gym/spaces/graph.py          |  8 +++++++-
 gym/spaces/multi_binary.py   |  8 +++++++-
 gym/spaces/multi_discrete.py | 16 +++++++++++++++-
 gym/spaces/space.py          |  2 +-
 gym/spaces/tuple.py          | 12 ++++++++++--
 py.Dockerfile                |  2 +-
 10 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index abdccd8cab0..4a6b7930364 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -214,6 +214,22 @@ def decode(self, i):
         assert 0 <= i < 5
         return reversed(out)
 
+    def valid_mask(self, row, col, pass_loc, dest_idx, max_row):
+        mask = np.zeros(6, dtype=bool)
+        if row < max_row:
+            mask[0] = 1
+        if row > 0:
+            mask[1] = 1
+        if self.desc[1 + row, 2 * col + 2] == b":":
+            mask[2] = 1
+        if self.desc[1 + row, 2 * col] == b":":
+            mask[3] = 1
+        if (row, col) == self.locs[pass_loc]:
+            mask[4] = 1
+        if (row, col) == self.locs[dest_idx]:
+            mask[5] = 1
+        return mask
+
     def step(self, a):
         transitions = self.P[self.s][a]
         i = categorical_sample([t[0] for t in transitions], self.np_random)
@@ -221,7 +237,10 @@ def step(self, a):
         self.s = s
         self.lastaction = a
         self.renderer.render_step()
-        return (int(s), r, d, {"prob": p})
+
+        taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s)
+        mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4)
+        return int(s), r, d, {"prob": p, "action_mask": mask}
 
     def reset(
         self,
@@ -239,7 +258,9 @@ def reset(
         if not return_info:
             return int(self.s)
         else:
-            return int(self.s), {"prob": 1}
+            taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s)
+            mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4)
+            return int(self.s), {"prob": 1, "action_mask": mask}
 
     def render(self, mode="human"):
         if self.render_mode is not None:
diff --git a/gym/spaces/box.py b/gym/spaces/box.py
index e9b62c0a27d..f43bb57e3bd 100644
--- a/gym/spaces/box.py
+++ b/gym/spaces/box.py
@@ -146,7 +146,7 @@ def is_bounded(self, manner: str = "both") -> bool:
         else:
             raise ValueError("manner is not in {'below', 'above', 'both'}")
 
-    def sample(self) -> np.ndarray:
+    def sample(self, mask: np.ndarray = None) -> np.ndarray:
         r"""Generates a single random sample inside the Box.
 
         In creating a sample of the box, each coordinate is sampled (independently) from a distribution
@@ -160,6 +160,9 @@ def sample(self) -> np.ndarray:
         Returns:
             A sampled value from the Box
         """
+        if mask is not None:
+            return np.zeros(self.shape, self.dtype)
+
         high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
         sample = np.empty(self.shape)
 
diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py
index 1aec7b0b01a..61e309c1495 100644
--- a/gym/spaces/dict.py
+++ b/gym/spaces/dict.py
@@ -1,6 +1,7 @@
 """Implementation of a space that represents the cartesian product of other spaces as a dictionary."""
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
+from typing import Dict
 from typing import Dict as TypingDict
 from typing import Optional, Union
 
@@ -137,7 +138,7 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
 
         return seeds
 
-    def sample(self) -> dict:
+    def sample(self, mask: Dict[str, np.ndarray] = None) -> dict:
         """Generates a single random sample from this space.
 
         The sample is an ordered dictionary of independent samples from the constituent spaces.
@@ -145,6 +146,17 @@ def sample(self) -> dict:
         Returns:
             A dictionary with the same key and sampled values from :attr:`self.spaces`
         """
+        if mask is not None:
+            assert isinstance(
+                mask, dict
+            ), f"Expects mask to be a dict, actual type: {type(dict)}"
+            assert (
+                mask.keys == self.keys()
+            ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.keys()}"
+            return OrderedDict(
+                [(k, space.sample(mask[k])) for k, space in self.spaces.items()]
+            )
+
         return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()])
 
     def contains(self, x) -> bool:
diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
index f2ed1569ad1..6b32d971c80 100644
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -40,7 +40,7 @@ def __init__(
         self.start = int(start)
         super().__init__((), np.int64, seed)
 
-    def sample(self) -> int:
+    def sample(self, mask: np.ndarray = None) -> int:
         """Generates a single random sample from this space.
 
         A sample will be chosen uniformly at random.
@@ -48,6 +48,15 @@ def sample(self) -> int:
         Returns:
             A sampled integer from the space
         """
+        if mask is not None:
+            assert isinstance(mask, np.ndarray)
+            assert mask.dtype == np.int8
+            assert mask.shape == (self.n,)
+            if np.any(mask):
+                return int(self.start + self.np_random.choice(np.where(mask)))
+            else:
+                return self.start
+
         return int(self.start + self.np_random.integers(self.n))
 
     def contains(self, x) -> bool:
diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index c26b7ff08d5..fc751c173f9 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 
+import gym
 from gym.spaces.box import Box
 from gym.spaces.discrete import Discrete
 from gym.spaces.multi_discrete import MultiDiscrete
@@ -98,12 +99,17 @@ def _sample_sample_space(self, sample_space) -> Optional[np.ndarray]:
         else:
             return None
 
-    def sample(self) -> NamedTuple:
+    def sample(self, mask=None) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
+        if mask is not None:
+            raise gym.error.Error(
+                "Action masking for graphs are not supported at this time, please raise an issue on github."
+            )
+
         num_nodes = self.np_random.integers(low=1, high=10)
 
         # we only have edges when we have at least 2 nodes
diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py
index 1439536012b..5c93731072c 100644
--- a/gym/spaces/multi_binary.py
+++ b/gym/spaces/multi_binary.py
@@ -51,7 +51,7 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than gym.Space - never None."""
         return self._shape  # type: ignore
 
-    def sample(self) -> np.ndarray:
+    def sample(self, mask: np.ndarray = None) -> np.ndarray:
         """Generates a single random sample from this space.
 
         A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).
@@ -59,6 +59,12 @@ def sample(self) -> np.ndarray:
         Returns:
             Sampled values from space
         """
+        if mask is not None:
+            assert isinstance(mask, np.ndarray)
+            assert mask.dtype == np.int8
+            assert mask.shape == self.shape
+            return mask * self.np_random.integers(0, 2, self.n, self.dtype)
+
         return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype)
 
     def contains(self, x) -> bool:
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index cb43420157b..465c57516c3 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -63,8 +63,22 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than :class:`gym.Space` - never None."""
         return self._shape  # type: ignore
 
-    def sample(self) -> np.ndarray:
+    def sample(self, mask: np.ndarray = None) -> np.ndarray:
         """Generates a single random sample this space."""
+        if mask is not None:
+            assert isinstance(mask, np.ndarray)
+            assert mask.dtype == np.int8
+            assert mask.shape == self.shape
+
+            multi_mask = [np.where(row) for row in mask]
+            return np.array(
+                [
+                    self.np_random.choice(row_mask) if len(row_mask) > 0 else 0
+                    for row_mask in multi_mask
+                ],
+                dtype=self.dtype,
+            )
+
         return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype)
 
     def contains(self, x) -> bool:
diff --git a/gym/spaces/space.py b/gym/spaces/space.py
index 204a2dd4413..b5680e18942 100644
--- a/gym/spaces/space.py
+++ b/gym/spaces/space.py
@@ -81,7 +81,7 @@ def shape(self) -> Optional[Tuple[int, ...]]:
         """Return the shape of the space as an immutable property."""
         return self._shape
 
-    def sample(self) -> T_cov:
+    def sample(self, mask=None) -> T_cov:
         """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space."""
         raise NotImplementedError
 
diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py
index 51eef635164..c8de59defc1 100644
--- a/gym/spaces/tuple.py
+++ b/gym/spaces/tuple.py
@@ -1,5 +1,5 @@
 """Implementation of a space that represents the cartesian product of other spaces."""
-from typing import Iterable, List, Optional, Sequence, Union
+from typing import Iterable, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
@@ -72,7 +72,7 @@ def seed(self, seed: Optional[Union[int, List[int]]] = None) -> list:
 
         return seeds
 
-    def sample(self) -> tuple:
+    def sample(self, mask: Tuple[np.ndarray] = None) -> tuple:
         """Generates a single random sample inside this space.
 
         This method draws independent samples from the subspaces.
@@ -80,6 +80,14 @@ def sample(self) -> tuple:
         Returns:
             Tuple of the subspace's samples
         """
+        if mask is not None:
+            assert isinstance(mask, tuple)
+            assert len(mask) == len(self.spaces)
+            return tuple(
+                space.sample(mask=sub_mask)
+                for space, sub_mask in zip(self.spaces, mask)
+            )
+
         return tuple(space.sample() for space in self.spaces)
 
     def contains(self, x) -> bool:
diff --git a/py.Dockerfile b/py.Dockerfile
index e7db5e8fe91..0d4c290cb75 100644
--- a/py.Dockerfile
+++ b/py.Dockerfile
@@ -14,6 +14,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin
 COPY . /usr/local/gym/
 WORKDIR /usr/local/gym/
 
-RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest==6.2.5 ; else pip install .[testing] ; fi
+RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest mock ; else pip install .[testing] ; fi
 
 ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]

From bc6ab4a729627393aff90368220d4c35424807c1 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 15:28:28 +0100
Subject: [PATCH 07/22] Fix action mask

---
 gym/envs/toy_text/taxi.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index 4a6b7930364..6287a7b0972 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -214,19 +214,20 @@ def decode(self, i):
         assert 0 <= i < 5
         return reversed(out)
 
-    def valid_mask(self, row, col, pass_loc, dest_idx, max_row):
+    def action_mask(self, row, col, pass_loc, dest_idx):
+        """Computes an action mask for the action space using the state information."""
         mask = np.zeros(6, dtype=bool)
-        if row < max_row:
+        if row < 5:
             mask[0] = 1
         if row > 0:
             mask[1] = 1
-        if self.desc[1 + row, 2 * col + 2] == b":":
+        if col < 5 and self.desc[1 + row, 2 * col + 2] == b":":
             mask[2] = 1
-        if self.desc[1 + row, 2 * col] == b":":
+        if col > 0 and self.desc[1 + row, 2 * col] == b":":
             mask[3] = 1
-        if (row, col) == self.locs[pass_loc]:
+        if pass_loc < 4 and (row, col) == self.locs[pass_loc]:
             mask[4] = 1
-        if (row, col) == self.locs[dest_idx]:
+        if pass_loc == 4 and (row, col) == self.locs[dest_idx]:
             mask[5] = 1
         return mask
 
@@ -239,7 +240,7 @@ def step(self, a):
         self.renderer.render_step()
 
         taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s)
-        mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4)
+        mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx)
         return int(s), r, d, {"prob": p, "action_mask": mask}
 
     def reset(
@@ -259,7 +260,7 @@ def reset(
             return int(self.s)
         else:
             taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s)
-            mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4)
+            mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx)
             return int(self.s), {"prob": 1, "action_mask": mask}
 
     def render(self, mode="human"):

From 1700e9d63dd40b9537bedb0cb85132a98c61e61a Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 17:41:05 +0100
Subject: [PATCH 08/22] Fix action_mask

---
 gym/envs/toy_text/taxi.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index 6287a7b0972..12f12e15892 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -214,20 +214,24 @@ def decode(self, i):
         assert 0 <= i < 5
         return reversed(out)
 
-    def action_mask(self, row, col, pass_loc, dest_idx):
+    def action_mask(self, state: int):
         """Computes an action mask for the action space using the state information."""
-        mask = np.zeros(6, dtype=bool)
-        if row < 5:
+        mask = np.zeros(6, dtype=np.int8)
+        taxi_row, taxi_col, pass_loc, dest_idx = self.decode(state)
+        if taxi_row < 4:
             mask[0] = 1
-        if row > 0:
+        if taxi_row > 0:
             mask[1] = 1
-        if col < 5 and self.desc[1 + row, 2 * col + 2] == b":":
+        if taxi_col < 4 and self.desc[taxi_row + 1, 2 * taxi_col + 2] == b":":
             mask[2] = 1
-        if col > 0 and self.desc[1 + row, 2 * col] == b":":
+        if taxi_col > 0 and self.desc[taxi_row + 1, 2 * taxi_col] == b":":
             mask[3] = 1
-        if pass_loc < 4 and (row, col) == self.locs[pass_loc]:
+        if pass_loc < 4 and (taxi_row, taxi_col) == self.locs[pass_loc]:
             mask[4] = 1
-        if pass_loc == 4 and (row, col) == self.locs[dest_idx]:
+        if pass_loc == 4 and (
+            (taxi_row, taxi_col) == self.locs[dest_idx]
+            or (taxi_row, taxi_col) in self.locs
+        ):
             mask[5] = 1
         return mask
 
@@ -239,9 +243,7 @@ def step(self, a):
         self.lastaction = a
         self.renderer.render_step()
 
-        taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s)
-        mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx)
-        return int(s), r, d, {"prob": p, "action_mask": mask}
+        return int(s), r, d, {"prob": p, "action_mask": self.action_mask(s)}
 
     def reset(
         self,
@@ -259,9 +261,7 @@ def reset(
         if not return_info:
             return int(self.s)
         else:
-            taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s)
-            mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx)
-            return int(self.s), {"prob": 1, "action_mask": mask}
+            return int(self.s), {"prob": 1, "action_mask": self.action_mask(self.s)}
 
     def render(self, mode="human"):
         if self.render_mode is not None:

From 7f46df2db4ce00e22309ec2a9eccf18eef840bdc Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 17 Jun 2022 17:41:25 +0100
Subject: [PATCH 09/22] Add tests for Taxi action_mask and encode/decode

---
 tests/envs/test_env_implementation.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py
index 844d422c12a..7a088d4698c 100644
--- a/tests/envs/test_env_implementation.py
+++ b/tests/envs/test_env_implementation.py
@@ -3,6 +3,7 @@
 import gym
 from gym.envs.box2d import BipedalWalker
 from gym.envs.box2d.lunar_lander import demo_heuristic_lander
+from gym.envs.toy_text import TaxiEnv
 from gym.envs.toy_text.frozen_lake import generate_random_map
 
 
@@ -80,3 +81,24 @@ def test_frozenlake_dfs_map_generation(map_size: int):
                     if new_frozenlake[new_row][new_col] not in "#H":
                         frontier.append((new_row, new_col))
     raise AssertionError("No path through the frozenlake was found.")
+
+
+def test_taxi_action_mask():
+    env = TaxiEnv()
+
+    for state in env.P:
+        mask = env.action_mask(state)
+        for action, possible in enumerate(mask):
+            _, next_state, _, _ = env.P[state][action][0]
+            assert state != next_state if possible else state == next_state
+
+
+def test_taxi_encode_decode():
+    env = TaxiEnv()
+
+    state = env.reset()
+    for _ in range(100):
+        assert (
+            env.encode(*env.decode(state)) == state
+        ), f"{state=}, encode decode: {env.encode(*env.decode(state))}"
+        state, _, _, _ = env.step(env.action_space.sample())

From cd910072066d16744178d96197068ed4c009ea11 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Sun, 19 Jun 2022 18:15:41 +0100
Subject: [PATCH 10/22] Added docstrings, fixed bugs and added taxi examples

---
 gym/envs/toy_text/taxi.py    |  8 +++-
 gym/spaces/box.py            | 10 ++++-
 gym/spaces/dict.py           | 12 +++---
 gym/spaces/discrete.py       | 26 ++++++++----
 gym/spaces/graph.py          | 34 ++++++++--------
 gym/spaces/multi_binary.py   | 21 ++++++++--
 gym/spaces/multi_discrete.py | 51 +++++++++++++++---------
 gym/spaces/space.py          | 14 ++++++-
 gym/spaces/tuple.py          | 14 +++++--
 tests/spaces/test_spaces.py  | 77 +++++++++++++++++++++++++++---------
 10 files changed, 191 insertions(+), 76 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index 12f12e15892..ce831b095fb 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -56,6 +56,12 @@ class TaxiEnv(Env):
     - 4: pickup passenger
     - 5: drop off passenger
 
+    For some cases, taking these actions will have no effect on the state of the agent.
+    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying
+    if the action will change the state.
+    To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``
+    Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``.
+
     ### Observations
     There are 500 discrete states since there are 25 taxi positions, 5 possible
     locations of the passenger (including the case when the passenger is in the
@@ -99,7 +105,7 @@ class TaxiEnv(Env):
     ```
 
     ### Version History
-    * v3: Map Correction + Cleaner Domain Description
+    * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
     * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
     * v1: Remove (3,2) from locs, add passidx<4 check
     * v0: Initial versions release
diff --git a/gym/spaces/box.py b/gym/spaces/box.py
index f43bb57e3bd..35fbeb2d258 100644
--- a/gym/spaces/box.py
+++ b/gym/spaces/box.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 
+import gym.error
 from gym import logger
 from gym.spaces.space import Space
 from gym.utils import seeding
@@ -146,7 +147,7 @@ def is_bounded(self, manner: str = "both") -> bool:
         else:
             raise ValueError("manner is not in {'below', 'above', 'both'}")
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
+    def sample(self, mask: None = None) -> np.ndarray:
         r"""Generates a single random sample inside the Box.
 
         In creating a sample of the box, each coordinate is sampled (independently) from a distribution
@@ -157,11 +158,16 @@ def sample(self, mask: np.ndarray = None) -> np.ndarray:
         * :math:`(-\infty, b]` : shifted negative exponential distribution
         * :math:`(-\infty, \infty)` : normal distribution
 
+        Args:
+            mask: A mask for sampling values from the Box space, currently unsupported.
+
         Returns:
             A sampled value from the Box
         """
         if mask is not None:
-            return np.zeros(self.shape, self.dtype)
+            raise gym.error.Error(
+                f"Box.sample cannot be provided a mask, actual value: {mask}"
+            )
 
         high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
         sample = np.empty(self.shape)
diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py
index 61e309c1495..ec668e44347 100644
--- a/gym/spaces/dict.py
+++ b/gym/spaces/dict.py
@@ -1,7 +1,6 @@
 """Implementation of a space that represents the cartesian product of other spaces as a dictionary."""
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
-from typing import Dict
 from typing import Dict as TypingDict
 from typing import Optional, Union
 
@@ -138,21 +137,24 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Dict[str, np.ndarray] = None) -> dict:
+    def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict:
         """Generates a single random sample from this space.
 
         The sample is an ordered dictionary of independent samples from the constituent spaces.
 
+        Args:
+            mask: An optional mask for each of the subspaces, expects the same keys as the space
+
         Returns:
             A dictionary with the same key and sampled values from :attr:`self.spaces`
         """
         if mask is not None:
             assert isinstance(
                 mask, dict
-            ), f"Expects mask to be a dict, actual type: {type(dict)}"
+            ), f"Expects mask to be a dict, actual type: {type(mask)}"
             assert (
-                mask.keys == self.keys()
-            ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.keys()}"
+                mask.keys() == self.spaces.keys()
+            ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.spaces.keys()}"
             return OrderedDict(
                 [(k, space.sample(mask[k])) for k, space in self.spaces.items()]
             )
diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
index 6b32d971c80..be9361194de 100644
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -40,20 +40,32 @@ def __init__(
         self.start = int(start)
         super().__init__((), np.int64, seed)
 
-    def sample(self, mask: np.ndarray = None) -> int:
+    def sample(self, mask: Optional[np.ndarray] = None) -> int:
         """Generates a single random sample from this space.
 
-        A sample will be chosen uniformly at random.
+        A sample will be chosen uniformly at random with the mask if provided
+
+        Args:
+            mask: An optional mask for if an action can be selected. Expected shape is (n,). If not possible actions, will default to `space.start`
 
         Returns:
             A sampled integer from the space
         """
         if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == (self.n,)
-            if np.any(mask):
-                return int(self.start + self.np_random.choice(np.where(mask)))
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert mask.shape == (
+                self.n,
+            ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+            if np.any(mask == 1):
+                return int(self.start + self.np_random.choice(np.where(mask)[0]))
             else:
                 return self.start
 
diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index fc751c173f9..44bb58f95e4 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -1,10 +1,9 @@
 """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space."""
 from collections import namedtuple
-from typing import NamedTuple, Optional, Sequence, Union
+from typing import NamedTuple, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
-import gym
 from gym.spaces.box import Box
 from gym.spaces.discrete import Discrete
 from gym.spaces.multi_discrete import MultiDiscrete
@@ -93,23 +92,18 @@ def _generate_sample_space(
                 f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error."
             )
 
-    def _sample_sample_space(self, sample_space) -> Optional[np.ndarray]:
-        if sample_space is not None:
-            return sample_space.sample()
-        else:
-            return None
-
-    def sample(self, mask=None) -> NamedTuple:
+    def sample(
+        self, mask: Optional[Tuple[Optional[np.ndarray], Optional[np.ndarray]]] = None
+    ) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
+        Args:
+            mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces)
+
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
-        if mask is not None:
-            raise gym.error.Error(
-                "Action masking for graphs are not supported at this time, please raise an issue on github."
-            )
-
+        node_mask, edge_mask = mask if mask is not None else (None, None)
         num_nodes = self.np_random.integers(low=1, high=10)
 
         # we only have edges when we have at least 2 nodes
@@ -121,8 +115,16 @@ def sample(self, mask=None) -> NamedTuple:
         node_sample_space = self._generate_sample_space(self.node_space, num_nodes)
         edge_sample_space = self._generate_sample_space(self.edge_space, num_edges)
 
-        sampled_nodes = self._sample_sample_space(node_sample_space)
-        sampled_edges = self._sample_sample_space(edge_sample_space)
+        sampled_nodes = (
+            node_sample_space.sample(node_mask)
+            if node_sample_space is not None
+            else None
+        )
+        sampled_edges = (
+            edge_sample_space.sample(edge_mask)
+            if edge_sample_space is not None
+            else None
+        )
 
         sampled_edge_links = None
         if sampled_edges is not None and num_edges > 0:
diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py
index 5c93731072c..dd1a62c11e5 100644
--- a/gym/spaces/multi_binary.py
+++ b/gym/spaces/multi_binary.py
@@ -51,18 +51,31 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than gym.Space - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
+    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
         """Generates a single random sample from this space.
 
         A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).
 
+        Args:
+            mask: An optional np.ndarray to mask samples, where mask == 0 will have samples == 0
+
         Returns:
             Sampled values from space
         """
         if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == self.shape
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert (
+                mask.shape == self.shape
+            ), f"The expected shape of the mask is {self.shape}, actual shape: {mask.shape}"
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+
             return mask * self.np_random.integers(0, 2, self.n, self.dtype)
 
         return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype)
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 465c57516c3..5eec3a30437 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -23,8 +23,17 @@ class MultiDiscrete(Space[np.ndarray]):
     2. Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
     3. Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
 
-    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
+    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample is array([3, 1, 0])
 
+    Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
+    if ``nvec`` has several axes:
+
+    Example::
+
+        >> d = MultiDiscrete(np.array([[1, 2], [3, 4]]))
+        >> d.sample()
+        array([[0, 0],
+               [2, 3]])
     """
 
     def __init__(
@@ -37,16 +46,6 @@ def __init__(
 
         The argument ``nvec`` will determine the number of values each categorical variable can take.
 
-        Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
-        if ``nvec`` has several axes:
-
-        Example::
-
-            >> d = MultiDiscrete(np.array([[1, 2], [3, 4]]))
-            >> d.sample()
-            array([[0, 0],
-                   [2, 3]])
-
         Args:
             nvec: vector of counts of each categorical variable. This will usually be a list of integers. However,
                 you may also pass a more complicated numpy array if you'd like the space to have several axes.
@@ -63,14 +62,30 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than :class:`gym.Space` - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
-        """Generates a single random sample this space."""
-        if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == self.shape
+    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
+        """Generates a single random sample this space.
+
+        Args:
+            mask: An optional mask for multi-discrete, expected shape is `space.nvec`. If there are no possible actions, defaults to 0
 
-            multi_mask = [np.where(row) for row in mask]
+        Returns:
+            An np.ndarray of shape `space.shape`
+        """
+        if mask is not None:
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert np.all(
+                mask.shape == self.nvec
+            ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently."
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+
+            multi_mask = [np.where(row)[0] for row in mask]
             return np.array(
                 [
                     self.np_random.choice(row_mask) if len(row_mask) > 0 else 0
diff --git a/gym/spaces/space.py b/gym/spaces/space.py
index b5680e18942..3b006228b9e 100644
--- a/gym/spaces/space.py
+++ b/gym/spaces/space.py
@@ -1,6 +1,7 @@
 """Implementation of the `Space` metaclass."""
 
 from typing import (
+    Any,
     Generic,
     Iterable,
     List,
@@ -81,8 +82,17 @@ def shape(self) -> Optional[Tuple[int, ...]]:
         """Return the shape of the space as an immutable property."""
         return self._shape
 
-    def sample(self, mask=None) -> T_cov:
-        """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space."""
+    def sample(self, mask: Optional[Any] = None) -> T_cov:
+        """Randomly sample an element of this space.
+
+        Can be uniform or non-uniform sampling based on boundedness of space.
+
+        Args:
+            mask: A mask used for sampling, see Space for implementation details.
+
+        Returns:
+            A sampled actions from the space
+        """
         raise NotImplementedError
 
     def seed(self, seed: Optional[int] = None) -> list:
diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py
index c8de59defc1..57bc6b40cab 100644
--- a/gym/spaces/tuple.py
+++ b/gym/spaces/tuple.py
@@ -72,17 +72,25 @@ def seed(self, seed: Optional[Union[int, List[int]]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Tuple[np.ndarray] = None) -> tuple:
+    def sample(self, mask: Optional[Tuple[Optional[np.ndarray]]] = None) -> tuple:
         """Generates a single random sample inside this space.
 
         This method draws independent samples from the subspaces.
 
+        Args:
+            mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces
+
         Returns:
             Tuple of the subspace's samples
         """
         if mask is not None:
-            assert isinstance(mask, tuple)
-            assert len(mask) == len(self.spaces)
+            assert isinstance(
+                mask, tuple
+            ), f"Expected type of mask is tuple, actual type: {type(mask)}"
+            assert len(mask) == len(
+                self.spaces
+            ), f"Expected length of mask is {len(self.spaces)}, actual length: {len(mask)}"
+
             return tuple(
                 space.sample(mask=sub_mask)
                 for space, sub_mask in zip(self.spaces, mask)
diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index fbc98aa1fae..fded6654afe 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 
+from gym import Space
 from gym.spaces import Box, Dict, Discrete, Graph, MultiBinary, MultiDiscrete, Tuple
 
 
@@ -154,31 +155,71 @@ def test_inequality(spaces):
     [
         Discrete(5),
         Discrete(8, start=-20),
-        Box(low=0, high=255, shape=(2,), dtype="uint8"),
+        Box(low=0, high=255, shape=(2,)),
         Box(low=-np.inf, high=np.inf, shape=(3, 3)),
         Box(low=1.0, high=np.inf, shape=(3, 3)),
         Box(low=-np.inf, high=2.0, shape=(3, 3)),
+        Box(low=np.array([0, 2]), high=np.array([10, 4])),
+        MultiDiscrete([3, 5]),
+        MultiDiscrete(np.array([[3, 5], [2, 2]])),
+        MultiBinary([2, 4]),
     ],
 )
-def test_sample(space):
+def test_sample(space: Space, n_trials: int = 1_000):
+    """Test the space sample works, todo, add chi-squared testing for the distribution"""
     space.seed(0)
-    n_trials = 100
     samples = np.array([space.sample() for _ in range(n_trials)])
-    expected_mean = 0.0
-    if isinstance(space, Box):
-        if space.is_bounded():
-            expected_mean = (space.high + space.low) / 2
-        elif space.is_bounded("below"):
-            expected_mean = 1 + space.low
-        elif space.is_bounded("above"):
-            expected_mean = -1 + space.high
-        else:
-            expected_mean = 0.0
-    elif isinstance(space, Discrete):
-        expected_mean = space.start + space.n / 2
-    else:
-        raise NotImplementedError
-    np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std())
+    assert len(samples) == n_trials
+
+
+@pytest.mark.parametrize(
+    "space,mask",
+    [
+        (Discrete(5), np.array([0, 1, 1, 0, 1], dtype=np.int8)),
+        (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)),
+        (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)),
+        (MultiDiscrete([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)),
+        # (MultiDiscrete(np.array([[3, 2], [2, 2]])), np.array([[[0, 1], [1, 1], [0, 0]], [[0, 1], [1, 1]]], dtype=np.int8)), Unsupported currently
+        (MultiBinary([2, 4]), np.array([[1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.int8)),
+    ],
+)
+def test_space_sample_mask(space, mask, n_trials: int = 100):
+    """Test the space sample with mask works, todo, add chi-squared testing for the distribution"""
+    space.seed(0)
+    samples = np.array([space.sample(mask) for _ in range(n_trials)])
+    assert len(samples) == n_trials
+
+
+@pytest.mark.parametrize(
+    "space,mask",
+    [
+        (
+            Dict(a=Discrete(2), b=MultiDiscrete([2, 4])),
+            {
+                "a": np.array([0, 1], dtype=np.int8),
+                "b": np.array([[0, 0, 0, 0], [1, 1, 1, 0]], dtype=np.int8),
+            },
+        ),
+        (
+            Tuple([Box(0, 1, ()), Discrete(3), MultiBinary([2, 1])]),
+            (
+                None,
+                np.array([0, 1, 0], dtype=np.int8),
+                np.array([[0], [1]], dtype=np.int8),
+            ),
+        ),
+        (
+            Dict(a=Tuple([Box(0, 1, ()), Discrete(3)]), b=Discrete(3)),
+            {
+                "a": (None, np.array([1, 0, 0], dtype=np.int8)),
+                "b": np.array([0, 1, 1], dtype=np.int8),
+            },
+        ),
+    ],
+)
+def test_composite_space_sample_mask(space, mask):
+    """Test that composite space samples use the mask correctly."""
+    space.sample(mask)
 
 
 @pytest.mark.parametrize(

From be4063efbb74d285f357a15e8b3c08c58b705ee3 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Sun, 19 Jun 2022 22:47:50 +0100
Subject: [PATCH 11/22] Fixed bugs

---
 gym/envs/toy_text/taxi.py             | 22 ++++++++++-----
 gym/spaces/multi_discrete.py          | 40 +++++++++++++++------------
 tests/envs/test_env_implementation.py |  2 +-
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index ce831b095fb..ba578b3846e 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -56,12 +56,6 @@ class TaxiEnv(Env):
     - 4: pickup passenger
     - 5: drop off passenger
 
-    For some cases, taking these actions will have no effect on the state of the agent.
-    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying
-    if the action will change the state.
-    To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``
-    Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``.
-
     ### Observations
     There are 500 discrete states since there are 25 taxi positions, 5 possible
     locations of the passenger (including the case when the passenger is in the
@@ -93,6 +87,20 @@ class TaxiEnv(Env):
     - 2: Y(ellow)
     - 3: B(lue)
 
+    ### Info
+
+    ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask".
+
+    As Taxi is a stochastic environment for transitions then the "p" key represents the probability of the
+    transition. However, this value is permanently 1.0 for an unknown reason.
+
+    For some cases, taking these actions will have no effect on the state of the agent.
+    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying
+    if the action will change the state.
+
+    To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``
+    Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``.
+
     ### Rewards
     - -1 per step unless other reward is triggered.
     - +20 delivering passenger.
@@ -267,7 +275,7 @@ def reset(
         if not return_info:
             return int(self.s)
         else:
-            return int(self.s), {"prob": 1, "action_mask": self.action_mask(self.s)}
+            return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}
 
     def render(self, mode="human"):
         if self.render_mode is not None:
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 5eec3a30437..06b381aa578 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 
+import gym
 from gym import logger
 from gym.spaces.discrete import Discrete
 from gym.spaces.space import Space
@@ -75,24 +76,27 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
             assert isinstance(
                 mask, np.ndarray
             ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
-            assert (
-                mask.dtype == np.int8
-            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
-            assert np.all(
-                mask.shape == self.nvec
-            ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently."
-            assert np.all(
-                np.logical_or(mask == 0, mask == 1)
-            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
-
-            multi_mask = [np.where(row)[0] for row in mask]
-            return np.array(
-                [
-                    self.np_random.choice(row_mask) if len(row_mask) > 0 else 0
-                    for row_mask in multi_mask
-                ],
-                dtype=self.dtype,
-            )
+            if self.nvec.ndim == 1:
+                assert (
+                    mask.dtype == np.int8
+                ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+                assert np.all(
+                    mask.shape == self.nvec
+                ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently."
+                assert np.all(
+                    np.logical_or(mask == 0, mask == 1)
+                ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+
+                multi_mask = [np.where(row)[0] for row in mask]
+                return np.array(
+                    [
+                        self.np_random.choice(row_mask) if len(row_mask) > 0 else 0
+                        for row_mask in multi_mask
+                    ],
+                    dtype=self.dtype,
+                )
+            else:
+                raise gym.error.Error()
 
         return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype)
 
diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py
index 7a088d4698c..248b36199f5 100644
--- a/tests/envs/test_env_implementation.py
+++ b/tests/envs/test_env_implementation.py
@@ -100,5 +100,5 @@ def test_taxi_encode_decode():
     for _ in range(100):
         assert (
             env.encode(*env.decode(state)) == state
-        ), f"{state=}, encode decode: {env.encode(*env.decode(state))}"
+        ), f"state={state}, encode(decode(state))={env.encode(*env.decode(state))}"
         state, _, _, _ = env.step(env.action_space.sample())

From 2f14eb7a455ad676d5d4f6128ac5481359592c9f Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Mon, 20 Jun 2022 14:26:35 +0100
Subject: [PATCH 12/22] Add tests for sample

---
 tests/spaces/test_spaces.py | 105 +++++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index fded6654afe..101d22bb5d9 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -150,6 +150,37 @@ def test_inequality(spaces):
     assert space1 != space2, f"Expected {space1} != {space2}"
 
 
+# The expected sum of variance for an alpha of 0.05
+# CHI_SQUARED = [0] + [scipy.stats.chi2.isf(0.05, df=df) for df in range(1, 25)]
+CHI_SQUARED = [
+    0,
+    3.8414588206941285,
+    5.991464547107983,
+    7.814727903251178,
+    9.487729036781158,
+    11.070497693516355,
+    12.59158724374398,
+    14.067140449340167,
+    15.507313055865454,
+    16.91897760462045,
+    18.30703805327515,
+    19.67513757268249,
+    21.02606981748307,
+    22.362032494826945,
+    23.684791304840576,
+    24.99579013972863,
+    26.296227604864242,
+    27.587111638275335,
+    28.869299430392637,
+    30.143527205646155,
+    31.41043284423092,
+    32.670573340917315,
+    33.92443847144379,
+    35.17246162690807,
+    36.415028501807306,
+]
+
+
 @pytest.mark.parametrize(
     "space",
     [
@@ -166,11 +197,83 @@ def test_inequality(spaces):
     ],
 )
 def test_sample(space: Space, n_trials: int = 1_000):
-    """Test the space sample works, todo, add chi-squared testing for the distribution"""
+    """Test the space sample has the expected distribution with the chi-squared test and KS test.
+
+    Example code with scipy.stats.chisquared
+
+    import scipy.stats
+    variance = np.sum(np.square(observed_frequency - expected_frequency) / expected_frequency)
+    f'X2 at alpha=0.05 = {scipy.stats.chi2.isf(0.05, df=4)}'
+    f'p-value = {scipy.stats.chi2.sf(variance, df=4)}'
+    scipy.stats.chisquare(f_obs=observed_frequency)
+    """
     space.seed(0)
     samples = np.array([space.sample() for _ in range(n_trials)])
     assert len(samples) == n_trials
 
+    if isinstance(space, Discrete):
+        expected_frequency = np.ones(space.n) * n_trials / space.n
+        observed_frequency = np.zeros(space.n)
+        for sample in samples:
+            observed_frequency[sample - space.start] += 1
+        degrees_of_freedom = space.n - 1
+
+        assert observed_frequency.shape == expected_frequency.shape
+        assert np.sum(observed_frequency) == n_trials
+
+        variance = np.sum(
+            np.square(expected_frequency - observed_frequency) / expected_frequency
+        )
+        assert variance < CHI_SQUARED[degrees_of_freedom]
+    elif isinstance(space, MultiBinary):
+        expected_frequency = n_trials / 2
+        observed_frequency = np.sum(samples, axis=0)
+        assert observed_frequency.shape == space.shape
+
+        variance = (
+            2 * np.square(observed_frequency - expected_frequency) / expected_frequency
+        )
+        assert variance.shape == space.shape
+        assert np.all(variance < CHI_SQUARED[1])
+    elif isinstance(space, MultiDiscrete):
+        # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes
+        def _generate_frequency(dim, func):
+            if isinstance(dim, np.ndarray):
+                print(dim)
+                return np.array(
+                    [_generate_frequency(sub_dim, func) for sub_dim in dim],
+                    dtype=object,
+                )
+            else:
+                return func(dim)
+
+        def _update_observed_frequency(obs_sample, obs_freq):
+            if isinstance(obs_sample, np.ndarray):
+                for sub_sample, sub_freq in zip(obs_sample, obs_freq):
+                    _update_observed_frequency(sub_sample, sub_freq)
+            else:
+                obs_freq[obs_sample] += 1
+
+        expected_frequency = _generate_frequency(
+            space.nvec, lambda dim: np.ones(dim) * n_trials / dim
+        )
+        observed_frequency = _generate_frequency(space.nvec, lambda dim: np.zeros(dim))
+        for sample in samples:
+            _update_observed_frequency(sample, observed_frequency)
+
+        def _chi_squared_test(dim, exp_freq, obs_freq):
+            if isinstance(dim, np.ndarray):
+                for sub_dim, sub_exp_freq, sub_obs_freq in zip(dim, exp_freq, obs_freq):
+                    _chi_squared_test(sub_dim, sub_exp_freq, sub_obs_freq)
+            else:
+                assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,)
+                assert np.sum(obs_freq) == n_trials
+                _variance = np.sum(np.square(exp_freq - obs_freq) / exp_freq)
+                _degrees_of_freedom = dim - 1
+                assert _variance < CHI_SQUARED[_degrees_of_freedom]
+
+        _chi_squared_test(space.nvec, expected_frequency, observed_frequency)
+
 
 @pytest.mark.parametrize(
     "space,mask",

From f52d5d58e21fb4fd5e01061d688860bdfda13e8e Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Mon, 20 Jun 2022 23:00:03 +0100
Subject: [PATCH 13/22] Add docstrings and test space sample mask Discrete and
 MultiBinary

---
 gym/spaces/discrete.py       | 12 +++--
 gym/spaces/graph.py          |  3 +-
 gym/spaces/multi_binary.py   |  7 ++-
 gym/spaces/multi_discrete.py | 31 ++----------
 gym/spaces/space.py          |  2 +-
 gym/spaces/tuple.py          |  3 +-
 tests/spaces/test_spaces.py  | 96 ++++++++++++++++++++++--------------
 7 files changed, 80 insertions(+), 74 deletions(-)

diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
index be9361194de..da05979b02a 100644
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -46,7 +46,8 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int:
         A sample will be chosen uniformly at random with the mask if provided
 
         Args:
-            mask: An optional mask for if an action can be selected. Expected shape is (n,). If not possible actions, will default to `space.start`
+            mask: An optional mask for if an action can be selected. Expected shape is (n,).
+                If there are no possible actions, will default to `space.start`.
 
         Returns:
             A sampled integer from the space
@@ -61,11 +62,14 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int:
             assert mask.shape == (
                 self.n,
             ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
+            valid_action_mask = mask == 1
             assert np.all(
-                np.logical_or(mask == 0, mask == 1)
+                np.logical_or(mask == 0, valid_action_mask)
             ), f"All values of a mask should be 0 or 1, actual values: {mask}"
-            if np.any(mask == 1):
-                return int(self.start + self.np_random.choice(np.where(mask)[0]))
+            if np.any(valid_action_mask):
+                return int(
+                    self.start + self.np_random.choice(np.where(valid_action_mask)[0])
+                )
             else:
                 return self.start
 
diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 44bb58f95e4..4705633ddb9 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -98,7 +98,8 @@ def sample(
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Args:
-            mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces)
+            mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces).
+                The expected shape for the node mask is ``node_space.n`` and edge mask is ``edge_space.n``.
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py
index dd1a62c11e5..6662b912274 100644
--- a/gym/spaces/multi_binary.py
+++ b/gym/spaces/multi_binary.py
@@ -57,7 +57,8 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
         A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).
 
         Args:
-            mask: An optional np.ndarray to mask samples, where mask == 0 will have samples == 0
+            mask: An optional np.ndarray to mask samples with expected shape of ``space.shape``.
+                Where mask == 0 then the samples will be 0.
 
         Returns:
             Sampled values from space
@@ -76,7 +77,9 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
                 np.logical_or(mask == 0, mask == 1)
             ), f"All values of a mask should be 0 or 1, actual values: {mask}"
 
-            return mask * self.np_random.integers(0, 2, self.n, self.dtype)
+            return mask * self.np_random.integers(
+                low=0, high=2, size=self.n, dtype=self.dtype
+            )
 
         return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype)
 
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 06b381aa578..d9754a5e740 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 
-import gym
 from gym import logger
 from gym.spaces.discrete import Discrete
 from gym.spaces.space import Space
@@ -63,40 +62,18 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than :class:`gym.Space` - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
+    def sample(self, mask: Optional[Tuple[np.ndarray, ...]] = None) -> np.ndarray:
         """Generates a single random sample this space.
 
         Args:
-            mask: An optional mask for multi-discrete, expected shape is `space.nvec`. If there are no possible actions, defaults to 0
+            mask: An optional mask for multi-discrete, expected shape is `space.nvec` however for multi-axis nvec then
+                we expect np.ndarray dtype=object. If there are no possible actions, defaults to 0
 
         Returns:
             An np.ndarray of shape `space.shape`
         """
         if mask is not None:
-            assert isinstance(
-                mask, np.ndarray
-            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
-            if self.nvec.ndim == 1:
-                assert (
-                    mask.dtype == np.int8
-                ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
-                assert np.all(
-                    mask.shape == self.nvec
-                ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently."
-                assert np.all(
-                    np.logical_or(mask == 0, mask == 1)
-                ), f"All values of a mask should be 0 or 1, actual values: {mask}"
-
-                multi_mask = [np.where(row)[0] for row in mask]
-                return np.array(
-                    [
-                        self.np_random.choice(row_mask) if len(row_mask) > 0 else 0
-                        for row_mask in multi_mask
-                    ],
-                    dtype=self.dtype,
-                )
-            else:
-                raise gym.error.Error()
+            pass
 
         return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype)
 
diff --git a/gym/spaces/space.py b/gym/spaces/space.py
index 3b006228b9e..5d7dea6f9a5 100644
--- a/gym/spaces/space.py
+++ b/gym/spaces/space.py
@@ -88,7 +88,7 @@ def sample(self, mask: Optional[Any] = None) -> T_cov:
         Can be uniform or non-uniform sampling based on boundedness of space.
 
         Args:
-            mask: A mask used for sampling, see Space for implementation details.
+            mask: A mask used for sampling, expected ``dtype=np.int8`` and see sample implementation for expected shape.
 
         Returns:
             A sampled actions from the space
diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py
index 57bc6b40cab..10b4344ef87 100644
--- a/gym/spaces/tuple.py
+++ b/gym/spaces/tuple.py
@@ -78,7 +78,8 @@ def sample(self, mask: Optional[Tuple[Optional[np.ndarray]]] = None) -> tuple:
         This method draws independent samples from the subspaces.
 
         Args:
-            mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces
+            mask: An optional tuple of optional masks for each of the subspace's samples,
+                expects the same number of masks as spaces
 
         Returns:
             Tuple of the subspace's samples
diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index 101d22bb5d9..7d44134e0c5 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -152,47 +152,35 @@ def test_inequality(spaces):
 
 # The expected sum of variance for an alpha of 0.05
 # CHI_SQUARED = [0] + [scipy.stats.chi2.isf(0.05, df=df) for df in range(1, 25)]
-CHI_SQUARED = [
-    0,
-    3.8414588206941285,
-    5.991464547107983,
-    7.814727903251178,
-    9.487729036781158,
-    11.070497693516355,
-    12.59158724374398,
-    14.067140449340167,
-    15.507313055865454,
-    16.91897760462045,
-    18.30703805327515,
-    19.67513757268249,
-    21.02606981748307,
-    22.362032494826945,
-    23.684791304840576,
-    24.99579013972863,
-    26.296227604864242,
-    27.587111638275335,
-    28.869299430392637,
-    30.143527205646155,
-    31.41043284423092,
-    32.670573340917315,
-    33.92443847144379,
-    35.17246162690807,
-    36.415028501807306,
-]
+CHI_SQUARED = np.array(
+    [
+        0.01,
+        3.8414588206941285,
+        5.991464547107983,
+        7.814727903251178,
+        9.487729036781158,
+        11.070497693516355,
+        12.59158724374398,
+        14.067140449340167,
+        15.507313055865454,
+        16.91897760462045,
+    ]
+)
 
 
 @pytest.mark.parametrize(
     "space",
     [
+        Discrete(1),
         Discrete(5),
         Discrete(8, start=-20),
-        Box(low=0, high=255, shape=(2,)),
-        Box(low=-np.inf, high=np.inf, shape=(3, 3)),
-        Box(low=1.0, high=np.inf, shape=(3, 3)),
-        Box(low=-np.inf, high=2.0, shape=(3, 3)),
+        Box(low=0, high=255, shape=(2,), dtype=np.uint8),
+        Box(low=-np.inf, high=np.inf, shape=(3,)),
+        Box(low=1.0, high=np.inf, shape=(3,)),
+        Box(low=-np.inf, high=2.0, shape=(3,)),
         Box(low=np.array([0, 2]), high=np.array([10, 4])),
         MultiDiscrete([3, 5]),
-        MultiDiscrete(np.array([[3, 5], [2, 2]])),
+        MultiDiscrete(np.array([[3, 5], [2, 1]])),
         MultiBinary([2, 4]),
     ],
 )
@@ -211,6 +199,7 @@ def test_sample(space: Space, n_trials: int = 1_000):
     samples = np.array([space.sample() for _ in range(n_trials)])
     assert len(samples) == n_trials
 
+    # todo add Box space test
     if isinstance(space, Discrete):
         expected_frequency = np.ones(space.n) * n_trials / space.n
         observed_frequency = np.zeros(space.n)
@@ -230,6 +219,7 @@ def test_sample(space: Space, n_trials: int = 1_000):
         observed_frequency = np.sum(samples, axis=0)
         assert observed_frequency.shape == space.shape
 
+        # As this is a binary space, then we can be lazy in the variance as the np.square is symmetric for the 0 and 1 categories
         variance = (
             2 * np.square(observed_frequency - expected_frequency) / expected_frequency
         )
@@ -239,7 +229,6 @@ def test_sample(space: Space, n_trials: int = 1_000):
         # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes
         def _generate_frequency(dim, func):
             if isinstance(dim, np.ndarray):
-                print(dim)
                 return np.array(
                     [_generate_frequency(sub_dim, func) for sub_dim in dim],
                     dtype=object,
@@ -268,6 +257,7 @@ def _chi_squared_test(dim, exp_freq, obs_freq):
             else:
                 assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,)
                 assert np.sum(obs_freq) == n_trials
+                assert np.sum(exp_freq) == n_trials
                 _variance = np.sum(np.square(exp_freq - obs_freq) / exp_freq)
                 _degrees_of_freedom = dim - 1
                 assert _variance < CHI_SQUARED[_degrees_of_freedom]
@@ -281,16 +271,46 @@ def _chi_squared_test(dim, exp_freq, obs_freq):
         (Discrete(5), np.array([0, 1, 1, 0, 1], dtype=np.int8)),
         (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)),
         (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)),
-        (MultiDiscrete([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)),
-        # (MultiDiscrete(np.array([[3, 2], [2, 2]])), np.array([[[0, 1], [1, 1], [0, 0]], [[0, 1], [1, 1]]], dtype=np.int8)), Unsupported currently
-        (MultiBinary([2, 4]), np.array([[1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.int8)),
+        (MultiBinary([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)),
+        # todo MultiDiscrete spaces
     ],
 )
 def test_space_sample_mask(space, mask, n_trials: int = 100):
     """Test the space sample with mask works, todo, add chi-squared testing for the distribution"""
-    space.seed(0)
+    space.seed(1)
     samples = np.array([space.sample(mask) for _ in range(n_trials)])
-    assert len(samples) == n_trials
+
+    if isinstance(space, Discrete):
+        if np.any(mask == 1):
+            expected_frequency = np.ones(space.n) * (n_trials / np.sum(mask)) * mask
+        else:
+            expected_frequency = np.zeros(space.n)
+            expected_frequency[0] = n_trials
+        observed_frequency = np.zeros(space.n)
+        for sample in samples:
+            observed_frequency[sample - space.start] += 1
+        degrees_of_freedom = max(np.sum(mask) - 1, 0)
+
+        assert observed_frequency.shape == expected_frequency.shape
+        assert np.sum(observed_frequency) == n_trials
+        assert np.sum(expected_frequency) == n_trials
+        variance = np.sum(
+            np.square(expected_frequency - observed_frequency)
+            / np.clip(expected_frequency, 1, None)
+        )
+        assert variance < CHI_SQUARED[degrees_of_freedom]
+    elif isinstance(space, MultiBinary):
+        expected_frequency = np.ones(space.shape) * mask * (n_trials / 2)
+        observed_frequency = np.sum(samples, axis=0)
+        assert space.shape == expected_frequency.shape == observed_frequency.shape
+
+        variance = (
+            2
+            * np.square(observed_frequency - expected_frequency)
+            / np.clip(expected_frequency, 1, None)
+        )
+        assert variance.shape == space.shape
+        assert np.all(variance < CHI_SQUARED[1])
 
 
 @pytest.mark.parametrize(

From 5e699e14aeecea53524cd249d68414d9f9f34f0b Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Tue, 21 Jun 2022 15:15:17 +0100
Subject: [PATCH 14/22] Add MultiDiscrete sampling and tests

---
 gym/spaces/multi_discrete.py | 48 ++++++++++++++++++--
 tests/spaces/test_spaces.py  | 88 ++++++++++++++++++++++++++++++++++--
 2 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index d9754a5e740..27336fe22f5 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -8,6 +8,8 @@
 from gym.spaces.space import Space
 from gym.utils import seeding
 
+SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray]]
+
 
 class MultiDiscrete(Space[np.ndarray]):
     """This represents the cartesian product of arbitrary :class:`Discrete` spaces.
@@ -62,18 +64,54 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than :class:`gym.Space` - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: Optional[Tuple[np.ndarray, ...]] = None) -> np.ndarray:
+    def sample(self, mask: Optional[SAMPLE_MASK_TYPE] = None) -> np.ndarray:
         """Generates a single random sample this space.
 
         Args:
-            mask: An optional mask for multi-discrete, expected shape is `space.nvec` however for multi-axis nvec then
-                we expect np.ndarray dtype=object. If there are no possible actions, defaults to 0
+            mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each
+                action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`.
+                If there are no possible actions, the default action is 0
 
         Returns:
-            An np.ndarray of shape `space.shape`
+            An `np.ndarray` of shape `space.shape`
         """
         if mask is not None:
-            pass
+
+            def _apply_mask(
+                sub_mask: SAMPLE_MASK_TYPE, sub_nvec: np.ndarray
+            ) -> Union[int, List[int]]:
+                if isinstance(sub_mask, np.ndarray):
+                    assert np.isscalar(
+                        sub_nvec
+                    ), f"Expects the mask to be for an action, actual for {sub_nvec}"
+                    assert (
+                        len(sub_mask) == sub_nvec
+                    ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, nvec length: {sub_nvec}"
+                    assert (
+                        sub_mask.dtype == np.int8
+                    ), f"Expects the mask dtype to be np.int8, actual dtype: {sub_mask.dtype}"
+                    valid_action_mask = sub_mask == 1
+                    assert np.all(
+                        np.logical_or(sub_mask == 0, valid_action_mask)
+                    ), f"Expects all masks values to 0 or 1, actual values: {sub_mask}"
+
+                    if np.any(valid_action_mask):
+                        return self.np_random.choice(np.where(valid_action_mask)[0])
+                    else:
+                        return 0
+                else:
+                    assert isinstance(
+                        sub_mask, tuple
+                    ), f"Expects the mask to be a tuple or np.ndarray, actual type: {type(sub_mask)}"
+                    assert len(sub_mask) == len(
+                        sub_nvec
+                    ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, nvec length: {len(sub_nvec)}"
+                    return [
+                        _apply_mask(new_mask, new_nvec)
+                        for new_mask, new_nvec in zip(sub_mask, sub_nvec)
+                    ]
+
+            return np.array(_apply_mask(mask, self.nvec), dtype=self.dtype)
 
         return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype)
 
diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index 7d44134e0c5..0aa219a7ea8 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -2,6 +2,7 @@
 import json  # note: ujson fails this test due to float equality
 import pickle
 import tempfile
+from typing import List, Union
 
 import numpy as np
 import pytest
@@ -272,11 +273,31 @@ def _chi_squared_test(dim, exp_freq, obs_freq):
         (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)),
         (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)),
         (MultiBinary([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)),
-        # todo MultiDiscrete spaces
+        (
+            MultiDiscrete([5, 3]),
+            (
+                np.array([0, 1, 1, 0, 1], dtype=np.int8),
+                np.array([0, 1, 1], dtype=np.int8),
+            ),
+        ),
+        (
+            MultiDiscrete(np.array([4, 2])),
+            (np.array([0, 0, 0, 0], dtype=np.int8), np.array([1, 1], dtype=np.int8)),
+        ),
+        (
+            MultiDiscrete(np.array([[2, 2], [4, 3]])),
+            (
+                (np.array([0, 1], dtype=np.int8), np.array([1, 1], dtype=np.int8)),
+                (
+                    np.array([0, 1, 1, 0], dtype=np.int8),
+                    np.array([1, 0, 0], dtype=np.int8),
+                ),
+            ),
+        ),
     ],
 )
 def test_space_sample_mask(space, mask, n_trials: int = 100):
-    """Test the space sample with mask works, todo, add chi-squared testing for the distribution"""
+    """Test the space sample with mask works using the pearson chi-squared test."""
     space.seed(1)
     samples = np.array([space.sample(mask) for _ in range(n_trials)])
 
@@ -311,6 +332,64 @@ def test_space_sample_mask(space, mask, n_trials: int = 100):
         )
         assert variance.shape == space.shape
         assert np.all(variance < CHI_SQUARED[1])
+    elif isinstance(space, MultiDiscrete):
+        # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive, and the expected / observed frequency arrays are of non-regular shapes
+        def _generate_frequency(
+            _dim: Union[np.ndarray, int], _mask, func: callable
+        ) -> List:
+            if isinstance(_dim, np.ndarray):
+                return [
+                    _generate_frequency(sub_dim, sub_mask, func)
+                    for sub_dim, sub_mask in zip(_dim, _mask)
+                ]
+            else:
+                return func(_dim, _mask)
+
+        def _update_observed_frequency(obs_sample, obs_freq):
+            if isinstance(obs_sample, np.ndarray):
+                for sub_sample, sub_freq in zip(obs_sample, obs_freq):
+                    _update_observed_frequency(sub_sample, sub_freq)
+            else:
+                obs_freq[obs_sample] += 1
+
+        def _exp_freq_fn(_dim: int, _mask: np.ndarray):
+            if np.any(_mask == 1):
+                print(f"{_dim=}, {_mask=}")
+                assert _dim == len(_mask)
+                return np.ones(_dim) * (n_trials / np.sum(_mask)) * _mask
+            else:
+                freq = np.zeros(_dim)
+                freq[0] = n_trials
+                return freq
+
+        expected_frequency = _generate_frequency(
+            space.nvec, mask, lambda dim, _mask: _exp_freq_fn(dim, _mask)
+        )
+        observed_frequency = _generate_frequency(
+            space.nvec, mask, lambda dim, _: np.zeros(dim)
+        )
+        for sample in samples:
+            _update_observed_frequency(sample, observed_frequency)
+
+        def _chi_squared_test(dim, _mask, exp_freq, obs_freq):
+            if isinstance(dim, np.ndarray):
+                for sub_dim, sub_mask, sub_exp_freq, sub_obs_freq in zip(
+                    dim, _mask, exp_freq, obs_freq
+                ):
+                    _chi_squared_test(sub_dim, sub_mask, sub_exp_freq, sub_obs_freq)
+            else:
+                assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,)
+                assert np.sum(obs_freq) == n_trials
+                assert np.sum(exp_freq) == n_trials
+                _variance = np.sum(
+                    np.square(exp_freq - obs_freq) / np.clip(exp_freq, 1, None)
+                )
+                _degrees_of_freedom = max(np.sum(_mask) - 1, 0)
+                assert _variance < CHI_SQUARED[_degrees_of_freedom]
+
+        _chi_squared_test(space.nvec, mask, expected_frequency, observed_frequency)
+    else:
+        raise NotImplementedError()
 
 
 @pytest.mark.parametrize(
@@ -320,7 +399,10 @@ def test_space_sample_mask(space, mask, n_trials: int = 100):
             Dict(a=Discrete(2), b=MultiDiscrete([2, 4])),
             {
                 "a": np.array([0, 1], dtype=np.int8),
-                "b": np.array([[0, 0, 0, 0], [1, 1, 1, 0]], dtype=np.int8),
+                "b": (
+                    np.array([0, 1], dtype=np.int8),
+                    np.array([1, 1, 0, 0], dtype=np.int8),
+                ),
             },
         ),
         (

From 634da1218c9e34131561264379948483d3a33eb2 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Tue, 21 Jun 2022 17:36:26 +0100
Subject: [PATCH 15/22] Remove sample mask from graph

---
 gym/spaces/graph.py         | 25 +++++++++++--------------
 tests/spaces/test_spaces.py | 12 +++++++++++-
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 4705633ddb9..5c6cf6d96b6 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -1,6 +1,6 @@
 """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space."""
 from collections import namedtuple
-from typing import NamedTuple, Optional, Sequence, Tuple, Union
+from typing import NamedTuple, Optional, Sequence, Union
 
 import numpy as np
 
@@ -92,39 +92,36 @@ def _generate_sample_space(
                 f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error."
             )
 
-    def sample(
-        self, mask: Optional[Tuple[Optional[np.ndarray], Optional[np.ndarray]]] = None
-    ) -> NamedTuple:
+    def sample(self, mask: None = None) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Args:
-            mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces).
-                The expected shape for the node mask is ``node_space.n`` and edge mask is ``edge_space.n``.
+            mask: As the number of nodes to determined during sample, it is not possible to know the mask beforehand.
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
-        node_mask, edge_mask = mask if mask is not None else (None, None)
+        if mask is not None:
+            raise NotImplementedError(
+                "Graph.sample(mask) is not implemented as the number of nodes is determined within the function."
+            )
+
         num_nodes = self.np_random.integers(low=1, high=10)
 
         # we only have edges when we have at least 2 nodes
         num_edges = 0
         if num_nodes > 1:
-            # maximal number of edges is (n*n) allowing self connections and two way is allowed
+            # maximal number of edges is (n*n) allowing self connections and two-way is allowed
             num_edges = self.np_random.integers(num_nodes * num_nodes)
 
         node_sample_space = self._generate_sample_space(self.node_space, num_nodes)
         edge_sample_space = self._generate_sample_space(self.edge_space, num_edges)
 
         sampled_nodes = (
-            node_sample_space.sample(node_mask)
-            if node_sample_space is not None
-            else None
+            node_sample_space.sample() if node_sample_space is not None else None
         )
         sampled_edges = (
-            edge_sample_space.sample(edge_mask)
-            if edge_sample_space is not None
-            else None
+            edge_sample_space.sample() if edge_sample_space is not None else None
         )
 
         sampled_edge_links = None
diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index 0aa219a7ea8..25d26e37aed 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -354,7 +354,6 @@ def _update_observed_frequency(obs_sample, obs_freq):
 
         def _exp_freq_fn(_dim: int, _mask: np.ndarray):
             if np.any(_mask == 1):
-                print(f"{_dim=}, {_mask=}")
                 assert _dim == len(_mask)
                 return np.ones(_dim) * (n_trials / np.sum(_mask)) * _mask
             else:
@@ -420,6 +419,17 @@ def _chi_squared_test(dim, _mask, exp_freq, obs_freq):
                 "b": np.array([0, 1, 1], dtype=np.int8),
             },
         ),
+        (Graph(node_space=Discrete(5), edge_space=Discrete(3)), None),
+        (
+            Graph(node_space=Discrete(3), edge_space=Box(low=0, high=1, shape=(5,))),
+            None,
+        ),
+        (
+            Graph(
+                node_space=Box(low=-100, high=100, shape=(3,)), edge_space=Discrete(3)
+            ),
+            None,
+        ),
     ],
 )
 def test_composite_space_sample_mask(space, mask):

From f85055c1648b0eed3afa67fd5a42648ae730e199 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark.m.towers@gmail.com>
Date: Thu, 23 Jun 2022 18:57:06 +0100
Subject: [PATCH 16/22] Update gym/spaces/multi_discrete.py

Co-authored-by: Markus Krimmel <montcyril@gmail.com>
---
 gym/spaces/multi_discrete.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 27336fe22f5..2e7bc09a0b1 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -25,7 +25,7 @@ class MultiDiscrete(Space[np.ndarray]):
     2. Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
     3. Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
 
-    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample is array([3, 1, 0])
+    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample might be ``array([3, 1, 0])``.
 
     Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
     if ``nvec`` has several axes:

From 4a4b166fd05bf7794f23d73a0f1953253b745989 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Thu, 23 Jun 2022 19:48:13 +0100
Subject: [PATCH 17/22] Updates based on Marcus28 and jjshoots for Graph.py

---
 gym/envs/toy_text/taxi.py    | 12 +++---
 gym/spaces/dict.py           |  3 +-
 gym/spaces/discrete.py       |  5 ++-
 gym/spaces/graph.py          | 73 +++++++++++++++++++++++-------------
 gym/spaces/multi_discrete.py |  2 +-
 5 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
index ba578b3846e..7cead13a468 100644
--- a/gym/envs/toy_text/taxi.py
+++ b/gym/envs/toy_text/taxi.py
@@ -89,13 +89,15 @@ class TaxiEnv(Env):
 
     ### Info
 
-    ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask".
+    ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask" containing
+        the probability that the state is taken and a mask of what actions will result in a change of state to speed up training.
 
-    As Taxi is a stochastic environment for transitions then the "p" key represents the probability of the
-    transition. However, this value is permanently 1.0 for an unknown reason.
+    As Taxi's initial state is stochastic, the "p" key represents the probability of the
+    transition; however, this value is currently bugged and is always 1.0. This will be fixed soon.
+    As the steps are deterministic, "p" represents the probability of the transition, which is always 1.0.
 
-    For some cases, taking these actions will have no effect on the state of the agent.
-    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying
+    For some cases, taking an action will have no effect on the state of the agent.
+    In v0.25.0, ``info["action_mask"]`` contains a np.ndarray for each of the actions specifying
     if the action will change the state.
 
     To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``
diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py
index ec668e44347..a830ac8819f 100644
--- a/gym/spaces/dict.py
+++ b/gym/spaces/dict.py
@@ -1,6 +1,7 @@
 """Implementation of a space that represents the cartesian product of other spaces as a dictionary."""
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
+from typing import Any
 from typing import Dict as TypingDict
 from typing import Optional, Union
 
@@ -137,7 +138,7 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict:
+    def sample(self, mask: Optional[TypingDict[str, Any]] = None) -> dict:
         """Generates a single random sample from this space.
 
         The sample is an ordered dictionary of independent samples from the constituent spaces.
diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
index da05979b02a..7f8f17dfa17 100644
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -46,8 +46,9 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int:
         A sample will be chosen uniformly at random with the mask if provided
 
         Args:
-            mask: An optional mask for if an action can be selected. Expected shape is (n,).
-                If there are no possible actions, will default to `space.start`.
+            mask: An optional mask for if an action can be selected.
+                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
+                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.
 
         Returns:
             A sampled integer from the space
diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 5c6cf6d96b6..84523045cff 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -1,12 +1,12 @@
 """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space."""
 from collections import namedtuple
-from typing import NamedTuple, Optional, Sequence, Union
+from typing import NamedTuple, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
 from gym.spaces.box import Box
 from gym.spaces.discrete import Discrete
-from gym.spaces.multi_discrete import MultiDiscrete
+from gym.spaces.multi_discrete import SAMPLE_MASK_TYPE, MultiDiscrete
 from gym.spaces.space import Space
 from gym.utils import seeding
 
@@ -70,58 +70,79 @@ def __init__(
 
     def _generate_sample_space(
         self, base_space: Union[None, Box, Discrete], num: int
-    ) -> Optional[Union[Box, Discrete]]:
-        # the possibility of this space , got {type(base_space)}aving nothing
-        if num == 0:
+    ) -> Optional[Union[Box, MultiDiscrete]]:
+        if num == 0 or base_space is None:
             return None
 
         if isinstance(base_space, Box):
             return Box(
                 low=np.array(max(1, num) * [base_space.low]),
                 high=np.array(max(1, num) * [base_space.high]),
-                shape=(num, *base_space.shape),
+                shape=(num,) + base_space.shape,
                 dtype=base_space.dtype,
-                seed=self._np_random,
+                seed=self.np_random,
             )
         elif isinstance(base_space, Discrete):
-            return MultiDiscrete(nvec=[base_space.n] * num, seed=self._np_random)
-        elif base_space is None:
-            return None
+            return MultiDiscrete(nvec=[base_space.n] * num, seed=self.np_random)
         else:
             raise AssertionError(
-                f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error."
+                f"Expects base space to be Box and Discrete, actual space: {type(base_space)}."
             )
 
-    def sample(self, mask: None = None) -> NamedTuple:
+    def sample(
+        self,
+        num_nodes: int,
+        num_edges: Optional[int] = None,
+        mask: Optional[
+            Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]]
+        ] = None,
+    ) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Args:
-            mask: As the number of nodes to determined during sample, it is not possible to know the mask beforehand.
+            num_nodes: The number of nodes that will be sampled
+            num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2
+            mask: An optional tuple of optional node and edge mask that is only possible with Discrete spaces
+                (Box spaces don't support sample masks).
+                If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
-        if mask is not None:
-            raise NotImplementedError(
-                "Graph.sample(mask) is not implemented as the number of nodes is determined within the function."
-            )
+        assert (
+            num_nodes > 0
+        ), f"The number of nodes is expected to be greater than 0, actual value: {num_nodes}"
 
-        num_nodes = self.np_random.integers(low=1, high=10)
+        if mask is not None:
+            node_space_mask, edge_space_mask = mask
+        else:
+            node_space_mask, edge_space_mask = None, None
 
         # we only have edges when we have at least 2 nodes
-        num_edges = 0
-        if num_nodes > 1:
-            # maximal number of edges is (n*n) allowing self connections and two-way is allowed
-            num_edges = self.np_random.integers(num_nodes * num_nodes)
+        if num_edges is None:
+            if num_nodes > 1:
+                # maximal number of edges is (n*n) allowing self connections and two-way is allowed
+                num_edges = self.np_random.integers(num_nodes * num_nodes)
+            else:
+                num_edges = 0
+            edge_space_mask = tuple(edge_space_mask for _ in range(num_edges))
+        else:
+            assert (
+                num_edges >= 0
+            ), f"The number of edges is expected to be greater than 0, actual mask: {num_edges}"
 
-        node_sample_space = self._generate_sample_space(self.node_space, num_nodes)
-        edge_sample_space = self._generate_sample_space(self.edge_space, num_edges)
+        sampled_node_space = self._generate_sample_space(self.node_space, num_nodes)
+        sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges)
 
         sampled_nodes = (
-            node_sample_space.sample() if node_sample_space is not None else None
+            sampled_node_space.sample(node_space_mask)
+            if sampled_node_space is not None
+            else None
         )
         sampled_edges = (
-            edge_sample_space.sample() if edge_sample_space is not None else None
+            sampled_edge_space.sample(edge_space_mask)
+            if sampled_edge_space is not None
+            else None
         )
 
         sampled_edge_links = None
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 2e7bc09a0b1..955c5fa8f9d 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -70,7 +70,7 @@ def sample(self, mask: Optional[SAMPLE_MASK_TYPE] = None) -> np.ndarray:
         Args:
             mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each
                 action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`.
-                If there are no possible actions, the default action is 0
+                Only actions whose mask value is 1 can be sampled; if all mask values for an action are 0, the default action 0 is sampled.
 
         Returns:
             An `np.ndarray` of shape `space.shape`

From eb63c6255ce8e171f86c422bf8d4a6eedc0965d2 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Thu, 23 Jun 2022 19:54:30 +0100
Subject: [PATCH 18/22] Updates based on Marcus28 and jjshoots for Graph.py

---
 gym/spaces/graph.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 84523045cff..89a922c162a 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -91,20 +91,20 @@ def _generate_sample_space(
 
     def sample(
         self,
-        num_nodes: int,
-        num_edges: Optional[int] = None,
         mask: Optional[
             Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]]
         ] = None,
+        num_nodes: int = 10,
+        num_edges: Optional[int] = None,
     ) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Args:
-            num_nodes: The number of nodes that will be sampled
-            num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2
             mask: An optional tuple of optional node and edge mask that is only possible with Discrete spaces
                 (Box spaces don't support sample masks).
                 If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges
+            num_nodes: The number of nodes that will be sampled; the default is 10 nodes
+            num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
@@ -125,7 +125,8 @@ def sample(
                 num_edges = self.np_random.integers(num_nodes * num_nodes)
             else:
                 num_edges = 0
-            edge_space_mask = tuple(edge_space_mask for _ in range(num_edges))
+            if edge_space_mask is not None:
+                edge_space_mask = tuple(edge_space_mask for _ in range(num_edges))
         else:
             assert (
                 num_edges >= 0

From 89189146e5d9f1a8b9bf4a26e5d59d841edc3c2d Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Fri, 24 Jun 2022 18:39:07 +0100
Subject: [PATCH 19/22] jjshoot review

---
 gym/spaces/graph.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 89a922c162a..9cde9ed30a5 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -121,7 +121,7 @@ def sample(
         # we only have edges when we have at least 2 nodes
         if num_edges is None:
             if num_nodes > 1:
-                # maximal number of edges is (n*n) allowing self connections and two-way is allowed
+                # maximal number of edges is n*(n-1) allowing self connections and two-way is allowed
                 num_edges = self.np_random.integers(num_nodes * num_nodes)
             else:
                 num_edges = 0
@@ -135,11 +135,7 @@ def sample(
         sampled_node_space = self._generate_sample_space(self.node_space, num_nodes)
         sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges)
 
-        sampled_nodes = (
-            sampled_node_space.sample(node_space_mask)
-            if sampled_node_space is not None
-            else None
-        )
+        sampled_nodes = sampled_node_space.sample(node_space_mask)
         sampled_edges = (
             sampled_edge_space.sample(edge_space_mask)
             if sampled_edge_space is not None

From a53f0e740f01291f4c254d46133fa6124e6a2917 Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Sat, 25 Jun 2022 17:43:19 +0100
Subject: [PATCH 20/22] jjshoot review

---
 gym/spaces/graph.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index 9cde9ed30a5..a721d55003e 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -121,8 +121,8 @@ def sample(
         # we only have edges when we have at least 2 nodes
         if num_edges is None:
             if num_nodes > 1:
-                # maximal number of edges is n*(n-1) allowing self connections and two-way is allowed
-                num_edges = self.np_random.integers(num_nodes * num_nodes)
+                # maximal number of edges is `n*(n-1)`: two-way connections are allowed, self connections are not counted
+                num_edges = self.np_random.integers(num_nodes * (num_nodes - 1))
             else:
                 num_edges = 0
             if edge_space_mask is not None:

From 8e71e467c38f6502dc76e8a919d4ba1880f3eaae Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Sat, 25 Jun 2022 22:45:41 +0100
Subject: [PATCH 21/22] Update assert check

---
 gym/spaces/multi_discrete.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index 955c5fa8f9d..f63d9c43a57 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -81,8 +81,8 @@ def _apply_mask(
                 sub_mask: SAMPLE_MASK_TYPE, sub_nvec: np.ndarray
             ) -> Union[int, List[int]]:
                 if isinstance(sub_mask, np.ndarray):
-                    assert np.isscalar(
-                        sub_nvec
+                    assert np.issubdtype(
+                        type(sub_nvec, np.integer)
                     ), f"Expects the mask to be for an action, actual for {sub_nvec}"
                     assert (
                         len(sub_mask) == sub_nvec
@@ -90,6 +90,7 @@ def _apply_mask(
                     assert (
                         sub_mask.dtype == np.int8
                     ), f"Expects the mask dtype to be np.int8, actual dtype: {sub_mask.dtype}"
+
                     valid_action_mask = sub_mask == 1
                     assert np.all(
                         np.logical_or(sub_mask == 0, valid_action_mask)

From 875ab441161ee654cdddc47d4bf323d11738035c Mon Sep 17 00:00:00 2001
From: StringTheory <mark.m.towers@gmail.com>
Date: Sat, 25 Jun 2022 23:33:07 +0100
Subject: [PATCH 22/22] Update type hints

---
 gym/spaces/graph.py          | 5 ++++-
 gym/spaces/multi_discrete.py | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
index a721d55003e..a2ef1cf0d60 100644
--- a/gym/spaces/graph.py
+++ b/gym/spaces/graph.py
@@ -92,7 +92,10 @@ def _generate_sample_space(
     def sample(
         self,
         mask: Optional[
-            Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]]
+            Tuple[
+                Optional[Union[np.ndarray, SAMPLE_MASK_TYPE]],
+                Optional[Union[np.ndarray, SAMPLE_MASK_TYPE]],
+            ]
         ] = None,
         num_nodes: int = 10,
         num_edges: Optional[int] = None,
diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
index f63d9c43a57..71111d4c9dd 100644
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -8,7 +8,7 @@
 from gym.spaces.space import Space
 from gym.utils import seeding
 
-SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray]]
+SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray], ...]
 
 
 class MultiDiscrete(Space[np.ndarray]):
@@ -82,7 +82,7 @@ def _apply_mask(
             ) -> Union[int, List[int]]:
                 if isinstance(sub_mask, np.ndarray):
                     assert np.issubdtype(
-                        type(sub_nvec, np.integer)
+                        type(sub_nvec), np.integer
                     ), f"Expects the mask to be for an action, actual for {sub_nvec}"
                     assert (
                         len(sub_mask) == sub_nvec