From c390a6bc0452bc99b498a918e42a350cfc4524ae Mon Sep 17 00:00:00 2001 From: StringTheory Date: Wed, 8 Jun 2022 17:19:50 +0100 Subject: [PATCH 01/22] Allows a new RNG to be generated with seed=-1 and updated env_checker to fix bug if environment doesn't use np_random in reset --- gym/core.py | 4 +++- gym/utils/env_checker.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gym/core.py b/gym/core.py index c3d06cd8801..8bf926c970e 100644 --- a/gym/core.py +++ b/gym/core.py @@ -178,7 +178,9 @@ def reset( the ``info`` returned by :meth:`step`. """ # Initialize the RNG if the seed is manually passed - if seed is not None: + if seed == -1: + self._np_random, seed = seeding.np_random(None) + elif seed is not None: self._np_random, seed = seeding.np_random(seed) # TODO: remove kwarg mode with gym 1.0 diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 9fe621ffaf9..c52959e670b 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -77,7 +77,7 @@ def check_reset_seed(env: gym.Env): seed_123_rng = deepcopy(env.unwrapped.np_random) # Note: for some environment, they may initialise at the same state, therefore we cannot check the obs_1 != obs_3 - obs_4 = env.reset(seed=None) + obs_4 = env.reset(seed=-1) assert obs_4 in env.observation_space assert ( From 654476ee08b9f6ccd765f4d6dec66ea26db84766 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Wed, 8 Jun 2022 17:56:43 +0100 Subject: [PATCH 02/22] Revert "fixed `gym.vector.make` where the checker was being applied in the opposite case than was intended to (#2871)" This reverts commit 519dfd9117e98e4f52d38064d2b0f79974fb676d. --- gym/vector/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gym/vector/__init__.py b/gym/vector/__init__.py index b71ec11940d..89c2a80b229 100644 --- a/gym/vector/__init__.py +++ b/gym/vector/__init__.py @@ -63,6 +63,7 @@ def _make_env(): return _make_env env_fns = [ - create_env(disable_env_checker or env_num > 0) for env_num in range(num_envs) + create_env(env_num == 0 and disable_env_checker is False) + for env_num in range(num_envs) ] return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns) From 2e5dc9c9a9fa42adbca908e664b185613eb4c98f Mon Sep 17 00:00:00 2001 From: StringTheory Date: Mon, 13 Jun 2022 12:29:06 +0100 Subject: [PATCH 03/22] Remove bad pushed commits --- gym/core.py | 4 +--- gym/utils/env_checker.py | 2 +- gym/vector/__init__.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/gym/core.py b/gym/core.py index 8bf926c970e..c3d06cd8801 100644 --- a/gym/core.py +++ b/gym/core.py @@ -178,9 +178,7 @@ def reset( the ``info`` returned by :meth:`step`. """ # Initialize the RNG if the seed is manually passed - if seed == -1: - self._np_random, seed = seeding.np_random(None) - elif seed is not None: + if seed is not None: self._np_random, seed = seeding.np_random(seed) # TODO: remove kwarg mode with gym 1.0 diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index c52959e670b..9fe621ffaf9 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -77,7 +77,7 @@ def check_reset_seed(env: gym.Env): seed_123_rng = deepcopy(env.unwrapped.np_random) # Note: for some environment, they may initialise at the same state, therefore we cannot check the obs_1 != obs_3 - obs_4 = env.reset(seed=-1) + obs_4 = env.reset(seed=None) assert obs_4 in env.observation_space assert ( diff --git a/gym/vector/__init__.py b/gym/vector/__init__.py index 89c2a80b229..b71ec11940d 100644 --- a/gym/vector/__init__.py +++ b/gym/vector/__init__.py @@ -63,7 +63,6 @@ def _make_env(): return _make_env env_fns = [ - create_env(env_num == 0 and disable_env_checker is False) - for env_num in range(num_envs) + create_env(disable_env_checker or env_num > 0) for env_num in range(num_envs) ] return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns) From 400b1e9f0277f8b80a1add71764c4c94c45a7c12 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 11:27:27 +0100 Subject: [PATCH 04/22] Fixed spelling in core.py --- gym/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym/core.py b/gym/core.py index 2b507322e45..931770d8cbe 100644 --- a/gym/core.py +++ b/gym/core.py @@ -231,7 +231,7 @@ def seed(self, seed=None): there aren't accidental correlations between multiple generators. Args: - seed(Optional int): The seed value for the random number geneartor + seed(Optional int): The seed value for the random number generator Returns: seeds (List[int]): Returns the list of seeds used in this environment's random From 4281c760af05bd08d3ef337700f309d8baf26219 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 11:39:33 +0100 Subject: [PATCH 05/22] Pins pytest to the last py 3.6 version --- py.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py.Dockerfile b/py.Dockerfile index 0d4c290cb75..e7db5e8fe91 100644 --- a/py.Dockerfile +++ b/py.Dockerfile @@ -14,6 +14,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin COPY . /usr/local/gym/ WORKDIR /usr/local/gym/ -RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest mock ; else pip install .[testing] ; fi +RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest==6.2.5 ; else pip install .[testing] ; fi ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] From 5dee690aac30be083f2582b24b837cf85a334859 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 14:19:58 +0100 Subject: [PATCH 06/22] Add support for action masking in Space.sample(mask=...) --- gym/envs/toy_text/taxi.py | 25 +++++++++++++++++++++++-- gym/spaces/box.py | 5 ++++- gym/spaces/dict.py | 14 +++++++++++++- gym/spaces/discrete.py | 11 ++++++++++- gym/spaces/graph.py | 8 +++++++- gym/spaces/multi_binary.py | 8 +++++++- gym/spaces/multi_discrete.py | 16 +++++++++++++++- gym/spaces/space.py | 2 +- gym/spaces/tuple.py | 12 ++++++++++-- py.Dockerfile | 2 +- 10 files changed, 91 insertions(+), 12 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index abdccd8cab0..4a6b7930364 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -214,6 +214,22 @@ def decode(self, i): assert 0 <= i < 5 return reversed(out) + def valid_mask(self, row, col, pass_loc, dest_idx, max_row): + mask = np.zeros(6, dtype=bool) + if row < max_row: + mask[0] = 1 + if row > 0: + mask[1] = 1 + if self.desc[1 + row, 2 * col + 2] == b":": + mask[2] = 1 + if self.desc[1 + row, 2 * col] == b":": + mask[3] = 1 + if (row, col) == self.locs[pass_loc]: + mask[4] = 1 + if (row, col) == self.locs[dest_idx]: + mask[5] = 1 + return mask + def step(self, a): transitions = self.P[self.s][a] i = categorical_sample([t[0] for t in transitions], self.np_random) @@ -221,7 +237,10 @@ def step(self, a): self.s = s self.lastaction = a self.renderer.render_step() - return (int(s), r, d, {"prob": p}) + + taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s) + mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4) + return int(s), r, d, {"prob": p, "action_mask": mask} def reset( self, @@ -239,7 +258,9 @@ def reset( if not return_info: return int(self.s) else: - return int(self.s), {"prob": 1} + taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s) + mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4) + return int(self.s), {"prob": 1, "action_mask": mask} def render(self, mode="human"): if self.render_mode is not None: diff --git a/gym/spaces/box.py b/gym/spaces/box.py index e9b62c0a27d..f43bb57e3bd 100644 --- a/gym/spaces/box.py +++ b/gym/spaces/box.py @@ -146,7 +146,7 @@ def is_bounded(self, manner: str = "both") -> bool: else: raise ValueError("manner is not in {'below', 'above', 'both'}") - def sample(self) -> np.ndarray: + def sample(self, mask: np.ndarray = None) -> np.ndarray: r"""Generates a single random sample inside the Box. In creating a sample of the box, each coordinate is sampled (independently) from a distribution @@ -160,6 +160,9 @@ def sample(self) -> np.ndarray: Returns: A sampled value from the Box """ + if mask is not None: + return np.zeros(self.shape, self.dtype) + high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1 sample = np.empty(self.shape) diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py index 1aec7b0b01a..61e309c1495 100644 --- a/gym/spaces/dict.py +++ b/gym/spaces/dict.py @@ -1,6 +1,7 @@ """Implementation of a space that represents the cartesian product of other spaces as a dictionary.""" from collections import OrderedDict from collections.abc import Mapping, Sequence +from typing import Dict from typing import Dict as TypingDict from typing import Optional, Union @@ -137,7 +138,7 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list: return seeds - def sample(self) -> dict: + def sample(self, mask: Dict[str, np.ndarray] = None) -> dict: """Generates a single random sample from this space. The sample is an ordered dictionary of independent samples from the constituent spaces. @@ -145,6 +146,17 @@ def sample(self) -> dict: Returns: A dictionary with the same key and sampled values from :attr:`self.spaces` """ + if mask is not None: + assert isinstance( + mask, dict + ), f"Expects mask to be a dict, actual type: {type(dict)}" + assert ( + mask.keys == self.keys() + ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.keys()}" + return OrderedDict( + [(k, space.sample(mask[k])) for k, space in self.spaces.items()] + ) + return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()]) def contains(self, x) -> bool: diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py index f2ed1569ad1..6b32d971c80 100644 --- a/gym/spaces/discrete.py +++ b/gym/spaces/discrete.py @@ -40,7 +40,7 @@ def __init__( self.start = int(start) super().__init__((), np.int64, seed) - def sample(self) -> int: + def sample(self, mask: np.ndarray = None) -> int: """Generates a single random sample from this space. A sample will be chosen uniformly at random. @@ -48,6 +48,15 @@ def sample(self) -> int: Returns: A sampled integer from the space """ + if mask is not None: + assert isinstance(mask, np.ndarray) + assert mask.dtype == np.int8 + assert mask.shape == (self.n,) + if np.any(mask): + return int(self.start + self.np_random.choice(np.where(mask))) + else: + return self.start + return int(self.start + self.np_random.integers(self.n)) def contains(self, x) -> bool: diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index c26b7ff08d5..fc751c173f9 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -4,6 +4,7 @@ import numpy as np +import gym from gym.spaces.box import Box from gym.spaces.discrete import Discrete from gym.spaces.multi_discrete import MultiDiscrete @@ -98,12 +99,17 @@ def _sample_sample_space(self, sample_space) -> Optional[np.ndarray]: else: return None - def sample(self) -> NamedTuple: + def sample(self, mask=None) -> NamedTuple: """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. """ + if mask is not None: + raise gym.error.Error( + "Action masking for graphs are not supported at this time, please raise an issue on github." + ) + num_nodes = self.np_random.integers(low=1, high=10) # we only have edges when we have at least 2 nodes diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py index 1439536012b..5c93731072c 100644 --- a/gym/spaces/multi_binary.py +++ b/gym/spaces/multi_binary.py @@ -51,7 +51,7 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than gym.Space - never None.""" return self._shape # type: ignore - def sample(self) -> np.ndarray: + def sample(self, mask: np.ndarray = None) -> np.ndarray: """Generates a single random sample from this space. A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space). @@ -59,6 +59,12 @@ def sample(self) -> np.ndarray: Returns: Sampled values from space """ + if mask is not None: + assert isinstance(mask, np.ndarray) + assert mask.dtype == np.int8 + assert mask.shape == self.shape + return mask * self.np_random.integers(0, 2, self.n, self.dtype) + return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype) def contains(self, x) -> bool: diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index cb43420157b..465c57516c3 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -63,8 +63,22 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than :class:`gym.Space` - never None.""" return self._shape # type: ignore - def sample(self) -> np.ndarray: + def sample(self, mask: np.ndarray = None) -> np.ndarray: """Generates a single random sample this space.""" + if mask is not None: + assert isinstance(mask, np.ndarray) + assert mask.dtype == np.int8 + assert mask.shape == self.shape + + multi_mask = [np.where(row) for row in mask] + return np.array( + [ + self.np_random.choice(row_mask) if len(row_mask) > 0 else 0 + for row_mask in multi_mask + ], + dtype=self.dtype, + ) + return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype) def contains(self, x) -> bool: diff --git a/gym/spaces/space.py b/gym/spaces/space.py index 204a2dd4413..b5680e18942 100644 --- a/gym/spaces/space.py +++ b/gym/spaces/space.py @@ -81,7 +81,7 @@ def shape(self) -> Optional[Tuple[int, ...]]: """Return the shape of the space as an immutable property.""" return self._shape - def sample(self) -> T_cov: + def sample(self, mask=None) -> T_cov: """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space.""" raise NotImplementedError diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py index 51eef635164..c8de59defc1 100644 --- a/gym/spaces/tuple.py +++ b/gym/spaces/tuple.py @@ -1,5 +1,5 @@ """Implementation of a space that represents the cartesian product of other spaces.""" -from typing import Iterable, List, Optional, Sequence, Union +from typing import Iterable, List, Optional, Sequence, Tuple, Union import numpy as np @@ -72,7 +72,7 @@ def seed(self, seed: Optional[Union[int, List[int]]] = None) -> list: return seeds - def sample(self) -> tuple: + def sample(self, mask: Tuple[np.ndarray] = None) -> tuple: """Generates a single random sample inside this space. This method draws independent samples from the subspaces. @@ -80,6 +80,14 @@ def sample(self) -> tuple: Returns: Tuple of the subspace's samples """ + if mask is not None: + assert isinstance(mask, tuple) + assert len(mask) == len(self.spaces) + return tuple( + space.sample(mask=sub_mask) + for space, sub_mask in zip(self.spaces, mask) + ) + return tuple(space.sample() for space in self.spaces) def contains(self, x) -> bool: diff --git a/py.Dockerfile b/py.Dockerfile index e7db5e8fe91..0d4c290cb75 100644 --- a/py.Dockerfile +++ b/py.Dockerfile @@ -14,6 +14,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin COPY . /usr/local/gym/ WORKDIR /usr/local/gym/ -RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest==6.2.5 ; else pip install .[testing] ; fi +RUN if [ python:$PYTHON_VERSION = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest mock ; else pip install .[testing] ; fi ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] From bc6ab4a729627393aff90368220d4c35424807c1 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 15:28:28 +0100 Subject: [PATCH 07/22] Fix action mask --- gym/envs/toy_text/taxi.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index 4a6b7930364..6287a7b0972 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -214,19 +214,20 @@ def decode(self, i): assert 0 <= i < 5 return reversed(out) - def valid_mask(self, row, col, pass_loc, dest_idx, max_row): + def action_mask(self, row, col, pass_loc, dest_idx): + """Computes an action mask for the action space using the state information.""" mask = np.zeros(6, dtype=bool) - if row < max_row: + if row < 5: mask[0] = 1 if row > 0: mask[1] = 1 - if self.desc[1 + row, 2 * col + 2] == b":": + if col < 5 and self.desc[1 + row, 2 * col + 2] == b":": mask[2] = 1 - if self.desc[1 + row, 2 * col] == b":": + if col > 0 and self.desc[1 + row, 2 * col] == b":": mask[3] = 1 - if (row, col) == self.locs[pass_loc]: + if pass_loc < 4 and (row, col) == self.locs[pass_loc]: mask[4] = 1 - if (row, col) == self.locs[dest_idx]: + if pass_loc == 4 and (row, col) == self.locs[dest_idx]: mask[5] = 1 return mask @@ -239,7 +240,7 @@ def step(self, a): self.renderer.render_step() taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s) - mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4) + mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx) return int(s), r, d, {"prob": p, "action_mask": mask} def reset( @@ -259,7 +260,7 @@ def reset( return int(self.s) else: taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s) - mask = self.valid_mask(taxi_row, taxi_col, pass_loc, dest_idx, 4) + mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx) return int(self.s), {"prob": 1, "action_mask": mask} def render(self, mode="human"): From 1700e9d63dd40b9537bedb0cb85132a98c61e61a Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 17:41:05 +0100 Subject: [PATCH 08/22] Fix action_mask --- gym/envs/toy_text/taxi.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index 6287a7b0972..12f12e15892 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -214,20 +214,24 @@ def decode(self, i): assert 0 <= i < 5 return reversed(out) - def action_mask(self, row, col, pass_loc, dest_idx): + def action_mask(self, state: int): """Computes an action mask for the action space using the state information.""" - mask = np.zeros(6, dtype=bool) - if row < 5: + mask = np.zeros(6, dtype=np.int8) + taxi_row, taxi_col, pass_loc, dest_idx = self.decode(state) + if taxi_row < 4: mask[0] = 1 - if row > 0: + if taxi_row > 0: mask[1] = 1 - if col < 5 and self.desc[1 + row, 2 * col + 2] == b":": + if taxi_col < 4 and self.desc[taxi_row + 1, 2 * taxi_col + 2] == b":": mask[2] = 1 - if col > 0 and self.desc[1 + row, 2 * col] == b":": + if taxi_col > 0 and self.desc[taxi_row + 1, 2 * taxi_col] == b":": mask[3] = 1 - if pass_loc < 4 and (row, col) == self.locs[pass_loc]: + if pass_loc < 4 and (taxi_row, taxi_col) == self.locs[pass_loc]: mask[4] = 1 - if pass_loc == 4 and (row, col) == self.locs[dest_idx]: + if pass_loc == 4 and ( + (taxi_row, taxi_col) == self.locs[dest_idx] + or (taxi_row, taxi_col) in self.locs + ): mask[5] = 1 return mask @@ -239,9 +243,7 @@ def step(self, a): self.lastaction = a self.renderer.render_step() - taxi_row, taxi_col, pass_loc, dest_idx = self.decode(s) - mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx) - return int(s), r, d, {"prob": p, "action_mask": mask} + return int(s), r, d, {"prob": p, "action_mask": self.action_mask(s)} def reset( self, @@ -259,9 +261,7 @@ def reset( if not return_info: return int(self.s) else: - taxi_row, taxi_col, pass_loc, dest_idx = self.decode(self.s) - mask = self.action_mask(taxi_row, taxi_col, pass_loc, dest_idx) - return int(self.s), {"prob": 1, "action_mask": mask} + return int(self.s), {"prob": 1, "action_mask": self.action_mask(self.s)} def render(self, mode="human"): if self.render_mode is not None: From 7f46df2db4ce00e22309ec2a9eccf18eef840bdc Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 17 Jun 2022 17:41:25 +0100 Subject: [PATCH 09/22] Fix action_mask --- tests/envs/test_env_implementation.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py index 844d422c12a..7a088d4698c 100644 --- a/tests/envs/test_env_implementation.py +++ b/tests/envs/test_env_implementation.py @@ -3,6 +3,7 @@ import gym from gym.envs.box2d import BipedalWalker from gym.envs.box2d.lunar_lander import demo_heuristic_lander +from gym.envs.toy_text import TaxiEnv from gym.envs.toy_text.frozen_lake import generate_random_map @@ -80,3 +81,24 @@ def test_frozenlake_dfs_map_generation(map_size: int): if new_frozenlake[new_row][new_col] not in "#H": frontier.append((new_row, new_col)) raise AssertionError("No path through the frozenlake was found.") + + +def test_taxi_action_mask(): + env = TaxiEnv() + + for state in env.P: + mask = env.action_mask(state) + for action, possible in enumerate(mask): + _, next_state, _, _ = env.P[state][action][0] + assert state != next_state if possible else state == next_state + + +def test_taxi_encode_decode(): + env = TaxiEnv() + + state = env.reset() + for _ in range(100): + assert ( + env.encode(*env.decode(state)) == state + ), f"{state=}, encode decode: {env.encode(*env.decode(state))}" + state, _, _, _ = env.step(env.action_space.sample()) From cd910072066d16744178d96197068ed4c009ea11 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Sun, 19 Jun 2022 18:15:41 +0100 Subject: [PATCH 10/22] Added docstrings, fixed bugs and added taxi examples --- gym/envs/toy_text/taxi.py | 8 +++- gym/spaces/box.py | 10 ++++- gym/spaces/dict.py | 12 +++--- gym/spaces/discrete.py | 26 ++++++++---- gym/spaces/graph.py | 34 ++++++++-------- gym/spaces/multi_binary.py | 21 ++++++++-- gym/spaces/multi_discrete.py | 51 +++++++++++++++--------- gym/spaces/space.py | 14 ++++++- gym/spaces/tuple.py | 14 +++++-- tests/spaces/test_spaces.py | 77 +++++++++++++++++++++++++++--------- 10 files changed, 191 insertions(+), 76 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index 12f12e15892..ce831b095fb 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -56,6 +56,12 @@ class TaxiEnv(Env): - 4: pickup passenger - 5: drop off passenger + For some cases, taking these actions will have no effect on the state of the agent. + In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying + if the action will change the state. + To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])`` + Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``. + ### Observations There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the @@ -99,7 +105,7 @@ class TaxiEnv(Env): ``` ### Version History - * v3: Map Correction + Cleaner Domain Description + * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold. * v1: Remove (3,2) from locs, add passidx<4 check * v0: Initial versions release diff --git a/gym/spaces/box.py b/gym/spaces/box.py index f43bb57e3bd..35fbeb2d258 100644 --- a/gym/spaces/box.py +++ b/gym/spaces/box.py @@ -3,6 +3,7 @@ import numpy as np +import gym.error from gym import logger from gym.spaces.space import Space from gym.utils import seeding @@ -146,7 +147,7 @@ def is_bounded(self, manner: str = "both") -> bool: else: raise ValueError("manner is not in {'below', 'above', 'both'}") - def sample(self, mask: np.ndarray = None) -> np.ndarray: + def sample(self, mask: None = None) -> np.ndarray: r"""Generates a single random sample inside the Box. In creating a sample of the box, each coordinate is sampled (independently) from a distribution @@ -157,11 +158,16 @@ def sample(self, mask: np.ndarray = None) -> np.ndarray: * :math:`(-\infty, b]` : shifted negative exponential distribution * :math:`(-\infty, \infty)` : normal distribution + Args: + mask: A mask for sampling values from the Box space, currently unsupported. + Returns: A sampled value from the Box """ if mask is not None: - return np.zeros(self.shape, self.dtype) + raise gym.error.Error( + f"Box.sample cannot be provided a mask, actual value: {mask}" + ) high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1 sample = np.empty(self.shape) diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py index 61e309c1495..ec668e44347 100644 --- a/gym/spaces/dict.py +++ b/gym/spaces/dict.py @@ -1,7 +1,6 @@ """Implementation of a space that represents the cartesian product of other spaces as a dictionary.""" from collections import OrderedDict from collections.abc import Mapping, Sequence -from typing import Dict from typing import Dict as TypingDict from typing import Optional, Union @@ -138,21 +137,24 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list: return seeds - def sample(self, mask: Dict[str, np.ndarray] = None) -> dict: + def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict: """Generates a single random sample from this space. The sample is an ordered dictionary of independent samples from the constituent spaces. + Args: + mask: An optional mask for each of the subspaces, expects the same keys as the space + Returns: A dictionary with the same key and sampled values from :attr:`self.spaces` """ if mask is not None: assert isinstance( mask, dict - ), f"Expects mask to be a dict, actual type: {type(dict)}" + ), f"Expects mask to be a dict, actual type: {type(mask)}" assert ( - mask.keys == self.keys() - ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.keys()}" + mask.keys() == self.spaces.keys() + ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.spaces.keys()}" return OrderedDict( [(k, space.sample(mask[k])) for k, space in self.spaces.items()] ) diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py index 6b32d971c80..be9361194de 100644 --- a/gym/spaces/discrete.py +++ b/gym/spaces/discrete.py @@ -40,20 +40,32 @@ def __init__( self.start = int(start) super().__init__((), np.int64, seed) - def sample(self, mask: np.ndarray = None) -> int: + def sample(self, mask: Optional[np.ndarray] = None) -> int: """Generates a single random sample from this space. - A sample will be chosen uniformly at random. + A sample will be chosen uniformly at random with the mask if provided + + Args: + mask: An optional mask for if an action can be selected. Expected shape is (n,). If not possible actions, will default to `space.start` Returns: A sampled integer from the space """ if mask is not None: - assert isinstance(mask, np.ndarray) - assert mask.dtype == np.int8 - assert mask.shape == (self.n,) - if np.any(mask): - return int(self.start + self.np_random.choice(np.where(mask))) + assert isinstance( + mask, np.ndarray + ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}" + assert ( + mask.dtype == np.int8 + ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" + assert mask.shape == ( + self.n, + ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}" + assert np.all( + np.logical_or(mask == 0, mask == 1) + ), f"All values of a mask should be 0 or 1, actual values: {mask}" + if np.any(mask == 1): + return int(self.start + self.np_random.choice(np.where(mask)[0])) else: return self.start diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index fc751c173f9..44bb58f95e4 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -1,10 +1,9 @@ """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space.""" from collections import namedtuple -from typing import NamedTuple, Optional, Sequence, Union +from typing import NamedTuple, Optional, Sequence, Tuple, Union import numpy as np -import gym from gym.spaces.box import Box from gym.spaces.discrete import Discrete from gym.spaces.multi_discrete import MultiDiscrete @@ -93,23 +92,18 @@ def _generate_sample_space( f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error." ) - def _sample_sample_space(self, sample_space) -> Optional[np.ndarray]: - if sample_space is not None: - return sample_space.sample() - else: - return None - - def sample(self, mask=None) -> NamedTuple: + def sample( + self, mask: Optional[Tuple[Optional[np.ndarray], Optional[np.ndarray]]] = None + ) -> NamedTuple: """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. + Args: + mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces) + Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. """ - if mask is not None: - raise gym.error.Error( - "Action masking for graphs are not supported at this time, please raise an issue on github." - ) - + node_mask, edge_mask = mask if mask is not None else (None, None) num_nodes = self.np_random.integers(low=1, high=10) # we only have edges when we have at least 2 nodes @@ -121,8 +115,16 @@ def sample(self, mask=None) -> NamedTuple: node_sample_space = self._generate_sample_space(self.node_space, num_nodes) edge_sample_space = self._generate_sample_space(self.edge_space, num_edges) - sampled_nodes = self._sample_sample_space(node_sample_space) - sampled_edges = self._sample_sample_space(edge_sample_space) + sampled_nodes = ( + node_sample_space.sample(node_mask) + if node_sample_space is not None + else None + ) + sampled_edges = ( + edge_sample_space.sample(edge_mask) + if edge_sample_space is not None + else None + ) sampled_edge_links = None if sampled_edges is not None and num_edges > 0: diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py index 5c93731072c..dd1a62c11e5 100644 --- a/gym/spaces/multi_binary.py +++ b/gym/spaces/multi_binary.py @@ -51,18 +51,31 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than gym.Space - never None.""" return self._shape # type: ignore - def sample(self, mask: np.ndarray = None) -> np.ndarray: + def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: """Generates a single random sample from this space. A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space). + Args: + mask: An optional np.ndarray to mask samples, where mask == 0 will have samples == 0 + Returns: Sampled values from space """ if mask is not None: - assert isinstance(mask, np.ndarray) - assert mask.dtype == np.int8 - assert mask.shape == self.shape + assert isinstance( + mask, np.ndarray + ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}" + assert ( + mask.dtype == np.int8 + ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" + assert ( + mask.shape == self.shape + ), f"The expected shape of the mask is {self.shape}, actual shape: {mask.shape}" + assert np.all( + np.logical_or(mask == 0, mask == 1) + ), f"All values of a mask should be 0 or 1, actual values: {mask}" + return mask * self.np_random.integers(0, 2, self.n, self.dtype) return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype) diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 465c57516c3..5eec3a30437 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -23,8 +23,17 @@ class MultiDiscrete(Space[np.ndarray]): 2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` + It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample is array([3, 1, 0]) + Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes + if ``nvec`` has several axes: + + Example:: + + >> d = MultiDiscrete(np.array([[1, 2], [3, 4]])) + >> d.sample() + array([[0, 0], + [2, 3]]) """ def __init__( @@ -37,16 +46,6 @@ def __init__( The argument ``nvec`` will determine the number of values each categorical variable can take. - Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes - if ``nvec`` has several axes: - - Example:: - - >> d = MultiDiscrete(np.array([[1, 2], [3, 4]])) - >> d.sample() - array([[0, 0], - [2, 3]]) - Args: nvec: vector of counts of each categorical variable. This will usually be a list of integers. However, you may also pass a more complicated numpy array if you'd like the space to have several axes. @@ -63,14 +62,30 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than :class:`gym.Space` - never None.""" return self._shape # type: ignore - def sample(self, mask: np.ndarray = None) -> np.ndarray: - """Generates a single random sample this space.""" - if mask is not None: - assert isinstance(mask, np.ndarray) - assert mask.dtype == np.int8 - assert mask.shape == self.shape + def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: + """Generates a single random sample this space. + + Args: + mask: An optional mask for multi-discrete, expected shape is `space.nvec`. If there are no possible actions, defaults to 0 - multi_mask = [np.where(row) for row in mask] + Returns: + An np.ndarray of shape `space.shape` + """ + if mask is not None: + assert isinstance( + mask, np.ndarray + ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}" + assert ( + mask.dtype == np.int8 + ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" + assert np.all( + mask.shape == self.nvec + ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently." + assert np.all( + np.logical_or(mask == 0, mask == 1) + ), f"All values of a mask should be 0 or 1, actual values: {mask}" + + multi_mask = [np.where(row)[0] for row in mask] return np.array( [ self.np_random.choice(row_mask) if len(row_mask) > 0 else 0 diff --git a/gym/spaces/space.py b/gym/spaces/space.py index b5680e18942..3b006228b9e 100644 --- a/gym/spaces/space.py +++ b/gym/spaces/space.py @@ -1,6 +1,7 @@ """Implementation of the `Space` metaclass.""" from typing import ( + Any, Generic, Iterable, List, @@ -81,8 +82,17 @@ def shape(self) -> Optional[Tuple[int, ...]]: """Return the shape of the space as an immutable property.""" return self._shape - def sample(self, mask=None) -> T_cov: - """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space.""" + def sample(self, mask: Optional[Any] = None) -> T_cov: + """Randomly sample an element of this space. + + Can be uniform or non-uniform sampling based on boundedness of space. + + Args: + mask: A mask used for sampling, see Space for implementation details. + + Returns: + A sampled actions from the space + """ raise NotImplementedError def seed(self, seed: Optional[int] = None) -> list: diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py index c8de59defc1..57bc6b40cab 100644 --- a/gym/spaces/tuple.py +++ b/gym/spaces/tuple.py @@ -72,17 +72,25 @@ def seed(self, seed: Optional[Union[int, List[int]]] = None) -> list: return seeds - def sample(self, mask: Tuple[np.ndarray] = None) -> tuple: + def sample(self, mask: Optional[Tuple[Optional[np.ndarray]]] = None) -> tuple: """Generates a single random sample inside this space. This method draws independent samples from the subspaces. + Args: + mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces + Returns: Tuple of the subspace's samples """ if mask is not None: - assert isinstance(mask, tuple) - assert len(mask) == len(self.spaces) + assert isinstance( + mask, tuple + ), f"Expected type of mask is tuple, actual type: {type(mask)}" + assert len(mask) == len( + self.spaces + ), f"Expected length of mask is {len(self.spaces)}, actual length: {len(mask)}" + return tuple( space.sample(mask=sub_mask) for space, sub_mask in zip(self.spaces, mask) diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index fbc98aa1fae..fded6654afe 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from gym import Space from gym.spaces import Box, Dict, Discrete, Graph, MultiBinary, MultiDiscrete, Tuple @@ -154,31 +155,71 @@ def test_inequality(spaces): [ Discrete(5), Discrete(8, start=-20), - Box(low=0, high=255, shape=(2,), dtype="uint8"), + Box(low=0, high=255, shape=(2,)), Box(low=-np.inf, high=np.inf, shape=(3, 3)), Box(low=1.0, high=np.inf, shape=(3, 3)), Box(low=-np.inf, high=2.0, shape=(3, 3)), + Box(low=np.array([0, 2]), high=np.array([10, 4])), + MultiDiscrete([3, 5]), + MultiDiscrete(np.array([[3, 5], [2, 2]])), + MultiBinary([2, 4]), ], ) -def test_sample(space): +def test_sample(space: Space, n_trials: int = 1_000): + """Test the space sample works, todo, add chi-squared testing for the distribution""" space.seed(0) - n_trials = 100 samples = np.array([space.sample() for _ in range(n_trials)]) - expected_mean = 0.0 - if isinstance(space, Box): - if space.is_bounded(): - expected_mean = (space.high + space.low) / 2 - elif space.is_bounded("below"): - expected_mean = 1 + space.low - elif space.is_bounded("above"): - expected_mean = -1 + space.high - else: - expected_mean = 0.0 - elif isinstance(space, Discrete): - expected_mean = space.start + space.n / 2 - else: - raise NotImplementedError - np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std()) + assert len(samples) == n_trials + + +@pytest.mark.parametrize( + "space,mask", + [ + (Discrete(5), np.array([0, 1, 1, 0, 1], dtype=np.int8)), + (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)), + (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)), + (MultiDiscrete([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)), + # (MultiDiscrete(np.array([[3, 2], [2, 2]])), np.array([[[0, 1], [1, 1], [0, 0]], [[0, 1], [1, 1]]], dtype=np.int8)), Unsupported currently + (MultiBinary([2, 4]), np.array([[1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.int8)), + ], +) +def test_space_sample_mask(space, mask, n_trials: int = 100): + """Test the space sample with mask works, todo, add chi-squared testing for the distribution""" + space.seed(0) + samples = np.array([space.sample(mask) for _ in range(n_trials)]) + assert len(samples) == n_trials + + +@pytest.mark.parametrize( + "space,mask", + [ + ( + Dict(a=Discrete(2), b=MultiDiscrete([2, 4])), + { + "a": np.array([0, 1], dtype=np.int8), + "b": np.array([[0, 0, 0, 0], [1, 1, 1, 0]], dtype=np.int8), + }, + ), + ( + Tuple([Box(0, 1, ()), Discrete(3), MultiBinary([2, 1])]), + ( + None, + np.array([0, 1, 0], dtype=np.int8), + np.array([[0], [1]], dtype=np.int8), + ), + ), + ( + Dict(a=Tuple([Box(0, 1, ()), Discrete(3)]), b=Discrete(3)), + { + "a": (None, np.array([1, 0, 0], dtype=np.int8)), + "b": np.array([0, 1, 1], dtype=np.int8), + }, + ), + ], +) +def test_composite_space_sample_mask(space, mask): + """Test that composite space samples use the mask correctly.""" + space.sample(mask) @pytest.mark.parametrize( From be4063efbb74d285f357a15e8b3c08c58b705ee3 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Sun, 19 Jun 2022 22:47:50 +0100 Subject: [PATCH 11/22] Fixed bugs --- gym/envs/toy_text/taxi.py | 22 ++++++++++----- gym/spaces/multi_discrete.py | 40 +++++++++++++++------------ tests/envs/test_env_implementation.py | 2 +- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index ce831b095fb..ba578b3846e 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -56,12 +56,6 @@ class TaxiEnv(Env): - 4: pickup passenger - 5: drop off passenger - For some cases, taking these actions will have no effect on the state of the agent. - In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying - if the action will change the state. - To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])`` - Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``. - ### Observations There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the @@ -93,6 +87,20 @@ class TaxiEnv(Env): - 2: Y(ellow) - 3: B(lue) + ### Info + + ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask". + + As Taxi is a stochastic environment for transitions then the "p" key represents the probability of the + transition. However, this value is permanently 1.0 for an unknown reason. + + For some cases, taking these actions will have no effect on the state of the agent. + In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying + if the action will change the state. + + To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])`` + Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``. + ### Rewards - -1 per step unless other reward is triggered. - +20 delivering passenger. @@ -267,7 +275,7 @@ def reset( if not return_info: return int(self.s) else: - return int(self.s), {"prob": 1, "action_mask": self.action_mask(self.s)} + return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)} def render(self, mode="human"): if self.render_mode is not None: diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 5eec3a30437..06b381aa578 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -3,6 +3,7 @@ import numpy as np +import gym from gym import logger from gym.spaces.discrete import Discrete from gym.spaces.space import Space @@ -75,24 +76,27 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: assert isinstance( mask, np.ndarray ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}" - assert ( - mask.dtype == np.int8 - ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" - assert np.all( - mask.shape == self.nvec - ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently." - assert np.all( - np.logical_or(mask == 0, mask == 1) - ), f"All values of a mask should be 0 or 1, actual values: {mask}" - - multi_mask = [np.where(row)[0] for row in mask] - return np.array( - [ - self.np_random.choice(row_mask) if len(row_mask) > 0 else 0 - for row_mask in multi_mask - ], - dtype=self.dtype, - ) + if self.nvec.ndim == 1: + assert ( + mask.dtype == np.int8 + ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" + assert np.all( + mask.shape == self.nvec + ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently." + assert np.all( + np.logical_or(mask == 0, mask == 1) + ), f"All values of a mask should be 0 or 1, actual values: {mask}" + + multi_mask = [np.where(row)[0] for row in mask] + return np.array( + [ + self.np_random.choice(row_mask) if len(row_mask) > 0 else 0 + for row_mask in multi_mask + ], + dtype=self.dtype, + ) + else: + raise gym.error.Error() return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype) diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py index 7a088d4698c..248b36199f5 100644 --- a/tests/envs/test_env_implementation.py +++ b/tests/envs/test_env_implementation.py @@ -100,5 +100,5 @@ def test_taxi_encode_decode(): for _ in range(100): assert ( env.encode(*env.decode(state)) == state - ), f"{state=}, encode decode: {env.encode(*env.decode(state))}" + ), f"state={state}, encode(decode(state))={env.encode(*env.decode(state))}" state, _, _, _ = env.step(env.action_space.sample()) From 2f14eb7a455ad676d5d4f6128ac5481359592c9f Mon Sep 17 00:00:00 2001 From: StringTheory Date: Mon, 20 Jun 2022 14:26:35 +0100 Subject: [PATCH 12/22] Add tests for sample --- tests/spaces/test_spaces.py | 105 +++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index fded6654afe..101d22bb5d9 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -150,6 +150,37 @@ def test_inequality(spaces): assert space1 != space2, f"Expected {space1} != {space2}" +# The expected sum of variance for an alpha of 0.05 +# CHI_SQUARED = [0] + [scipy.stats.chi2.isf(0.05, df=df) for df in range(1, 25)] +CHI_SQUARED = [ + 0, + 3.8414588206941285, + 5.991464547107983, + 7.814727903251178, + 9.487729036781158, + 11.070497693516355, + 12.59158724374398, + 14.067140449340167, + 15.507313055865454, + 16.91897760462045, + 18.30703805327515, + 19.67513757268249, + 21.02606981748307, + 22.362032494826945, + 23.684791304840576, + 24.99579013972863, + 26.296227604864242, + 27.587111638275335, + 28.869299430392637, + 30.143527205646155, + 31.41043284423092, + 32.670573340917315, + 33.92443847144379, + 35.17246162690807, + 36.415028501807306, +] + + @pytest.mark.parametrize( "space", [ @@ -166,11 +197,83 @@ def test_inequality(spaces): ], ) def test_sample(space: Space, n_trials: int = 1_000): - """Test the space sample works, todo, add chi-squared testing for the distribution""" + """Test the space sample has the expected distribution with the chi-squared test and KS test. + + Example code with scipy.stats.chisquared + + import scipy.stats + variance = np.sum(np.square(observed_frequency - expected_frequency) / expected_frequency) + f'X2 at alpha=0.05 = {scipy.stats.chi2.isf(0.05, df=4)}' + f'p-value = {scipy.stats.chi2.sf(variance, df=4)}' + scipy.stats.chisquare(f_obs=observed_frequency) + """ space.seed(0) samples = np.array([space.sample() for _ in range(n_trials)]) assert len(samples) == n_trials + if isinstance(space, Discrete): + expected_frequency = np.ones(space.n) * n_trials / space.n + observed_frequency = np.zeros(space.n) + for sample in samples: + observed_frequency[sample - space.start] += 1 + degrees_of_freedom = space.n - 1 + + assert observed_frequency.shape == expected_frequency.shape + assert np.sum(observed_frequency) == n_trials + + variance = np.sum( + np.square(expected_frequency - observed_frequency) / expected_frequency + ) + assert variance < CHI_SQUARED[degrees_of_freedom] + elif isinstance(space, MultiBinary): + expected_frequency = n_trials / 2 + observed_frequency = np.sum(samples, axis=0) + assert observed_frequency.shape == space.shape + + variance = ( + 2 * np.square(observed_frequency - expected_frequency) / expected_frequency + ) + assert variance.shape == space.shape + assert np.all(variance < CHI_SQUARED[1]) + elif isinstance(space, MultiDiscrete): + # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes + def _generate_frequency(dim, func): + if isinstance(dim, np.ndarray): + print(dim) + return np.array( + [_generate_frequency(sub_dim, func) for sub_dim in dim], + dtype=object, + ) + else: + return func(dim) + + def _update_observed_frequency(obs_sample, obs_freq): + if isinstance(obs_sample, np.ndarray): + for sub_sample, sub_freq in zip(obs_sample, obs_freq): + _update_observed_frequency(sub_sample, sub_freq) + else: + obs_freq[obs_sample] += 1 + + expected_frequency = _generate_frequency( + space.nvec, lambda dim: np.ones(dim) * n_trials / dim + ) + observed_frequency = _generate_frequency(space.nvec, lambda dim: np.zeros(dim)) + for sample in samples: + _update_observed_frequency(sample, observed_frequency) + + def _chi_squared_test(dim, exp_freq, obs_freq): + if isinstance(dim, np.ndarray): + for sub_dim, sub_exp_freq, sub_obs_freq in zip(dim, exp_freq, obs_freq): + _chi_squared_test(sub_dim, sub_exp_freq, sub_obs_freq) + else: + assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,) + assert np.sum(obs_freq) == n_trials + _variance = np.sum(np.square(exp_freq - obs_freq) / exp_freq) + _degrees_of_freedom = dim - 1 + assert _variance < CHI_SQUARED[_degrees_of_freedom] + + _chi_squared_test(space.nvec, expected_frequency, observed_frequency) + @pytest.mark.parametrize( "space,mask", From f52d5d58e21fb4fd5e01061d688860bdfda13e8e Mon Sep 17 00:00:00 2001 From: StringTheory Date: Mon, 20 Jun 2022 23:00:03 +0100 Subject: [PATCH 13/22] Add docstrings and test space sample mask Discrete and MultiBinary --- gym/spaces/discrete.py | 12 +++-- gym/spaces/graph.py | 3 +- gym/spaces/multi_binary.py | 7 ++- gym/spaces/multi_discrete.py | 31 ++---------- gym/spaces/space.py | 2 +- gym/spaces/tuple.py | 3 +- tests/spaces/test_spaces.py | 96 ++++++++++++++++++++++-------------- 7 files changed, 80 insertions(+), 74 deletions(-) diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py index be9361194de..da05979b02a 100644 --- a/gym/spaces/discrete.py +++ b/gym/spaces/discrete.py @@ -46,7 +46,8 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int: A sample will be chosen uniformly at random with the mask if provided Args: - mask: An optional mask for if an action can be selected. Expected shape is (n,). If not possible actions, will default to `space.start` + mask: An optional mask for if an action can be selected. Expected shape is (n,). + If there are no possible actions, will default to `space.start`. Returns: A sampled integer from the space @@ -61,11 +62,14 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int: assert mask.shape == ( self.n, ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}" + valid_action_mask = mask == 1 assert np.all( - np.logical_or(mask == 0, mask == 1) + np.logical_or(mask == 0, valid_action_mask) ), f"All values of a mask should be 0 or 1, actual values: {mask}" - if np.any(mask == 1): - return int(self.start + self.np_random.choice(np.where(mask)[0])) + if np.any(valid_action_mask): + return int( + self.start + self.np_random.choice(np.where(valid_action_mask)[0]) + ) else: return self.start diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 44bb58f95e4..4705633ddb9 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -98,7 +98,8 @@ def sample( """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. Args: - mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces) + mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces). + The expected shape for the node mask is ``node_space.n`` and edge mask is ``edge_space.n``. Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py index dd1a62c11e5..6662b912274 100644 --- a/gym/spaces/multi_binary.py +++ b/gym/spaces/multi_binary.py @@ -57,7 +57,8 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space). Args: - mask: An optional np.ndarray to mask samples, where mask == 0 will have samples == 0 + mask: An optional np.ndarray to mask samples with expected shape of ``space.shape``. + Where mask == 0 then the samples will be 0. Returns: Sampled values from space @@ -76,7 +77,9 @@ def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: np.logical_or(mask == 0, mask == 1) ), f"All values of a mask should be 0 or 1, actual values: {mask}" - return mask * self.np_random.integers(0, 2, self.n, self.dtype) + return mask * self.np_random.integers( + low=0, high=2, size=self.n, dtype=self.dtype + ) return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype) diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 06b381aa578..d9754a5e740 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -3,7 +3,6 @@ import numpy as np -import gym from gym import logger from gym.spaces.discrete import Discrete from gym.spaces.space import Space @@ -63,40 +62,18 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than :class:`gym.Space` - never None.""" return self._shape # type: ignore - def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray: + def sample(self, mask: Optional[Tuple[np.ndarray, ...]] = None) -> np.ndarray: """Generates a single random sample this space. Args: - mask: An optional mask for multi-discrete, expected shape is `space.nvec`. If there are no possible actions, defaults to 0 + mask: An optional mask for multi-discrete, expected shape is `space.nvec` however for multi-axis nvec then + we expect np.ndarray dtype=object. If there are no possible actions, defaults to 0 Returns: An np.ndarray of shape `space.shape` """ if mask is not None: - assert isinstance( - mask, np.ndarray - ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}" - if self.nvec.ndim == 1: - assert ( - mask.dtype == np.int8 - ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}" - assert np.all( - mask.shape == self.nvec - ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently." - assert np.all( - np.logical_or(mask == 0, mask == 1) - ), f"All values of a mask should be 0 or 1, actual values: {mask}" - - multi_mask = [np.where(row)[0] for row in mask] - return np.array( - [ - self.np_random.choice(row_mask) if len(row_mask) > 0 else 0 - for row_mask in multi_mask - ], - dtype=self.dtype, - ) - else: - raise gym.error.Error() + pass return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype) diff --git a/gym/spaces/space.py b/gym/spaces/space.py index 3b006228b9e..5d7dea6f9a5 100644 --- a/gym/spaces/space.py +++ b/gym/spaces/space.py @@ -88,7 +88,7 @@ def sample(self, mask: Optional[Any] = None) -> T_cov: Can be uniform or non-uniform sampling based on boundedness of space. Args: - mask: A mask used for sampling, see Space for implementation details. + mask: A mask used for sampling, expected ``dtype=np.int8`` and see sample implementation for expected shape. Returns: A sampled actions from the space diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py index 57bc6b40cab..10b4344ef87 100644 --- a/gym/spaces/tuple.py +++ b/gym/spaces/tuple.py @@ -78,7 +78,8 @@ def sample(self, mask: Optional[Tuple[Optional[np.ndarray]]] = None) -> tuple: This method draws independent samples from the subspaces. Args: - mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces + mask: An optional tuple of optional masks for each of the subspace's samples, + expects the same number of masks as spaces Returns: Tuple of the subspace's samples diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index 101d22bb5d9..7d44134e0c5 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -152,47 +152,35 @@ def test_inequality(spaces): # The expected sum of variance for an alpha of 0.05 # CHI_SQUARED = [0] + [scipy.stats.chi2.isf(0.05, df=df) for df in range(1, 25)] -CHI_SQUARED = [ - 0, - 3.8414588206941285, - 5.991464547107983, - 7.814727903251178, - 9.487729036781158, - 11.070497693516355, - 12.59158724374398, - 14.067140449340167, - 15.507313055865454, - 16.91897760462045, - 18.30703805327515, - 19.67513757268249, - 21.02606981748307, - 22.362032494826945, - 23.684791304840576, - 24.99579013972863, - 26.296227604864242, - 27.587111638275335, - 28.869299430392637, - 30.143527205646155, - 31.41043284423092, - 32.670573340917315, - 33.92443847144379, - 35.17246162690807, - 36.415028501807306, -] +CHI_SQUARED = np.array( + [ + 0.01, + 3.8414588206941285, + 5.991464547107983, + 7.814727903251178, + 9.487729036781158, + 11.070497693516355, + 12.59158724374398, + 14.067140449340167, + 15.507313055865454, + 16.91897760462045, + ] +) @pytest.mark.parametrize( "space", [ + Discrete(1), Discrete(5), Discrete(8, start=-20), - Box(low=0, high=255, shape=(2,)), - Box(low=-np.inf, high=np.inf, shape=(3, 3)), - Box(low=1.0, high=np.inf, shape=(3, 3)), - Box(low=-np.inf, high=2.0, shape=(3, 3)), + Box(low=0, high=255, shape=(2,), dtype=np.uint8), + Box(low=-np.inf, high=np.inf, shape=(3,)), + Box(low=1.0, high=np.inf, shape=(3,)), + Box(low=-np.inf, high=2.0, shape=(3,)), Box(low=np.array([0, 2]), high=np.array([10, 4])), MultiDiscrete([3, 5]), - MultiDiscrete(np.array([[3, 5], [2, 2]])), + MultiDiscrete(np.array([[3, 5], [2, 1]])), MultiBinary([2, 4]), ], ) @@ -211,6 +199,7 @@ def test_sample(space: Space, n_trials: int = 1_000): samples = np.array([space.sample() for _ in range(n_trials)]) assert len(samples) == n_trials + # todo add Box space test if isinstance(space, Discrete): expected_frequency = np.ones(space.n) * n_trials / space.n observed_frequency = np.zeros(space.n) @@ -230,6 +219,7 @@ def test_sample(space: Space, n_trials: int = 1_000): observed_frequency = np.sum(samples, axis=0) assert observed_frequency.shape == space.shape + # As this is a binary space, then we can be lazy in the variance as the np.square is symmetric for the 0 and 1 categories variance = ( 2 * np.square(observed_frequency - expected_frequency) / expected_frequency ) @@ -239,7 +229,6 @@ def test_sample(space: Space, n_trials: int = 1_000): # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes def _generate_frequency(dim, func): if isinstance(dim, np.ndarray): - print(dim) return np.array( [_generate_frequency(sub_dim, func) for sub_dim in dim], dtype=object, @@ -268,6 +257,7 @@ def _chi_squared_test(dim, exp_freq, obs_freq): else: assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,) assert np.sum(obs_freq) == n_trials + assert np.sum(exp_freq) == n_trials _variance = np.sum(np.square(exp_freq - obs_freq) / exp_freq) _degrees_of_freedom = dim - 1 assert _variance < CHI_SQUARED[_degrees_of_freedom] @@ -281,16 +271,46 @@ def _chi_squared_test(dim, exp_freq, obs_freq): (Discrete(5), np.array([0, 1, 1, 0, 1], dtype=np.int8)), (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)), (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)), - (MultiDiscrete([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)), - # (MultiDiscrete(np.array([[3, 2], [2, 2]])), np.array([[[0, 1], [1, 1], [0, 0]], [[0, 1], [1, 1]]], dtype=np.int8)), Unsupported currently - (MultiBinary([2, 4]), np.array([[1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.int8)), + (MultiBinary([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)), + # todo MultiDiscrete spaces ], ) def test_space_sample_mask(space, mask, n_trials: int = 100): """Test the space sample with mask works, todo, add chi-squared testing for the distribution""" - space.seed(0) + space.seed(1) samples = np.array([space.sample(mask) for _ in range(n_trials)]) - assert len(samples) == n_trials + + if isinstance(space, Discrete): + if np.any(mask == 1): + expected_frequency = np.ones(space.n) * (n_trials / np.sum(mask)) * mask + else: + expected_frequency = np.zeros(space.n) + expected_frequency[0] = n_trials + observed_frequency = np.zeros(space.n) + for sample in samples: + observed_frequency[sample - space.start] += 1 + degrees_of_freedom = max(np.sum(mask) - 1, 0) + + assert observed_frequency.shape == expected_frequency.shape + assert np.sum(observed_frequency) == n_trials + assert np.sum(expected_frequency) == n_trials + variance = np.sum( + np.square(expected_frequency - observed_frequency) + / np.clip(expected_frequency, 1, None) + ) + assert variance < CHI_SQUARED[degrees_of_freedom] + elif isinstance(space, MultiBinary): + expected_frequency = np.ones(space.shape) * mask * (n_trials / 2) + observed_frequency = np.sum(samples, axis=0) + assert space.shape == expected_frequency.shape == observed_frequency.shape + + variance = ( + 2 + * np.square(observed_frequency - expected_frequency) + / np.clip(expected_frequency, 1, None) + ) + assert variance.shape == space.shape + assert np.all(variance < CHI_SQUARED[1]) @pytest.mark.parametrize( From 5e699e14aeecea53524cd249d68414d9f9f34f0b Mon Sep 17 00:00:00 2001 From: StringTheory Date: Tue, 21 Jun 2022 15:15:17 +0100 Subject: [PATCH 14/22] Add MultiDiscrete sampling and tests --- gym/spaces/multi_discrete.py | 48 ++++++++++++++++++-- tests/spaces/test_spaces.py | 88 ++++++++++++++++++++++++++++++++++-- 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index d9754a5e740..27336fe22f5 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -8,6 +8,8 @@ from gym.spaces.space import Space from gym.utils import seeding +SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray]] + class MultiDiscrete(Space[np.ndarray]): """This represents the cartesian product of arbitrary :class:`Discrete` spaces. @@ -62,18 +64,54 @@ def shape(self) -> Tuple[int, ...]: """Has stricter type than :class:`gym.Space` - never None.""" return self._shape # type: ignore - def sample(self, mask: Optional[Tuple[np.ndarray, ...]] = None) -> np.ndarray: + def sample(self, mask: Optional[SAMPLE_MASK_TYPE] = None) -> np.ndarray: """Generates a single random sample this space. Args: - mask: An optional mask for multi-discrete, expected shape is `space.nvec` however for multi-axis nvec then - we expect np.ndarray dtype=object. If there are no possible actions, defaults to 0 + mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each + action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`. + If there are no possible actions, the default action is 0 Returns: - An np.ndarray of shape `space.shape` + An `np.ndarray` of shape `space.shape` """ if mask is not None: - pass + + def _apply_mask( + sub_mask: SAMPLE_MASK_TYPE, sub_nvec: np.ndarray + ) -> Union[int, List[int]]: + if isinstance(sub_mask, np.ndarray): + assert np.isscalar( + sub_nvec + ), f"Expects the mask to be for an action, actual for {sub_nvec}" + assert ( + len(sub_mask) == sub_nvec + ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, nvec length: {sub_nvec}" + assert ( + sub_mask.dtype == np.int8 + ), f"Expects the mask dtype to be np.int8, actual dtype: {sub_mask.dtype}" + valid_action_mask = sub_mask == 1 + assert np.all( + np.logical_or(sub_mask == 0, valid_action_mask) + ), f"Expects all masks values to 0 or 1, actual values: {sub_mask}" + + if np.any(valid_action_mask): + return self.np_random.choice(np.where(valid_action_mask)[0]) + else: + return 0 + else: + assert isinstance( + sub_mask, tuple + ), f"Expects the mask to be a tuple or np.ndarray, actual type: {type(sub_mask)}" + assert len(sub_mask) == len( + sub_nvec + ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, nvec length: {len(sub_nvec)}" + return [ + _apply_mask(new_mask, new_nvec) + for new_mask, new_nvec in zip(sub_mask, sub_nvec) + ] + + return np.array(_apply_mask(mask, self.nvec), dtype=self.dtype) return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype) diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index 7d44134e0c5..0aa219a7ea8 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -2,6 +2,7 @@ import json # note: ujson fails this test due to float equality import pickle import tempfile +from typing import List, Union import numpy as np import pytest @@ -272,11 +273,31 @@ def _chi_squared_test(dim, exp_freq, obs_freq): (Discrete(4, start=-20), np.array([1, 1, 0, 1], dtype=np.int8)), (Discrete(4, start=1), np.array([0, 0, 0, 0], dtype=np.int8)), (MultiBinary([3, 2]), np.array([[0, 1], [1, 1], [0, 0]], dtype=np.int8)), - # todo MultiDiscrete spaces + ( + MultiDiscrete([5, 3]), + ( + np.array([0, 1, 1, 0, 1], dtype=np.int8), + np.array([0, 1, 1], dtype=np.int8), + ), + ), + ( + MultiDiscrete(np.array([4, 2])), + (np.array([0, 0, 0, 0], dtype=np.int8), np.array([1, 1], dtype=np.int8)), + ), + ( + MultiDiscrete(np.array([[2, 2], [4, 3]])), + ( + (np.array([0, 1], dtype=np.int8), np.array([1, 1], dtype=np.int8)), + ( + np.array([0, 1, 1, 0], dtype=np.int8), + np.array([1, 0, 0], dtype=np.int8), + ), + ), + ), ], ) def test_space_sample_mask(space, mask, n_trials: int = 100): - """Test the space sample with mask works, todo, add chi-squared testing for the distribution""" + """Test the space sample with mask works using the pearson chi-squared test.""" space.seed(1) samples = np.array([space.sample(mask) for _ in range(n_trials)]) @@ -311,6 +332,64 @@ def test_space_sample_mask(space, mask, n_trials: int = 100): ) assert variance.shape == space.shape assert np.all(variance < CHI_SQUARED[1]) + elif isinstance(space, MultiDiscrete): + # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes + def _generate_frequency( + _dim: Union[np.ndarray, int], _mask, func: callable + ) -> List: + if isinstance(_dim, np.ndarray): + return [ + _generate_frequency(sub_dim, sub_mask, func) + for sub_dim, sub_mask in zip(_dim, _mask) + ] + else: + return func(_dim, _mask) + + def _update_observed_frequency(obs_sample, obs_freq): + if isinstance(obs_sample, np.ndarray): + for sub_sample, sub_freq in zip(obs_sample, obs_freq): + _update_observed_frequency(sub_sample, sub_freq) + else: + obs_freq[obs_sample] += 1 + + def _exp_freq_fn(_dim: int, _mask: np.ndarray): + if np.any(_mask == 1): + print(f"{_dim=}, {_mask=}") + assert _dim == len(_mask) + return np.ones(_dim) * (n_trials / np.sum(_mask)) * _mask + else: + freq = np.zeros(_dim) + freq[0] = n_trials + return freq + + expected_frequency = _generate_frequency( + space.nvec, mask, lambda dim, _mask: _exp_freq_fn(dim, _mask) + ) + observed_frequency = _generate_frequency( + space.nvec, mask, lambda dim, _: np.zeros(dim) + ) + for sample in samples: + _update_observed_frequency(sample, observed_frequency) + + def _chi_squared_test(dim, _mask, exp_freq, obs_freq): + if isinstance(dim, np.ndarray): + for sub_dim, sub_mask, sub_exp_freq, sub_obs_freq in zip( + dim, _mask, exp_freq, obs_freq + ): + _chi_squared_test(sub_dim, sub_mask, sub_exp_freq, sub_obs_freq) + else: + assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,) + assert np.sum(obs_freq) == n_trials + assert np.sum(exp_freq) == n_trials + _variance = np.sum( + np.square(exp_freq - obs_freq) / np.clip(exp_freq, 1, None) + ) + _degrees_of_freedom = max(np.sum(_mask) - 1, 0) + assert _variance < CHI_SQUARED[_degrees_of_freedom] + + _chi_squared_test(space.nvec, mask, expected_frequency, observed_frequency) + else: + raise NotImplementedError() @pytest.mark.parametrize( @@ -320,7 +399,10 @@ def test_space_sample_mask(space, mask, n_trials: int = 100): Dict(a=Discrete(2), b=MultiDiscrete([2, 4])), { "a": np.array([0, 1], dtype=np.int8), - "b": np.array([[0, 0, 0, 0], [1, 1, 1, 0]], dtype=np.int8), + "b": ( + np.array([0, 1], dtype=np.int8), + np.array([1, 1, 0, 0], dtype=np.int8), + ), }, ), ( From 634da1218c9e34131561264379948483d3a33eb2 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Tue, 21 Jun 2022 17:36:26 +0100 Subject: [PATCH 15/22] Remove sample mask from graph --- gym/spaces/graph.py | 25 +++++++++++-------------- tests/spaces/test_spaces.py | 12 +++++++++++- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 4705633ddb9..5c6cf6d96b6 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -1,6 +1,6 @@ """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space.""" from collections import namedtuple -from typing import NamedTuple, Optional, Sequence, Tuple, Union +from typing import NamedTuple, Optional, Sequence, Union import numpy as np @@ -92,39 +92,36 @@ def _generate_sample_space( f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error." ) - def sample( - self, mask: Optional[Tuple[Optional[np.ndarray], Optional[np.ndarray]]] = None - ) -> NamedTuple: + def sample(self, mask: None = None) -> NamedTuple: """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. Args: - mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces). - The expected shape for the node mask is ``node_space.n`` and edge mask is ``edge_space.n``. + mask: As the number of nodes to determined during sample, it is not possible to know the mask beforehand. Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. """ - node_mask, edge_mask = mask if mask is not None else (None, None) + if mask is not None: + raise NotImplementedError( + "Graph.sample(mask) is not implemented as the number of nodes is determined within the function." + ) + num_nodes = self.np_random.integers(low=1, high=10) # we only have edges when we have at least 2 nodes num_edges = 0 if num_nodes > 1: - # maximal number of edges is (n*n) allowing self connections and two way is allowed + # maximal number of edges is (n*n) allowing self connections and two-way is allowed num_edges = self.np_random.integers(num_nodes * num_nodes) node_sample_space = self._generate_sample_space(self.node_space, num_nodes) edge_sample_space = self._generate_sample_space(self.edge_space, num_edges) sampled_nodes = ( - node_sample_space.sample(node_mask) - if node_sample_space is not None - else None + node_sample_space.sample() if node_sample_space is not None else None ) sampled_edges = ( - edge_sample_space.sample(edge_mask) - if edge_sample_space is not None - else None + edge_sample_space.sample() if edge_sample_space is not None else None ) sampled_edge_links = None diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index 0aa219a7ea8..25d26e37aed 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -354,7 +354,6 @@ def _update_observed_frequency(obs_sample, obs_freq): def _exp_freq_fn(_dim: int, _mask: np.ndarray): if np.any(_mask == 1): - print(f"{_dim=}, {_mask=}") assert _dim == len(_mask) return np.ones(_dim) * (n_trials / np.sum(_mask)) * _mask else: @@ -420,6 +419,17 @@ def _chi_squared_test(dim, _mask, exp_freq, obs_freq): "b": np.array([0, 1, 1], dtype=np.int8), }, ), + (Graph(node_space=Discrete(5), edge_space=Discrete(3)), None), + ( + Graph(node_space=Discrete(3), edge_space=Box(low=0, high=1, shape=(5,))), + None, + ), + ( + Graph( + node_space=Box(low=-100, high=100, shape=(3,)), edge_space=Discrete(3) + ), + None, + ), ], ) def test_composite_space_sample_mask(space, mask): From f85055c1648b0eed3afa67fd5a42648ae730e199 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 23 Jun 2022 18:57:06 +0100 Subject: [PATCH 16/22] Update gym/spaces/multi_discrete.py Co-authored-by: Markus Krimmel --- gym/spaces/multi_discrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 27336fe22f5..2e7bc09a0b1 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -25,7 +25,7 @@ class MultiDiscrete(Space[np.ndarray]): 2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample is array([3, 1, 0]) + It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample might be ``array([3, 1, 0])``. Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes if ``nvec`` has several axes: From 4a4b166fd05bf7794f23d73a0f1953253b745989 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Thu, 23 Jun 2022 19:48:13 +0100 Subject: [PATCH 17/22] Updates based on Marcus28 and jjshoots for Graph.py --- gym/envs/toy_text/taxi.py | 12 +++--- gym/spaces/dict.py | 3 +- gym/spaces/discrete.py | 5 ++- gym/spaces/graph.py | 73 +++++++++++++++++++++++------------- gym/spaces/multi_discrete.py | 2 +- 5 files changed, 60 insertions(+), 35 deletions(-) diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index ba578b3846e..7cead13a468 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -89,13 +89,15 @@ class TaxiEnv(Env): ### Info - ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask". + ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask" containing + the probability that the state is taken and a mask of what actions will result in a change of state to speed up training. - As Taxi is a stochastic environment for transitions then the "p" key represents the probability of the - transition. However, this value is permanently 1.0 for an unknown reason. + As Taxi's initial state is a stochastic, the "p" key represents the probability of the + transition however this value is currently bugged being 1.0, this will be fixed soon. + As the steps are deterministic, "p" represents the probability of the transition which is always 1.0 - For some cases, taking these actions will have no effect on the state of the agent. - In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying + For some cases, taking an action will have no effect on the state of the agent. + In v0.25.0, ``info["action_mask"]`` contains a np.ndarray for each of the action specifying if the action will change the state. To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])`` diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py index ec668e44347..a830ac8819f 100644 --- a/gym/spaces/dict.py +++ b/gym/spaces/dict.py @@ -1,6 +1,7 @@ """Implementation of a space that represents the cartesian product of other spaces as a dictionary.""" from collections import OrderedDict from collections.abc import Mapping, Sequence +from typing import Any from typing import Dict as TypingDict from typing import Optional, Union @@ -137,7 +138,7 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list: return seeds - def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict: + def sample(self, mask: Optional[TypingDict[str, Any]] = None) -> dict: """Generates a single random sample from this space. The sample is an ordered dictionary of independent samples from the constituent spaces. diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py index da05979b02a..7f8f17dfa17 100644 --- a/gym/spaces/discrete.py +++ b/gym/spaces/discrete.py @@ -46,8 +46,9 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int: A sample will be chosen uniformly at random with the mask if provided Args: - mask: An optional mask for if an action can be selected. Expected shape is (n,). - If there are no possible actions, will default to `space.start`. + mask: An optional mask for if an action can be selected. + Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions. + If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned. Returns: A sampled integer from the space diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 5c6cf6d96b6..84523045cff 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -1,12 +1,12 @@ """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space.""" from collections import namedtuple -from typing import NamedTuple, Optional, Sequence, Union +from typing import NamedTuple, Optional, Sequence, Tuple, Union import numpy as np from gym.spaces.box import Box from gym.spaces.discrete import Discrete -from gym.spaces.multi_discrete import MultiDiscrete +from gym.spaces.multi_discrete import SAMPLE_MASK_TYPE, MultiDiscrete from gym.spaces.space import Space from gym.utils import seeding @@ -70,58 +70,79 @@ def __init__( def _generate_sample_space( self, base_space: Union[None, Box, Discrete], num: int - ) -> Optional[Union[Box, Discrete]]: - # the possibility of this space , got {type(base_space)}aving nothing - if num == 0: + ) -> Optional[Union[Box, MultiDiscrete]]: + if num == 0 or base_space is None: return None if isinstance(base_space, Box): return Box( low=np.array(max(1, num) * [base_space.low]), high=np.array(max(1, num) * [base_space.high]), - shape=(num, *base_space.shape), + shape=(num,) + base_space.shape, dtype=base_space.dtype, - seed=self._np_random, + seed=self.np_random, ) elif isinstance(base_space, Discrete): - return MultiDiscrete(nvec=[base_space.n] * num, seed=self._np_random) - elif base_space is None: - return None + return MultiDiscrete(nvec=[base_space.n] * num, seed=self.np_random) else: raise AssertionError( - f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error." + f"Expects base space to be Box and Discrete, actual space: {type(base_space)}." ) - def sample(self, mask: None = None) -> NamedTuple: + def sample( + self, + num_nodes: int, + num_edges: Optional[int] = None, + mask: Optional[ + Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]] + ] = None, + ) -> NamedTuple: """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. Args: - mask: As the number of nodes to determined during sample, it is not possible to know the mask beforehand. + num_nodes: The number of nodes that will be sampled + num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2 + mask: An optional tuple of optional node and edge mask that is only possible with Discrete spaces + (Box spaces don't support sample masks). + If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. """ - if mask is not None: - raise NotImplementedError( - "Graph.sample(mask) is not implemented as the number of nodes is determined within the function." - ) + assert ( + num_nodes > 0 + ), f"The number of nodes is expected to be greater than 0, actual value: {num_nodes}" - num_nodes = self.np_random.integers(low=1, high=10) + if mask is not None: + node_space_mask, edge_space_mask = mask + else: + node_space_mask, edge_space_mask = None, None # we only have edges when we have at least 2 nodes - num_edges = 0 - if num_nodes > 1: - # maximal number of edges is (n*n) allowing self connections and two-way is allowed - num_edges = self.np_random.integers(num_nodes * num_nodes) + if num_edges is None: + if num_nodes > 1: + # maximal number of edges is (n*n) allowing self connections and two-way is allowed + num_edges = self.np_random.integers(num_nodes * num_nodes) + else: + num_edges = 0 + edge_space_mask = tuple(edge_space_mask for _ in range(num_edges)) + else: + assert ( + num_edges >= 0 + ), f"The number of edges is expected to be greater than 0, actual mask: {num_edges}" - node_sample_space = self._generate_sample_space(self.node_space, num_nodes) - edge_sample_space = self._generate_sample_space(self.edge_space, num_edges) + sampled_node_space = self._generate_sample_space(self.node_space, num_nodes) + sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges) sampled_nodes = ( - node_sample_space.sample() if node_sample_space is not None else None + sampled_node_space.sample(node_space_mask) + if sampled_node_space is not None + else None ) sampled_edges = ( - edge_sample_space.sample() if edge_sample_space is not None else None + sampled_edge_space.sample(edge_space_mask) + if sampled_edge_space is not None + else None ) sampled_edge_links = None diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 2e7bc09a0b1..955c5fa8f9d 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -70,7 +70,7 @@ def sample(self, mask: Optional[SAMPLE_MASK_TYPE] = None) -> np.ndarray: Args: mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`. - If there are no possible actions, the default action is 0 + Only mask values == 1 are possible to sample unless all mask values for an action are 0 then the default action 0 is sampled. Returns: An `np.ndarray` of shape `space.shape` From eb63c6255ce8e171f86c422bf8d4a6eedc0965d2 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Thu, 23 Jun 2022 19:54:30 +0100 Subject: [PATCH 18/22] Updates based on Marcus28 and jjshoots for Graph.py --- gym/spaces/graph.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 84523045cff..89a922c162a 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -91,20 +91,20 @@ def _generate_sample_space( def sample( self, - num_nodes: int, - num_edges: Optional[int] = None, mask: Optional[ Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]] ] = None, + num_nodes: int = 10, + num_edges: Optional[int] = None, ) -> NamedTuple: """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph. Args: - num_nodes: The number of nodes that will be sampled - num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2 mask: An optional tuple of optional node and edge mask that is only possible with Discrete spaces (Box spaces don't support sample masks). If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges + num_nodes: The number of nodes that will be sampled, the default is 10 nodes + num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2 Returns: A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. @@ -125,7 +125,8 @@ def sample( num_edges = self.np_random.integers(num_nodes * num_nodes) else: num_edges = 0 - edge_space_mask = tuple(edge_space_mask for _ in range(num_edges)) + if edge_space_mask is not None: + edge_space_mask = tuple(edge_space_mask for _ in range(num_edges)) else: assert ( num_edges >= 0 From 89189146e5d9f1a8b9bf4a26e5d59d841edc3c2d Mon Sep 17 00:00:00 2001 From: StringTheory Date: Fri, 24 Jun 2022 18:39:07 +0100 Subject: [PATCH 19/22] jjshoot review --- gym/spaces/graph.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 89a922c162a..9cde9ed30a5 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -121,7 +121,7 @@ def sample( # we only have edges when we have at least 2 nodes if num_edges is None: if num_nodes > 1: - # maximal number of edges is (n*n) allowing self connections and two-way is allowed + # maximal number of edges is n*(n-1) allowing self connections and two-way is allowed num_edges = self.np_random.integers(num_nodes * num_nodes) else: num_edges = 0 @@ -135,11 +135,7 @@ def sample( sampled_node_space = self._generate_sample_space(self.node_space, num_nodes) sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges) - sampled_nodes = ( - sampled_node_space.sample(node_space_mask) - if sampled_node_space is not None - else None - ) + sampled_nodes = sampled_node_space.sample(node_space_mask) sampled_edges = ( sampled_edge_space.sample(edge_space_mask) if sampled_edge_space is not None From a53f0e740f01291f4c254d46133fa6124e6a2917 Mon Sep 17 00:00:00 2001 From: StringTheory Date: Sat, 25 Jun 2022 17:43:19 +0100 Subject: [PATCH 20/22] jjshoot review --- gym/spaces/graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index 9cde9ed30a5..a721d55003e 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -121,8 +121,8 @@ def sample( # we only have edges when we have at least 2 nodes if num_edges is None: if num_nodes > 1: - # maximal number of edges is n*(n-1) allowing self connections and two-way is allowed - num_edges = self.np_random.integers(num_nodes * num_nodes) + # maximal number of edges is `n*(n-1)` allowing self connections and two-way is allowed + num_edges = self.np_random.integers(num_nodes * (num_nodes - 1)) else: num_edges = 0 if edge_space_mask is not None: From 8e71e467c38f6502dc76e8a919d4ba1880f3eaae Mon Sep 17 00:00:00 2001 From: StringTheory Date: Sat, 25 Jun 2022 22:45:41 +0100 Subject: [PATCH 21/22] Update assert check --- gym/spaces/multi_discrete.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index 955c5fa8f9d..f63d9c43a57 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -81,8 +81,8 @@ def _apply_mask( sub_mask: SAMPLE_MASK_TYPE, sub_nvec: np.ndarray ) -> Union[int, List[int]]: if isinstance(sub_mask, np.ndarray): - assert np.isscalar( - sub_nvec + assert np.issubdtype( + type(sub_nvec, np.integer) ), f"Expects the mask to be for an action, actual for {sub_nvec}" assert ( len(sub_mask) == sub_nvec @@ -90,6 +90,7 @@ def _apply_mask( assert ( sub_mask.dtype == np.int8 ), f"Expects the mask dtype to be np.int8, actual dtype: {sub_mask.dtype}" + valid_action_mask = sub_mask == 1 assert np.all( np.logical_or(sub_mask == 0, valid_action_mask) From 875ab441161ee654cdddc47d4bf323d11738035c Mon Sep 17 00:00:00 2001 From: StringTheory Date: Sat, 25 Jun 2022 23:33:07 +0100 Subject: [PATCH 22/22] Update type hints --- gym/spaces/graph.py | 5 ++++- gym/spaces/multi_discrete.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py index a721d55003e..a2ef1cf0d60 100644 --- a/gym/spaces/graph.py +++ b/gym/spaces/graph.py @@ -92,7 +92,10 @@ def _generate_sample_space( def sample( self, mask: Optional[ - Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]] + Tuple[ + Optional[Union[np.ndarray, SAMPLE_MASK_TYPE]], + Optional[Union[np.ndarray, SAMPLE_MASK_TYPE]], + ] ] = None, num_nodes: int = 10, num_edges: Optional[int] = None, diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py index f63d9c43a57..71111d4c9dd 100644 --- a/gym/spaces/multi_discrete.py +++ b/gym/spaces/multi_discrete.py @@ -8,7 +8,7 @@ from gym.spaces.space import Space from gym.utils import seeding -SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray]] +SAMPLE_MASK_TYPE = Tuple[Union["SAMPLE_MASK_TYPE", np.ndarray], ...] class MultiDiscrete(Space[np.ndarray]): @@ -82,7 +82,7 @@ def _apply_mask( ) -> Union[int, List[int]]: if isinstance(sub_mask, np.ndarray): assert np.issubdtype( - type(sub_nvec, np.integer) + type(sub_nvec), np.integer ), f"Expects the mask to be for an action, actual for {sub_nvec}" assert ( len(sub_mask) == sub_nvec