From 80008b873c745aea326dcd97d67ba1390b228042 Mon Sep 17 00:00:00 2001 From: Antony Kellermann Date: Fri, 25 Dec 2020 19:06:00 -0500 Subject: [PATCH] Minesweeper Env (#2) ### Changes * Implements minesweeper gym env * Adds unit tests * Switches from `pipenv` to `virtualenv` because [pipenv locking takes an eternity](https://github.com/pypa/pipenv/issues/3827) --- .circleci/config.yml | 25 +-- .gitignore | 3 +- Pipfile | 19 -- gym_minesweeper/__init__.py | 5 + gym_minesweeper/minesweeper.py | 205 ++++++++++++++++++++++ gym_minesweeper/tests/dummy_test.py | 6 - gym_minesweeper/tests/minesweeper_test.py | 167 ++++++++++++++++++ scripts/format.sh | 4 +- scripts/lint.sh | 2 +- scripts/python_format.sh | 9 - scripts/test.sh | 2 +- setup.cfg | 13 ++ setup.py | 4 + 13 files changed, 415 insertions(+), 49 deletions(-) delete mode 100644 Pipfile create mode 100644 gym_minesweeper/minesweeper.py delete mode 100644 gym_minesweeper/tests/dummy_test.py create mode 100644 gym_minesweeper/tests/minesweeper_test.py delete mode 100755 scripts/python_format.sh create mode 100644 setup.py diff --git a/.circleci/config.yml b/.circleci/config.yml index c5d2ddf..bb0d156 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,26 +3,31 @@ version: 2 jobs: build: docker: - - image: alpine - environment: - LANG: en_us.UTF-8 - PIPENV_VENV_IN_PROJECT: true - PIPENV_DEV: true + - image: archlinux:base-devel steps: - checkout - run: name: Install Required Tools + + # 1. Python deps + # 2. python-pillow does not provide wheel, so must be built with these deps + # 3. 
Formatting/linting command: | - sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories - echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories - apk add --no-cache bash py3-pip shellcheck shfmt - pip install pipenv + pacman -Sy --noconfirm \ + python python-pip python-virtualenv \ + lcms2 libtiff openjpeg2 libimagequant libxcb \ + shellcheck shfmt - run: name: Set Up Virtualenv - command: pipenv install + command: | + python -m venv venv + echo "source venv/bin/activate" >> $BASH_ENV + source venv/bin/activate + pip install --upgrade pip wheel + pip install -e .[dev] - run: name: Format diff --git a/.gitignore b/.gitignore index eccf426..9401b57 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /.idea/ -/Pipfile.lock __pycache__ /test_results/ +/venv/ +*.egg* diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 8174224..0000000 --- a/Pipfile +++ /dev/null @@ -1,19 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] - -[dev-packages] -pytest = "*" -yapf = "*" -pylint = "*" - -[requires] -python_version = "3.8" - -[scripts] -format = "./scripts/python_format.sh" -lint = "pylint --rcfile=setup.cfg gym_minesweeper" -test = "pytest --junitxml=test_results/gym_minesweeper/report.xml gym_minesweeper/tests" diff --git a/gym_minesweeper/__init__.py b/gym_minesweeper/__init__.py index e69de29..c7adda1 100644 --- a/gym_minesweeper/__init__.py +++ b/gym_minesweeper/__init__.py @@ -0,0 +1,5 @@ +"""OpenAI gym environment for minesweeper.""" + +__all__ = ['MinesweeperEnv', 'SPACE_MINE', 'SPACE_UNKNOWN', 'REWARD_WIN', 'REWARD_LOSE', 'REWARD_CLEAR'] + +from gym_minesweeper.minesweeper import MinesweeperEnv, SPACE_MINE, SPACE_UNKNOWN, REWARD_WIN, REWARD_LOSE, REWARD_CLEAR diff --git a/gym_minesweeper/minesweeper.py b/gym_minesweeper/minesweeper.py new file mode 100644 index 0000000..7c724e0 --- /dev/null +++ b/gym_minesweeper/minesweeper.py @@ -0,0 +1,205 @@ +"""OpenAI gym 
environment for minesweeper.""" + +import sys +from io import StringIO + +import gym +import numpy as np +from gym import spaces +from gym.utils import seeding + +DEFAULT_BOARD_SIZE = (16, 30) +DEFAULT_NUM_MINES = 99 + +SPACE_MINE = -2 +SPACE_UNKNOWN = -1 +SPACE_MAX = 8 + +REWARD_WIN = 1000 +REWARD_LOSE = -100 +REWARD_CLEAR = 5 + + +# Based on https://github.com/genyrosk/gym-chess/blob/master/gym_chess/envs/chess.py +# pylint: disable=R0902 +class MinesweeperEnv(gym.Env): + """Minesweeper gym environment.""" + + metadata = {"render.modes": ["ansi", "human"]} + + def __init__(self, board_size=DEFAULT_BOARD_SIZE, num_mines=DEFAULT_NUM_MINES): + assert np.prod(board_size) >= num_mines + assert len(board_size) == 2 + self.board_size, self.num_mines = board_size, num_mines + self.hist, self.board, self._board, self._rng = None, None, None, None + + self.observation_space = spaces.Box(SPACE_MINE, SPACE_MAX + 1, board_size, np.int) + self.action_space = spaces.Discrete(np.prod(board_size)) + self.reset() + + def step(self, action): + """Run one timestep of the environment's dynamics. When end of + episode is reached, you are responsible for calling `reset()` + to reset this environment's state. + + Accepts an action and returns a tuple (observation, reward, done, info). 
+ + Args: + action (np.array): [x, y] coordinate pair of space to clear + + Returns: + observation (np.array[np.array]): current board state + reward (float) : amount of reward returned after previous action + done (bool): whether the episode has ended, in which case further step() calls will return undefined results + info (dict): currently contains nothing + """ + + target_x, target_y = tuple(action) + assert self._is_clearable_space(target_x, target_y), "Invalid action: {}".format(action) + + # If first step, populate board + # We do this here so that the first move never triggers a mine to explode + if len(self.hist) == 0: + # Place mines in private board + mines_placed = 0 + while mines_placed < self.num_mines: + mine_indices = list( + zip(* + [self._rng.randint(0, dim_size, self.num_mines - mines_placed) + for dim_size in self.board_size])) + for i in mine_indices: + if self._board[i] == SPACE_UNKNOWN: + # prohibit mines adjacent or equal to target on first step + if i[0] > target_x + 1 or i[0] < target_x - 1 or i[1] > target_y + 1 or i[1] < target_y - 1: + self._board[i] = SPACE_MINE + mines_placed += 1 + + # Calculate nearby mines in private board + for calc_x in range(self.board_size[0]): + for calc_y in range(self.board_size[1]): + if self._board[calc_x, calc_y] == SPACE_UNKNOWN: + self._board[calc_x, calc_y] = self._num_nearby_mines(calc_x, calc_y) + + self._clear_space(target_x, target_y) + + status = self.get_status() + + if status is None: + return self.board, 5, False, dict() + if status: + # if won, no need to reveal mines + return self.board, 1000, True, dict() + # if lost, reveal mines + self.board = self._board + return self.board, -100, True, dict() + + def reset(self): + """Resets the environment to an initial state and returns an initial + observation. 
+ + Note that this function does not reset the environment's random + number generator(s); random variables in the environment's state are + sampled independently between multiple calls to `reset()`. In other + words, each call of `reset()` yields an environment suitable for + a new episode, independent of previous episodes. + + Returns: + observation (np.array[np.array]): current board state (all unknown) + """ + + self.hist = [] + self._board = np.full(self.board_size, SPACE_UNKNOWN, np.int) + self.board = np.array(self._board) + return self.board + + def render(self, mode='human'): + """Renders the environment. + + If mode is: + + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - ansi: Return a StringIO.StringIO containing a + terminal-style text representation. The text may include newlines + and ANSI escape sequences (e.g. for colors). + + Args: + mode (str): the mode to render with + + Returns: + outfile (StringIO or None): StringIO stream if mode is ansi, otherwise None + """ + + outfile = StringIO() if mode == 'ansi' else sys.stdout if mode == 'human' else super().render(mode) + for i, dim_1 in enumerate(self.board): + for j, dim_2 in enumerate(dim_1): + if dim_2 == SPACE_MINE: + outfile.write('X') + elif dim_2 == SPACE_UNKNOWN: + outfile.write('-') + else: + outfile.write(str(dim_2)) + if j != self.board_size[1] - 1: + outfile.write(' ') + if i != self.board_size[0] - 1: + outfile.write('\n') + if mode == 'ansi': + return outfile + return None + + def seed(self, seed=None): + """Sets the seed for this env's random number generator(s). + + Returns: + list: Returns the list of seeds used in this env's random + number generators. In this case, the length is 1. 
+ """ + + self._rng, seed = seeding.np_random(seed) + return [seed] + + def _is_valid_space(self, target_x, target_y): + return 0 <= target_x < self.board_size[0] and 0 <= target_y < self.board_size[1] + + def _is_clearable_space(self, target_x, target_y): + return self._is_valid_space(target_x, target_y) and self.board[target_x, target_y] == SPACE_UNKNOWN + + def _num_nearby_mines(self, target_x, target_y): + num_mines = 0 + for i in range(target_x - 1, target_x + 2): + for j in range(target_y - 1, target_y + 2): + if (target_x != i or target_y != j) and self._is_valid_space(i, j) and self._board[i, j] == SPACE_MINE: + num_mines += 1 + return num_mines + + def _clear_space(self, target_x, target_y): + spaces_to_clear = {(target_x, target_y)} + spaces_cleared = set() + + update_hist = True + while spaces_to_clear: + current_space = next(iter(spaces_to_clear)) + self.board[current_space[0], current_space[1]] = self._board[current_space[0], current_space[1]] + if update_hist: + self.hist.append(current_space) + update_hist = False + + spaces_to_clear.remove(current_space) + spaces_cleared.add(current_space) + + if self.board[current_space[0], current_space[1]] == 0: + for i in range(current_space[0] - 1, current_space[0] + 2): + for j in range(current_space[1] - 1, current_space[1] + 2): + if self._is_valid_space(i, j) and (i, j) not in spaces_cleared: + spaces_to_clear.add((i, j)) + + def get_status(self): + """Gets the status of the game. 
+ + Returns: + status (bool): True if game won, False if game lost, None if game in progress + """ + + if np.count_nonzero(self.board == SPACE_MINE): + return False + return True if np.count_nonzero(self.board == SPACE_UNKNOWN) == self.num_mines else None diff --git a/gym_minesweeper/tests/dummy_test.py b/gym_minesweeper/tests/dummy_test.py deleted file mode 100644 index 367a7d0..0000000 --- a/gym_minesweeper/tests/dummy_test.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Dummy.""" - - -def test_dummy(): - """Dummy.""" - assert True diff --git a/gym_minesweeper/tests/minesweeper_test.py b/gym_minesweeper/tests/minesweeper_test.py new file mode 100644 index 0000000..2e97f06 --- /dev/null +++ b/gym_minesweeper/tests/minesweeper_test.py @@ -0,0 +1,167 @@ +"""Tests for minesweeper env implementation.""" +from unittest.mock import patch + +import numpy.testing as npt +import pytest + +from gym_minesweeper import MinesweeperEnv, SPACE_UNKNOWN, REWARD_WIN, REWARD_LOSE, REWARD_CLEAR + +TEST_BOARD_SIZE = (4, 5) +TEST_NUM_MINES = 3 +TEST_SEED = 42069 + + +def test_no_mines_init(): + """Asserts that initializing with no mines works properly""" + + size = (30, 50) + ms_game = MinesweeperEnv(size, 0) + assert size == ms_game.board_size + assert ms_game.num_mines == 0 + npt.assert_array_equal([], ms_game.hist) + npt.assert_array_equal([[SPACE_UNKNOWN] * size[1]] * size[0], ms_game.board) + + +def test_no_mines_step(): + """Asserts that taking one step with no mines wins""" + + size = (30, 50) + ms_game = MinesweeperEnv(size, 0) + action = (21, 5) + board, reward, done, info = ms_game.step(action) + + expected_board = [[0] * size[1]] * size[0] + npt.assert_array_equal(ms_game.board, expected_board) + npt.assert_array_equal(ms_game.hist, [action]) + + npt.assert_array_equal(board, expected_board) + assert reward == REWARD_WIN + assert done + assert info == dict() + + +def create_game(): + """Creates a deterministic 4x5 game""" + size = TEST_BOARD_SIZE + ms_game = MinesweeperEnv(size, 
TEST_NUM_MINES) + ms_game.seed(TEST_SEED) + return ms_game + + +def assert_game(ms_game, actions, expected_boards, expected_rewards, expected_dones): + """Given a full list of game steps, plays through the game and asserts all states are correct.""" + + expected_hist = [] + + def err_msg(idx): + return "idx: {}".format(idx) + + for i, action in enumerate(actions): + board, reward, done, info = ms_game.step(action) + + npt.assert_array_equal(ms_game.board, expected_boards[i], err_msg(i)) + npt.assert_array_equal(board, expected_boards[i], err_msg(i)) + + expected_hist.append(action) + npt.assert_array_equal(ms_game.hist, expected_hist, err_msg(i)) + + assert reward == expected_rewards[i], err_msg(i) + assert done == expected_dones[i], err_msg(i) + assert info == dict(), err_msg(i) + + +def test_win(ms_game=create_game()): + """Asserts that a winning playthrough works.""" + + actions = [(0, 0), (3, 3), (0, 3), (1, 2), (0, 4), (1, 4)] + expected_boards = [ + [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, -1, -1, -1], [-1, -1, -1, -1, -1]], + [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, -1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, 1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, 1], [0, 1, 2, -1, 1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + ] + + expected_rewards = [REWARD_CLEAR] * (len(expected_boards) - 1) + [REWARD_WIN] + expected_dones = [False] * (len(expected_boards) - 1) + [True] + + assert_game(ms_game, actions, expected_boards, expected_rewards, expected_dones) + + +def test_lose(ms_game=create_game()): + """Asserts that a losing playthrough works.""" + + actions = [(0, 0), (3, 3), (0, 3), (1, 2), (0, 4), (0, 2)] + expected_boards = [ + [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, -1, -1, -1], [-1, -1, -1, -1, -1]], + [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], 
[-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, -1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -1, 2, 1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]], + [[0, 1, -2, 2, 1], [0, 1, 2, -2, 1], [1, 1, 1, 1, 1], [-2, 1, 0, 0, 0]], + ] + + expected_rewards = [REWARD_CLEAR] * (len(expected_boards) - 1) + [REWARD_LOSE] + expected_dones = [False] * (len(expected_boards) - 1) + [True] + + assert_game(ms_game, actions, expected_boards, expected_rewards, expected_dones) + + +def test_reset_and_reseed(): + """Tests resetting the game and re-seeding.""" + + size = TEST_BOARD_SIZE + ms_game = create_game() + + test_win(ms_game) + ms_game.reset() + ms_game.seed(TEST_SEED) # need to re-seed so it's deterministic + + test_lose(ms_game) + ms_game.reset() + + assert ms_game.get_status() is None + assert ms_game.hist == [] + npt.assert_array_equal(ms_game.board_size, (4, 5)) + assert ms_game.num_mines == TEST_NUM_MINES + + expected_board = [[SPACE_UNKNOWN] * size[1]] * size[0] + npt.assert_array_equal(ms_game.board, expected_board) + + +def test_render(): + """Tests game rendering""" + + # get losing board + ms_game = create_game() + test_lose(ms_game) + + class WriteSideEffect: + """Mock class for writable classes.""" + out = "" + + def write(self, text): + """Appends text to internal buffer.""" + self.out += str(text) + + def get(self): + """Gets the internal buffer.""" + return self.out + + expected_board = "0 1 X 2 1\n" \ + "0 1 2 X 1\n" \ + "1 1 1 1 1\n" \ + "X 1 0 0 0" + + human_se = WriteSideEffect() + with patch("sys.stdout.write", side_effect=human_se.write): + ms_game.render('human') + assert human_se.get() == expected_board + + string_io = ms_game.render('ansi') + string_io.seek(0) + assert string_io.read() == expected_board + + pytest.raises(NotImplementedError, ms_game.render, 'rgb_array') + pytest.raises(NotImplementedError, ms_game.render, 'other') diff --git 
a/scripts/format.sh b/scripts/format.sh index 7c5ffb1..b007fa1 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -3,9 +3,9 @@ set -eo pipefail if [ "$#" -eq 0 ]; then - pipenv run format + yapf -i -r gym_minesweeper setup.py 2>/dev/null shfmt -w scripts elif [ "$1" == "check" ]; then - pipenv run format check + yapf -d -r gym_minesweeper setup.py shfmt -l -d scripts fi diff --git a/scripts/lint.sh b/scripts/lint.sh index e2ae396..eba84f4 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -2,5 +2,5 @@ set -eo pipefail -pipenv run lint +pylint --rcfile=setup.cfg gym_minesweeper setup.py shellcheck scripts/hooks/* scripts/*.sh diff --git a/scripts/python_format.sh b/scripts/python_format.sh deleted file mode 100755 index 6cc7294..0000000 --- a/scripts/python_format.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -if [ "$#" -eq 0 ]; then - yapf -i -r gym_minesweeper 2>/dev/null -elif [ "$1" == "check" ]; then - yapf -d -r gym_minesweeper -fi diff --git a/scripts/test.sh b/scripts/test.sh index 486f9b0..7b537a0 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -2,4 +2,4 @@ set -eo pipefail -pipenv run test +pytest --junitxml=test_results/gym_minesweeper/report.xml gym_minesweeper/tests diff --git a/setup.cfg b/setup.cfg index e4714f4..7c70e04 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,19 @@ platform = any project_urls = Bug Tracker = https://github.com/aokellermann/gym-minesweeper/issues +[options] +python_requires = >= 3.8 +test_suite = tests +install_requires = + gym + numpy + +[options.extras_require] +dev = + yapf + pylint + pytest + [yapf] based_on_style = pep8 column_limit = 120 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..404b76d --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +"""Setup script for gym-minesweeper""" + +from setuptools import setup +setup()