From 80008b873c745aea326dcd97d67ba1390b228042 Mon Sep 17 00:00:00 2001
From: Antony Kellermann <aokellermann@gmail.com>
Date: Fri, 25 Dec 2020 19:06:00 -0500
Subject: [PATCH] Minesweeper Env (#2)

### Changes
* Implements minesweeper gym env
* Adds unit tests
* Switches from `pipenv` to `virtualenv` because [pipenv locking takes an eternity](https://github.com/pypa/pipenv/issues/3827)
---
 .circleci/config.yml                      |  25 +--
 .gitignore                                |   3 +-
 Pipfile                                   |  19 --
 gym_minesweeper/__init__.py               |   5 +
 gym_minesweeper/minesweeper.py            | 205 ++++++++++++++++++++++
 gym_minesweeper/tests/dummy_test.py       |   6 -
 gym_minesweeper/tests/minesweeper_test.py | 167 ++++++++++++++++++
 scripts/format.sh                         |   4 +-
 scripts/lint.sh                           |   2 +-
 scripts/python_format.sh                  |   9 -
 scripts/test.sh                           |   2 +-
 setup.cfg                                 |  13 ++
 setup.py                                  |   4 +
 13 files changed, 415 insertions(+), 49 deletions(-)
 delete mode 100644 Pipfile
 create mode 100644 gym_minesweeper/minesweeper.py
 delete mode 100644 gym_minesweeper/tests/dummy_test.py
 create mode 100644 gym_minesweeper/tests/minesweeper_test.py
 delete mode 100755 scripts/python_format.sh
 create mode 100644 setup.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c5d2ddf..bb0d156 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,26 +3,31 @@ version: 2
 jobs:
   build:
     docker:
-      - image: alpine
-        environment:
-          LANG: en_us.UTF-8
-          PIPENV_VENV_IN_PROJECT: true
-          PIPENV_DEV: true
+      - image: archlinux:base-devel
 
     steps:
       - checkout
 
       - run:
           name: Install Required Tools
+
+          # 1. Python deps
+          # 2. python-pillow does not provide wheel, so must be built with these deps
+          # 3. Formatting/linting
           command: |
-            sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories
-            echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories
-            apk add --no-cache bash py3-pip shellcheck shfmt
-            pip install pipenv
+            pacman -Syu --noconfirm \
+              python python-pip python-virtualenv \
+              lcms2 libtiff openjpeg2 libimagequant libxcb \
+              shellcheck shfmt
 
       - run:
           name: Set Up Virtualenv
-          command: pipenv install
+          command: |
+            python -m venv venv
+            echo "source venv/bin/activate" >> $BASH_ENV
+            source venv/bin/activate
+            pip install --upgrade pip wheel
+            pip install -e .[dev]
 
       - run:
           name: Format
diff --git a/.gitignore b/.gitignore
index eccf426..9401b57 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /.idea/
-/Pipfile.lock
 __pycache__
 /test_results/
+/venv/
+*.egg*
diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index 8174224..0000000
--- a/Pipfile
+++ /dev/null
@@ -1,19 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-
-[dev-packages]
-pytest = "*"
-yapf = "*"
-pylint = "*"
-
-[requires]
-python_version = "3.8"
-
-[scripts]
-format = "./scripts/python_format.sh"
-lint = "pylint --rcfile=setup.cfg gym_minesweeper"
-test = "pytest --junitxml=test_results/gym_minesweeper/report.xml gym_minesweeper/tests"
diff --git a/gym_minesweeper/__init__.py b/gym_minesweeper/__init__.py
index e69de29..c7adda1 100644
--- a/gym_minesweeper/__init__.py
+++ b/gym_minesweeper/__init__.py
@@ -0,0 +1,5 @@
+"""OpenAI gym environment for minesweeper."""
+
+__all__ = ['MinesweeperEnv', 'SPACE_MINE', 'SPACE_UNKNOWN', 'REWARD_WIN', 'REWARD_LOSE', 'REWARD_CLEAR']
+
+from gym_minesweeper.minesweeper import MinesweeperEnv, SPACE_MINE, SPACE_UNKNOWN, REWARD_WIN, REWARD_LOSE, REWARD_CLEAR
diff --git a/gym_minesweeper/minesweeper.py b/gym_minesweeper/minesweeper.py
new file mode 100644
index 0000000..7c724e0
--- /dev/null
+++ b/gym_minesweeper/minesweeper.py
@@ -0,0 +1,205 @@
+"""OpenAI gym environment for minesweeper."""
+
+import sys
+from io import StringIO
+
+import gym
+import numpy as np
+from gym import spaces
+from gym.utils import seeding
+
+DEFAULT_BOARD_SIZE = (16, 30)
+DEFAULT_NUM_MINES = 99
+
+SPACE_MINE = -2
+SPACE_UNKNOWN = -1
+SPACE_MAX = 8
+
+REWARD_WIN = 1000
+REWARD_LOSE = -100
+REWARD_CLEAR = 5
+
+
+# Based on https://github.com/genyrosk/gym-chess/blob/master/gym_chess/envs/chess.py
+# pylint: disable=R0902
+class MinesweeperEnv(gym.Env):
+    """Minesweeper gym environment."""
+
+    metadata = {"render.modes": ["ansi", "human"]}
+
+    def __init__(self, board_size=DEFAULT_BOARD_SIZE, num_mines=DEFAULT_NUM_MINES):
+        """Create a board of shape `board_size` (2 dims) holding `num_mines` mines, seed the RNG, and reset."""
+        assert len(board_size) == 2 and np.prod(board_size) >= num_mines
+        self.board_size, self.num_mines = board_size, num_mines
+        self.hist, self.board, self._board, self._rng = None, None, None, None
+        self.seed()  # default RNG so that step() can place mines even if the caller never calls seed()
+        self.observation_space = spaces.Box(SPACE_MINE, SPACE_MAX, board_size, int)  # Box bounds are inclusive
+        self.action_space = spaces.Discrete(np.prod(board_size))
+        self.reset()
+
+    def step(self, action):
+        """Run one timestep of the environment's dynamics. When end of
+        episode is reached, you are responsible for calling `reset()`
+        to reset this environment's state.
+
+        Accepts an action and returns a tuple (observation, reward, done, info).
+
+        Args:
+            action (np.array or int): [x, y] coordinate pair of space to clear, or a flat `action_space` index
+
+        Returns:
+            observation (np.array[np.array]): current board state
+            reward (float) : amount of reward returned after previous action
+            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
+            info (dict): currently contains nothing
+        """
+
+        target_x, target_y = np.unravel_index(action, self.board_size) if np.ndim(action) == 0 else tuple(action)
+        assert self._is_clearable_space(target_x, target_y), "Invalid action: {}".format(action)
+
+        # If first step, populate board
+        # We do this here so that the first move never triggers a mine to explode
+        if not self.hist:
+            # Place mines in private board (never terminates if they cannot all fit outside the target's 3x3 area)
+            mines_placed = 0
+            while mines_placed < self.num_mines:
+                mine_indices = list(
+                    zip(*
+                        [self._rng.randint(0, dim_size, self.num_mines - mines_placed)
+                         for dim_size in self.board_size]))
+                for i in mine_indices:
+                    if self._board[i] == SPACE_UNKNOWN:
+                        # prohibit mines adjacent or equal to target on first step
+                        if i[0] > target_x + 1 or i[0] < target_x - 1 or i[1] > target_y + 1 or i[1] < target_y - 1:
+                            self._board[i] = SPACE_MINE
+                            mines_placed += 1
+
+            # Calculate nearby mines in private board
+            for calc_x in range(self.board_size[0]):
+                for calc_y in range(self.board_size[1]):
+                    if self._board[calc_x, calc_y] == SPACE_UNKNOWN:
+                        self._board[calc_x, calc_y] = self._num_nearby_mines(calc_x, calc_y)
+
+        self._clear_space(target_x, target_y)
+
+        status = self.get_status()
+
+        if status is None:
+            return self.board, REWARD_CLEAR, False, dict()
+        if status:
+            # if won, no need to reveal mines
+            return self.board, REWARD_WIN, True, dict()
+        # if lost, reveal the full private board; copy it so the public view never aliases private state
+        self.board = np.array(self._board)
+        return self.board, REWARD_LOSE, True, dict()
+
+    def reset(self):
+        """Resets the environment to an initial state and returns an initial
+        observation.
+
+        Note that this function does not reset the environment's random
+        number generator(s); random variables in the environment's state are
+        sampled independently between multiple calls to `reset()`. In other
+        words, each call of `reset()` yields an environment suitable for
+        a new episode, independent of previous episodes.
+
+        Returns:
+            observation (np.array[np.array]): current board state (all unknown)
+        """
+
+        self.hist = []
+        self._board = np.full(self.board_size, SPACE_UNKNOWN, int)  # builtin int: the np.int alias no longer exists
+        self.board = np.array(self._board)  # independent copy, so the public view does not alias the private board
+        return self.board
+
+    def render(self, mode='human'):
+        """Renders the board as text: 'X' for a mine, '-' for an unknown space, otherwise the space's number.
+
+        Spaces in a row are separated by single spaces and rows by newlines (no trailing newline).
+
+        - human: write the text to `sys.stdout` and return nothing.
+        - ansi: write the text to a new `io.StringIO` and return it
+          (nothing is rewound, so callers should `seek(0)` before reading).
+        - any other mode: delegated to `super().render(mode)`; the tests in this
+          package expect that call to raise `NotImplementedError`.
+
+        Args:
+            mode (str): the mode to render with
+
+        Returns:
+            outfile (StringIO or None): StringIO stream if mode is ansi, otherwise None
+        """
+
+        outfile = StringIO() if mode == 'ansi' else sys.stdout if mode == 'human' else super().render(mode)
+        for i, dim_1 in enumerate(self.board):
+            for j, dim_2 in enumerate(dim_1):
+                if dim_2 == SPACE_MINE:
+                    outfile.write('X')
+                elif dim_2 == SPACE_UNKNOWN:
+                    outfile.write('-')
+                else:
+                    outfile.write(str(dim_2))
+                if j != self.board_size[1] - 1:
+                    outfile.write(' ')
+            if i != self.board_size[0] - 1:
+                outfile.write('\n')
+        if mode == 'ansi':
+            return outfile
+        return None
+
+    def seed(self, seed=None):
+        """Re-creates this env's random number generator via `seeding.np_random(seed)` (`seed` defaults to None).
+
+        Returns:
+            list<bigint>: Returns the list of seeds used in this env's random
+              number generators. In this case, the length is 1.
+        """
+
+        self._rng, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _is_valid_space(self, target_x, target_y):  # True when the coordinates lie inside the board bounds
+        return 0 <= target_x < self.board_size[0] and 0 <= target_y < self.board_size[1]
+
+    def _is_clearable_space(self, target_x, target_y):  # inside the board and still unknown on the public board
+        return self._is_valid_space(target_x, target_y) and self.board[target_x, target_y] == SPACE_UNKNOWN
+
+    def _num_nearby_mines(self, target_x, target_y):  # mines among the on-board neighbours in the private board
+        num_mines = 0
+        for i in range(target_x - 1, target_x + 2):
+            for j in range(target_y - 1, target_y + 2):
+                if (target_x != i or target_y != j) and self._is_valid_space(i, j) and self._board[i, j] == SPACE_MINE:
+                    num_mines += 1
+        return num_mines
+
+    def _clear_space(self, target_x, target_y):  # reveal the target, flood-filling outward through zero spaces
+        spaces_to_clear = {(target_x, target_y)}
+        spaces_cleared = set()
+
+        update_hist = True  # only the first space processed (the chosen target) is appended to hist
+        while spaces_to_clear:
+            current_space = next(iter(spaces_to_clear))
+            self.board[current_space[0], current_space[1]] = self._board[current_space[0], current_space[1]]
+            if update_hist:
+                self.hist.append(current_space)
+                update_hist = False
+
+            spaces_to_clear.remove(current_space)
+            spaces_cleared.add(current_space)
+
+            if self.board[current_space[0], current_space[1]] == 0:  # zero nearby mines: queue every neighbour
+                for i in range(current_space[0] - 1, current_space[0] + 2):
+                    for j in range(current_space[1] - 1, current_space[1] + 2):
+                        if self._is_valid_space(i, j) and (i, j) not in spaces_cleared:
+                            spaces_to_clear.add((i, j))
+
+    def get_status(self):
+        """Gets the status of the game, judged from the public board only.
+
+        Returns:
+            status (bool or None): False if a mine is visible, True if exactly num_mines spaces are unknown, else None
+        """
+
+        if np.count_nonzero(self.board == SPACE_MINE):
+            return False
+        return True if np.count_nonzero(self.board == SPACE_UNKNOWN) == self.num_mines else None
diff --git a/gym_minesweeper/tests/dummy_test.py b/gym_minesweeper/tests/dummy_test.py
deleted file mode 100644
index 367a7d0..0000000
--- a/gym_minesweeper/tests/dummy_test.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""Dummy."""
-
-
-def test_dummy():
-    """Dummy."""
-    assert True
diff --git a/gym_minesweeper/tests/minesweeper_test.py b/gym_minesweeper/tests/minesweeper_test.py
new file mode 100644
index 0000000..2e97f06
--- /dev/null
+++ b/gym_minesweeper/tests/minesweeper_test.py
@@ -0,0 +1,167 @@
+"""Tests for minesweeper env implementation."""
+from unittest.mock import patch
+
+import numpy.testing as npt
+import pytest
+
+from gym_minesweeper import MinesweeperEnv, SPACE_UNKNOWN, REWARD_WIN, REWARD_LOSE, REWARD_CLEAR
+
+TEST_BOARD_SIZE = (4, 5)
+TEST_NUM_MINES = 3
+TEST_SEED = 42069
+
+
+def test_no_mines_init():
+    """Asserts that initializing with no mines works properly"""
+
+    size = (30, 50)
+    ms_game = MinesweeperEnv(size, 0)
+    assert size == ms_game.board_size
+    assert ms_game.num_mines == 0
+    npt.assert_array_equal([], ms_game.hist)
+    npt.assert_array_equal([[SPACE_UNKNOWN] * size[1]] * size[0], ms_game.board)
+
+
+def test_no_mines_step():
+    """Asserts that taking one step with no mines wins"""
+
+    size = (30, 50)
+    ms_game = MinesweeperEnv(size, 0)
+    action = (21, 5)
+    board, reward, done, info = ms_game.step(action)
+
+    expected_board = [[0] * size[1]] * size[0]
+    npt.assert_array_equal(ms_game.board, expected_board)
+    npt.assert_array_equal(ms_game.hist, [action])
+
+    npt.assert_array_equal(board, expected_board)
+    assert reward == REWARD_WIN
+    assert done
+    assert info == dict()
+
+
+def create_game():
+    """Creates a deterministic 4x5 game"""
+    size = TEST_BOARD_SIZE
+    ms_game = MinesweeperEnv(size, TEST_NUM_MINES)
+    ms_game.seed(TEST_SEED)
+    return ms_game
+
+
+def assert_game(ms_game, actions, expected_boards, expected_rewards, expected_dones):
+    """Given a full list of game steps, plays through the game and asserts all states are correct."""
+
+    expected_hist = []
+
+    def err_msg(idx):
+        return "idx: {}".format(idx)
+
+    for i, action in enumerate(actions):
+        board, reward, done, info = ms_game.step(action)
+
+        npt.assert_array_equal(ms_game.board, expected_boards[i], err_msg(i))
+        npt.assert_array_equal(board, expected_boards[i], err_msg(i))
+
+        expected_hist.append(action)
+        npt.assert_array_equal(ms_game.hist, expected_hist, err_msg(i))
+
+        assert reward == expected_rewards[i], err_msg(i)
+        assert done == expected_dones[i], err_msg(i)
+        assert info == dict(), err_msg(i)
+
+
+def test_win(ms_game=None):
+    """Asserts that a winning playthrough works (a fresh seeded game is created when none is passed in)."""
+
+    actions = [(0, 0), (3, 3), (0, 3), (1, 2), (0, 4), (1, 4)]
+    expected_boards = [
+        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, -1, -1, -1], [-1, -1, -1, -1, -1]],
+        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, -1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, 1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, 1], [0, 1, 2, -1, 1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+    ]
+
+    expected_rewards = [REWARD_CLEAR] * (len(expected_boards) - 1) + [REWARD_WIN]
+    expected_dones = [False] * (len(expected_boards) - 1) + [True]
+
+    assert_game(ms_game or create_game(), actions, expected_boards, expected_rewards, expected_dones)
+
+
+def test_lose(ms_game=None):
+    """Asserts that a losing playthrough works (a fresh seeded game is created when none is passed in)."""
+
+    actions = [(0, 0), (3, 3), (0, 3), (1, 2), (0, 4), (0, 2)]
+    expected_boards = [
+        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, -1, -1, -1], [-1, -1, -1, -1, -1]],
+        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, -1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -1, 2, 1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
+        [[0, 1, -2, 2, 1], [0, 1, 2, -2, 1], [1, 1, 1, 1, 1], [-2, 1, 0, 0, 0]],
+    ]
+
+    expected_rewards = [REWARD_CLEAR] * (len(expected_boards) - 1) + [REWARD_LOSE]
+    expected_dones = [False] * (len(expected_boards) - 1) + [True]
+
+    assert_game(ms_game or create_game(), actions, expected_boards, expected_rewards, expected_dones)
+
+
+def test_reset_and_reseed():
+    """Tests that reset() restores the initial state and that re-seeding replays the same mine layout."""
+
+    size = TEST_BOARD_SIZE
+    ms_game = create_game()
+
+    test_win(ms_game)
+    ms_game.reset()
+    ms_game.seed(TEST_SEED)  # need to re-seed so it's deterministic
+
+    test_lose(ms_game)
+    ms_game.reset()
+
+    assert ms_game.get_status() is None
+    assert ms_game.hist == []
+    npt.assert_array_equal(ms_game.board_size, size)
+    assert ms_game.num_mines == TEST_NUM_MINES
+
+    expected_board = [[SPACE_UNKNOWN] * size[1]] * size[0]
+    npt.assert_array_equal(ms_game.board, expected_board)
+
+
+def test_render():
+    """Tests game rendering"""
+
+    # get losing board
+    ms_game = create_game()
+    test_lose(ms_game)
+
+    class WriteSideEffect:
+        """Mock class for writable classes."""
+        out = ""
+
+        def write(self, text):
+            """Appends text to internal buffer."""
+            self.out += str(text)
+
+        def get(self):
+            """Gets the internal buffer."""
+            return self.out
+
+    expected_board = "0 1 X 2 1\n" \
+                     "0 1 2 X 1\n" \
+                     "1 1 1 1 1\n" \
+                     "X 1 0 0 0"
+
+    human_se = WriteSideEffect()
+    with patch("sys.stdout.write", side_effect=human_se.write):
+        ms_game.render('human')
+        assert human_se.get() == expected_board
+
+    string_io = ms_game.render('ansi')
+    string_io.seek(0)
+    assert string_io.read() == expected_board
+
+    pytest.raises(NotImplementedError, ms_game.render, 'rgb_array')
+    pytest.raises(NotImplementedError, ms_game.render, 'other')
diff --git a/scripts/format.sh b/scripts/format.sh
index 7c5ffb1..b007fa1 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -3,9 +3,9 @@
 set -eo pipefail
 
 if [ "$#" -eq 0 ]; then
-	pipenv run format
+	yapf -i -r gym_minesweeper setup.py 2>/dev/null
 	shfmt -w scripts
 elif [ "$1" == "check" ]; then
-	pipenv run format check
+	yapf -d -r gym_minesweeper setup.py
 	shfmt -l -d scripts
 fi
diff --git a/scripts/lint.sh b/scripts/lint.sh
index e2ae396..eba84f4 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -2,5 +2,5 @@
 
 set -eo pipefail
 
-pipenv run lint
+pylint --rcfile=setup.cfg gym_minesweeper setup.py
 shellcheck scripts/hooks/* scripts/*.sh
diff --git a/scripts/python_format.sh b/scripts/python_format.sh
deleted file mode 100755
index 6cc7294..0000000
--- a/scripts/python_format.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env bash
-
-set -eo pipefail
-
-if [ "$#" -eq 0 ]; then
-	yapf -i -r gym_minesweeper 2>/dev/null
-elif [ "$1" == "check" ]; then
-	yapf -d -r gym_minesweeper
-fi
diff --git a/scripts/test.sh b/scripts/test.sh
index 486f9b0..7b537a0 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -2,4 +2,4 @@
 
 set -eo pipefail
 
-pipenv run test
+pytest --junitxml=test_results/gym_minesweeper/report.xml gym_minesweeper/tests
diff --git a/setup.cfg b/setup.cfg
index e4714f4..7c70e04 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,6 +13,19 @@ platform = any
 project_urls =
     Bug Tracker = https://github.com/aokellermann/gym-minesweeper/issues
 
+[options]
+python_requires = >= 3.8
+test_suite = tests
+install_requires =
+    gym
+    numpy
+
+[options.extras_require]
+dev =
+    yapf
+    pylint
+    pytest
+
 [yapf]
 based_on_style = pep8
 column_limit = 120
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..404b76d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,4 @@
+"""Setup script for gym-minesweeper"""
+
+from setuptools import setup
+setup()