diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7f8d6a2ed..dfaf0789b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,7 +53,7 @@ jobs: run: brew install cmake boost openal-soft sdl2 - name: Set up Python ${{ matrix.python-version }} environment - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} @@ -61,8 +61,7 @@ jobs: run: python -c "import sys; print(sys.version)" - name: Pip install - run: pip install . - + run: pip install .[gym] + - name: Import check run: python -c "import vizdoom" - diff --git a/README.md b/README.md index 2f0f830d3..05bbc4098 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ ViZDoom is based on [ZDoom](https://github.com/rheit/zdoom) to provide the game ## Features - Multi-platform (Linux, macOS, Windows), - API for Python, C++, and Julia (thanks to [Jun Tian](https://github.com/findmyway)), and also Lua and Java for older versions, +- [OpenAI Gym](https://github.com/openai/gym) environment wrappers for basic scenarios. - Easy-to-create custom scenarios (visual editors, scripting language and examples available), - Async and sync single-player and multi-player modes, - Fast (up to 7000 fps in sync mode, single-threaded), @@ -92,6 +93,13 @@ To install pre-build release for Windows 10 or 11 64-bit and Python 3.6+ just ru pip install vizdoom ``` +### Gym wrappers +If you wish to use OpenAI Gym environments, install ViZDoom with +``` +pip install vizdoom[gym] +``` +See [documentation](doc/Gym.md) and [examples](examples/python/gym_wrapper.py) on the use of Gym API. + ## Examples diff --git a/doc/Gym.md b/doc/Gym.md new file mode 100644 index 000000000..99231c59c --- /dev/null +++ b/doc/Gym.md @@ -0,0 +1,13 @@ +# OpenAI Gym wrappers + +Installing ViZDoom with `pip install vizdoom[gym]` will include +Gym wrappers to interact with ViZDoom over [Gym API](https://www.gymlibrary.ml/). 
+ +These wrappers are under `gym_wrapper`, containing the basic environment and +a few example environments based on the built-in scenarios. This environment +simply initializes ViZDoom with the settings from the scenario config files +and implements the necessary API to function as a Gym environment. + +See the following examples for use: + - `examples/python/gym_wrapper.py` for basic usage + - `examples/python/learning_stable_baselines.py` for example training with [stable-baselines3](https://github.com/DLR-RM/stable-baselines3/) diff --git a/examples/python/gym_wrapper.py b/examples/python/gym_wrapper.py new file mode 100644 index 000000000..476415bfb --- /dev/null +++ b/examples/python/gym_wrapper.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +##################################################################### +# Example for running a vizdoom scenario as a gym env +##################################################################### + +import gym +from vizdoom import gym_wrapper + +if __name__ == '__main__': + env = gym.make("VizdoomHealthGatheringSupreme-v0") + + # Rendering random rollouts for ten episodes + for _ in range(10): + done = False + obs = env.reset() + while not done: + obs, rew, done, info = env.step(env.action_space.sample()) + env.render() diff --git a/examples/python/learning_stable_baselines.py b/examples/python/learning_stable_baselines.py new file mode 100644 index 000000000..21c2332b5 --- /dev/null +++ b/examples/python/learning_stable_baselines.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +##################################################################### +# Example script of training agents with stable-baselines3 +# on ViZDoom using the Gym API +# +# Note: ViZDoom must be installed with optional gym dependencies: +# pip install vizdoom[gym] +# You also need stable-baselines3: +# pip install stable-baselines3 +# +# See more stable-baselines3 documentation here: +# https://stable-baselines3.readthedocs.io/en/master/index.html 
DEFAULT_ENV = "VizdoomBasic-v0"
# Ids of every registered Gym environment whose name contains "Vizdoom"
AVAILABLE_ENVS = [env_spec.id for env_spec in gym.envs.registry.all() if "Vizdoom" in env_spec.id]
# Height and width of the resized image
IMAGE_SHAPE = (60, 80)

# Training parameters
TRAINING_TIMESTEPS = int(1e6)
N_STEPS = 128
N_ENVS = 8
# Number of game frames advanced per agent action (passed to the env in `main`)
FRAME_SKIP = 4


class ObservationWrapper(gym.ObservationWrapper):
    """
    ViZDoom environments return dictionaries as observations, containing
    the main image as well other info.
    The image is also too large for normal training.

    This wrapper replaces the dictionary observation space with a simple
    Box space (i.e., only the RGB image), and also resizes the image to a
    smaller size.

    NOTE: Ideally, you should set the image size to smaller in the scenario files
          for faster running of ViZDoom. This can really impact performance,
          and this code is pretty slow because of this!

    Arguments:
        env: environment to wrap; its observation space must have an "rgb" entry.
        shape: target (height, width) of the resized image. Default: IMAGE_SHAPE.
    """
    def __init__(self, env, shape=IMAGE_SHAPE):
        super().__init__(env)
        self.image_shape = shape
        # cv2.resize takes the target size as (width, height), i.e. the
        # reverse of the (height, width) order used for `shape`.
        self.image_shape_reverse = shape[::-1]

        # Create new observation space with the new shape; the channel count
        # is taken from the last dimension of the wrapped env's "rgb" space.
        num_channels = env.observation_space["rgb"].shape[-1]
        new_shape = (shape[0], shape[1], num_channels)
        self.observation_space = gym.spaces.Box(0, 255, shape=new_shape, dtype=np.uint8)

    def observation(self, observation):
        """Return only the "rgb" entry of `observation`, resized to the target shape."""
        return cv2.resize(observation["rgb"], self.image_shape_reverse)


def main(args):
    """
    Train a PPO agent on the environment named by `args.env`.

    Arguments:
        args: parsed command-line arguments; only `args.env` (environment id) is read.
    """
    # Create multiple environments: this speeds up training with PPO
    # We apply two wrappers on the environment:
    #  1) The above wrapper that modifies the observations (takes only the image and resizes it)
    #  2) A reward scaling wrapper. Normally the scenarios use large magnitudes for rewards (e.g., 100, -100).
    #     This may lead to unstable learning, and we scale the rewards by 1/100
    def wrap_env(env):
        env = ObservationWrapper(env)
        env = gym.wrappers.TransformReward(env, lambda r: r * 0.01)
        return env

    envs = make_vec_env(
        args.env,
        n_envs=N_ENVS,
        wrapper_class=wrap_env,
        # Forward the frame skip to the environment constructor; without this
        # the FRAME_SKIP training parameter is silently ignored.
        env_kwargs={"frame_skip": FRAME_SKIP},
    )

    agent = PPO("CnnPolicy", envs, n_steps=N_STEPS, verbose=1)

    # Do the actual learning
    # This will print out the results in the console.
    # If agent gets better, "ep_rew_mean" should increase steadily
    agent.learn(total_timesteps=TRAINING_TIMESTEPS)


if __name__ == "__main__":
    parser = ArgumentParser("Train stable-baselines3 PPO agents on ViZDoom.")
    parser.add_argument("--env",
                        default=DEFAULT_ENV,
                        choices=AVAILABLE_ENVS,
                        help="Name of the environment to play")
    args = parser.parse_args()
    main(args)
# A fixed set of colors for each potential label
# for rendering an image.
# 256 is not nearly enough for all IDs, but we limit
# ourselves here to avoid hogging too much memory.
LABEL_COLORS = np.random.default_rng(42).uniform(25, 256, size=(256, 3)).astype(np.uint8)


class VizdoomEnv(gym.Env):
    def __init__(
        self,
        level,
        frame_skip=1,
    ):
        """
        Base class for Gym interface for ViZDoom. Thanks to https://github.com/shakenes/vizdoomgym
        Child classes are defined in gym_env_defns.py.

        Arguments:
            level (str): path to the config file to load. Most settings should be set by this config file.
            frame_skip (int): how many frames should be advanced per action. 1 = take action on every frame. Default: 1.

        This environment forces window to be hidden. Use `render()` function to see the game.

        Observations are dictionaries with different amount of entries, depending on if depth/label buffers were
        enabled in the config file:
          "rgb"           = the RGB image (always available) in shape (HEIGHT, WIDTH, CHANNELS)
          "depth"         = the depth image in shape (HEIGHT, WIDTH), if enabled by the config file,
          "labels"        = the label image buffer in shape (HEIGHT, WIDTH), if enabled by the config file.
                            For info on labels, access `env.state.labels` variable.
          "automap"       = the automap image buffer in shape (HEIGHT, WIDTH, CHANNELS), if enabled by the config file
          "gamevariables" = all game variables, in the order specified by the config file

        Action space is always a Discrete one, one choice for each button
        (only one button can be pressed down at a time).
        """
        self.frame_skip = frame_skip

        # Configure the game; everything below up to `init()` only changes settings.
        self.game = vzd.DoomGame()
        self.game.load_config(level)
        self.game.set_window_visible(False)

        screen_format = self.game.get_screen_format()
        if screen_format != vzd.ScreenFormat.RGB24:
            warnings.warn(f"Detected screen format {screen_format.name}. Only RGB24 is supported in the Gym wrapper. Forcing RGB24.")
            self.game.set_screen_format(vzd.ScreenFormat.RGB24)

        # Drop DELTA buttons: the Discrete action space below models one
        # binary button press per step.
        allowed_buttons = []
        for button in self.game.get_available_buttons():
            if "DELTA" in button.name:
                warnings.warn(f"Removing button {button.name}. DELTA buttons are currently not supported in Gym wrapper. Use binary buttons instead.")
            else:
                allowed_buttons.append(button)
        self.game.set_available_buttons(allowed_buttons)
        self.action_space = gym.spaces.Discrete(len(allowed_buttons))

        # Start the game only after the button list has been finalized, so the
        # running game uses the same buttons as `action_space`.
        self.game.init()
        self.state = None
        self.window_surface = None
        self.isopen = True

        self.depth = self.game.is_depth_buffer_enabled()
        self.labels = self.game.is_labels_buffer_enabled()
        self.automap = self.game.is_automap_buffer_enabled()

        # specify observation space(s)
        height = self.game.get_screen_height()
        width = self.game.get_screen_width()

        def image_space(*channels):
            # uint8 image space of shape (height, width[, channels])
            return gym.spaces.Box(0, 255, (height, width, *channels), dtype=np.uint8)

        spaces = {"rgb": image_space(3)}
        if self.depth:
            spaces["depth"] = image_space()
        if self.labels:
            spaces["labels"] = image_space()
        if self.automap:
            # "automap" buffer uses same number of channels
            # as the main screen buffer
            spaces["automap"] = image_space(3)

        self.num_game_variables = self.game.get_available_game_variables_size()
        if self.num_game_variables > 0:
            spaces["gamevariables"] = gym.spaces.Box(
                np.finfo(np.float32).min,
                np.finfo(np.float32).max,
                (self.num_game_variables,),
                dtype=np.float32
            )

        self.observation_space = gym.spaces.Dict(spaces)

    def step(self, action):
        """
        Press the button with index `action` for `frame_skip` frames.

        Returns:
            Tuple (observation, reward, done, info); `info` is always an empty dict.
        """
        assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid"
        assert self.state is not None, "Call `reset` before using `step` method."

        # convert action to vizdoom action space (one hot)
        act = [0] * self.action_space.n
        act[action] = 1

        reward = self.game.make_action(act, self.frame_skip)
        self.state = self.game.get_state()
        done = self.game.is_episode_finished()

        return self.__collect_observations(), reward, done, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """
        Start a new episode.

        Arguments:
            seed: if given, used to seed both the Gym base class and the game.
            return_info: if True, return `(observation, info)` with an empty
                info dict instead of only the observation.
            options: accepted for API compatibility; not used.
        """
        # Let gym.Env seed its own random generator as well
        super().reset(seed=seed)
        if seed is not None:
            self.game.set_seed(seed)
        self.game.new_episode()
        self.state = self.game.get_state()

        if not return_info:
            return self.__collect_observations()
        else:
            return self.__collect_observations(), {}

    def __collect_observations(self):
        """Build the observation dict from `self.state` (zeros if there is no state)."""
        observation = {}
        if self.state is not None:
            observation["rgb"] = self.state.screen_buffer
            if self.depth:
                observation["depth"] = self.state.depth_buffer
            if self.labels:
                observation["labels"] = self.state.labels_buffer
            if self.automap:
                observation["automap"] = self.state.automap_buffer
            if self.num_game_variables > 0:
                observation["gamevariables"] = self.state.game_variables.astype(np.float32)
        else:
            # there is no state in the terminal step, so a zero observation is returned instead
            for space_key, space_item in self.observation_space.spaces.items():
                observation[space_key] = np.zeros(space_item.shape, dtype=space_item.dtype)

        return observation

    def __build_human_render_image(self):
        """Stack all available buffers into one for human consumption"""
        game_state = self.game.get_state()
        valid_buffers = game_state is not None

        if not valid_buffers:
            # Return a blank image, as wide as all enabled buffers side by side
            num_enabled_buffers = 1 + self.depth + self.labels + self.automap
            img = np.zeros(
                (
                    self.game.get_screen_height(),
                    self.game.get_screen_width() * num_enabled_buffers,
                    3,
                ),
                dtype=np.uint8,
            )
            return img

        image_list = [game_state.screen_buffer]

        if self.depth:
            # Repeat the single depth channel three times to match the RGB layout
            image_list.append(np.repeat(game_state.depth_buffer[..., None], repeats=3, axis=2))

        if self.labels:
            # Give each label a fixed color.
            # We need to connect each pixel in labels_buffer to the corresponding
            # id via `value`
            labels_rgb = np.zeros_like(game_state.screen_buffer)
            labels_buffer = game_state.labels_buffer
            for label in game_state.labels:
                color = LABEL_COLORS[label.object_id % 256]
                labels_rgb[labels_buffer == label.value] = color
            image_list.append(labels_rgb)

        if self.automap:
            image_list.append(game_state.automap_buffer)

        return np.concatenate(image_list, axis=1)

    def render(self, mode="human"):
        """
        Render the enabled buffers side by side.

        With mode "rgb_array" the stacked image is returned; with mode "human"
        it is drawn in a pygame window and nothing is returned. Any other mode
        returns `self.isopen`.
        """
        render_image = self.__build_human_render_image()
        if mode == "rgb_array":
            return render_image
        elif mode == "human":
            # Transpose image (pygame wants (width, height, channels), we have (height, width, channels))
            render_image = render_image.transpose(1, 0, 2)
            if self.window_surface is None:
                pygame.init()
                pygame.display.set_caption("ViZDoom")
                self.window_surface = pygame.display.set_mode(render_image.shape[:2])

            surf = pygame.surfarray.make_surface(render_image)
            self.window_surface.blit(surf, (0, 0))
            pygame.display.update()
        else:
            return self.isopen

    def close(self):
        """Shut down the pygame window, if `render` ever opened one."""
        if self.window_surface:
            pygame.quit()
            # Forget the surface so a later `render` creates a fresh window
            # instead of drawing on one that pygame has already torn down.
            self.window_surface = None
            self.isopen = False


class VizdoomScenarioEnv(VizdoomEnv):
    """Basic ViZDoom environments which reside in the `scenarios` directory"""
    def __init__(
        self, scenario_file, frame_skip=1
    ):
        # Resolve the config file name against the packaged scenarios directory
        super().__init__(
            os.path.join(scenarios_path, scenario_file), frame_skip
        )
+21,8 @@ set SCENARIOS_DEST_DIR=%PACKAGE_DEST_PATH%\scenarios set SCENARIOS_PATH=%SRC_PATH%\scenarios set EXAMPLES_DEST_DIR=%PACKAGE_DEST_PATH%\examples set EXAMPLES_PATH=%SRC_PATH%\examples\python +set GYM_WRAPPER_DEST_DIR=%PACKAGE_DEST_PATH%\gym_wrapper +set GYM_WRAPPER_PATH=%SRC_PATH%\gym_wrapper if not exist "%BIN_PATH%\python%PYTHON_VERSION%\vizdoom*.pyd" ( echo "Library for specified Python version does not exist. Aborting." @@ -50,3 +52,4 @@ copy "%SCENARIOS_PATH%\*.wad" "%SCENARIOS_DEST_DIR%" copy "%SCENARIOS_PATH%\*.cfg" "%SCENARIOS_DEST_DIR%" move "%SCENARIOS_DEST_DIR%\bots.cfg" "%PACKAGE_DEST_PATH%" copy "%BIN_PATH%\*.dll" "%PACKAGE_DEST_PATH%" +copy "%GYM_WRAPPER_PATH%\*.py" "%GYM_WRAPPER_DEST_DIR%" diff --git a/scripts/assemble_pip_package.sh b/scripts/assemble_pip_package.sh index 52081f34c..c0a9028e8 100755 --- a/scripts/assemble_pip_package.sh +++ b/scripts/assemble_pip_package.sh @@ -25,6 +25,8 @@ PYTHON_BIN_PATH="$(ls ${PACKAGE_DEST_DIRECTORY}/vizdoom*)" FREEDOOM_PATH="${SRC_PATH}/src/freedoom2.wad" SCENARIOS_DEST_DIR="${PACKAGE_DEST_PATH}/scenarios" SCENARIOS_PATH="${SRC_PATH}/scenarios" +GYM_WRAPPER_DEST_DIR="${PACKAGE_DEST_PATH}/gym_wrapper" +GYM_WRAPPER_PATH="${SRC_PATH}/gym_wrapper" if [ ! -e ${PYTHON_BIN_PATH} ]; then echo "Library for specified Python version does not exist. Aborting." 
# Ids of all registered Gym environments that belong to the ViZDoom wrappers
vizdoom_envs = [env_spec.id for env_spec in gym.envs.registry.all() if "Vizdoom" in env_spec.id]


# Testing with different non-default kwargs (since each has a different obs space)
def test_gym_wrapper():
    """Check every ViZDoom env against the Gym API for two frame-skip settings."""
    print("Testing Gym wrapper compatiblility with gym API")
    for env_name in vizdoom_envs:
        for frame_skip in (1, 4):
            env = gym.make(env_name, frame_skip=frame_skip)

            # Test if env adheres to Gym API
            check_env(env, warn=True, skip_render_check=True)

            # The initial observation must lie inside the declared space
            ob = env.reset()
            assert env.observation_space.contains(ob), f"Reset observation: {ob!r} not in space"

            # So must the observation after one random action
            observation, reward, done, _info = env.step(env.action_space.sample())
            assert env.observation_space.contains(
                observation
            ), f"Step observation: {observation!r} not in space"
            assert np.isscalar(reward), f"{reward} is not a scalar for {env}"
            assert isinstance(done, bool), f"Expected {done} to be a boolean"

            env.close()


# Check obs on terminal state (terminal state is handled differently)
def test_gym_wrapper_terminal_state():
    """Play random episodes to the end and validate the terminal observation."""
    print("Testing Gym rollout (checking terminal state)")
    for env_name in vizdoom_envs:
        for frame_skip in (1, 4):
            env = gym.make(env_name, frame_skip=frame_skip)

            ob = env.reset()
            done = False
            # Act randomly until the episode reports termination
            while not done:
                ob, _reward, done, _info = env.step(env.action_space.sample())
            env.close()
            assert env.observation_space.contains(ob)


if __name__ == "__main__":
    test_gym_wrapper()
    test_gym_wrapper_terminal_state()