Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stable-retro Raspberry Pi 5 ForkServerProcess-1Error #112

Open
StartaBafras opened this issue Aug 20, 2024 · 2 comments
Open

Stable-retro Raspberry Pi 5 ForkServerProcess-1Error #112

StartaBafras opened this issue Aug 20, 2024 · 2 comments

Comments

@StartaBafras
Copy link

I am trying to train a model using PPO, and the stable-baselines3[extra] library is also installed.

The issue occurs because the StochasticFrameSkip object does not have an action_space attribute, leading to an AttributeError when used in a multiprocessing environment with SubprocVecEnv. This error is triggered when wrappers like WarpFrame try to access the action_space during environment creation. Additionally, there is a deprecation warning from gymnasium about directly accessing env.action_space, recommending the use of env.unwrapped.action_space or env.get_wrapper_attr('action_space'). This causes the subprocess to fail, resulting in a ConnectionResetError.

/home/alarm/retro/env/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: WARN: env.action_space to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.action_space` for environment variables or `env.get_wrapper_attr('action_space')` that will search the reminding wrappers.
  logger.warn(
Process ForkServerProcess-1:
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 24, in _worker
    env = env_fn_wrapper.var()
  File "/home/alarm/retro/test.py", line 98, in make_env
    env = wrap_deepmind_retro(env)
  File "/home/alarm/retro/test.py", line 83, in wrap_deepmind_retro
    env = WarpFrame(env)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/atari_wrappers.py", line 188, in __init__
    gym.ObservationWrapper.__init__(self, env)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/gym/core.py", line 215, in __init__
    self.action_space = self.env.action_space
  File "/home/alarm/retro/env/lib/python3.9/site-packages/gymnasium/core.py", line 315, in __getattr__
    return getattr(self.env, name)
AttributeError: 'StochasticFrameSkip' object has no attribute 'action_space'
Traceback (most recent call last):
  File "/home/alarm/retro/test.py", line 124, in <module>
    main()
  File "/home/alarm/retro/test.py", line 101, in main
    venv = VecTransposeImage(VecFrameStack(SubprocVecEnv([make_env] * 1), n_stack=40))
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 111, in __init__
    observation_space, action_space = self.remotes[0].recv()
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer

System information

  • [Operating system]
    Raspberry pi 5 8 gb Linux A port of debian Bookworm with the Raspberry Pi Desktop (64 bit)
    Linux pi5 6.6.31+rpt-rpi-2712 SMP PREEMPT Debian 1:6.6.31-1+rpt1 (2024-05-29) aarch64 GNU/Linux
  • [Python version]
    3.9.19
  • [Gym Retro version]
    Compiled from source code
@pseudo-rnd-thoughts
Copy link
Member

Could you provide a minimal example script to test with?
I imagine that StochasticFrameSkip hasn't called super().__init__(env)

@StartaBafras
Copy link
Author

I am using a code very close to the code in the example folder.

"""
Train an agent using Proximal Policy Optimization from Stable Baselines 3
"""

import argparse

import gymnasium as gym
import numpy as np
from gymnasium.wrappers.time_limit import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.atari_wrappers import ClipRewardEnv, WarpFrame
from stable_baselines3.common.vec_env import (
    SubprocVecEnv,
    VecFrameStack,
    VecTransposeImage,
)

import retro


class StochasticFrameSkip(gym.Wrapper):
    """Repeat each action for ``n`` frames, with "sticky" actions.

    With probability ``stickprob`` the previous action is replayed for the
    first sub-step of a skip window before the new action kicks in, which
    injects stochasticity into otherwise deterministic retro environments.
    """

    def __init__(self, env, n, stickprob):
        super().__init__(env)
        self.n = n
        self.stickprob = stickprob
        self.curac = None  # action currently being repeated; None until first step after reset
        self.rng = np.random.RandomState()
        self.supports_want_render = hasattr(env, "supports_want_render")
        # Pin the spaces on this wrapper explicitly instead of relying on
        # gymnasium's lazy __getattr__ delegation.  Downstream wrappers built
        # against old `gym` (e.g. SB3's WarpFrame) read `env.action_space`
        # directly during __init__, which otherwise raises AttributeError in
        # SubprocVecEnv workers and triggers gymnasium's deprecation warning.
        self.action_space = env.action_space
        self.observation_space = env.observation_space

    def reset(self, **kwargs):
        """Reset the underlying env and clear the sticky action."""
        self.curac = None
        return self.env.reset(**kwargs)

    def step(self, ac):
        """Run up to ``n`` sub-steps with the (possibly sticky) action.

        Returns the last observation, the summed reward over the sub-steps,
        and the final terminated/truncated/info values.
        """
        terminated = False
        truncated = False
        totrew = 0
        for i in range(self.n):
            # First step after reset, use action
            if self.curac is None:
                self.curac = ac
            # First substep, delay with probability=stickprob
            elif i == 0:
                if self.rng.rand() > self.stickprob:
                    self.curac = ac
            # Second substep, new action definitely kicks in
            elif i == 1:
                self.curac = ac
            if self.supports_want_render and i < self.n - 1:
                # Skip rendering intermediate frames when the env supports it.
                ob, rew, terminated, truncated, info = self.env.step(
                    self.curac,
                    want_render=False,
                )
            else:
                ob, rew, terminated, truncated, info = self.env.step(self.curac)
            totrew += rew
            if terminated or truncated:
                break
        return ob, totrew, terminated, truncated, info


def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    """Create a retro env with stochastic frame skipping and an optional step cap.

    ``state`` falls back to the game's default save state; pass
    ``max_episode_steps=None`` to disable the TimeLimit wrapper.
    """
    chosen_state = retro.State.DEFAULT if state is None else state
    env = StochasticFrameSkip(
        retro.make(game, chosen_state, **kwargs),
        n=4,
        stickprob=0.25,
    )
    if max_episode_steps is None:
        return env
    return TimeLimit(env, max_episode_steps=max_episode_steps)


def wrap_deepmind_retro(env):
    """
    Configure environment for retro games, using config similar to DeepMind-style Atari in openai/baseline's wrap_deepmind
    """
    # Grayscale/resize the observation, then clip rewards to {-1, 0, +1}.
    for wrapper_cls in (WarpFrame, ClipRewardEnv):
        env = wrapper_cls(env)
    return env


def main():
    """Parse CLI options, build the vectorized env stack, and train PPO."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="Airstriker-Genesis")
    parser.add_argument("--state", default=retro.State.DEFAULT)
    parser.add_argument("--scenario", default=None)
    args = parser.parse_args()

    def make_env():
        # Factory executed inside each SubprocVecEnv worker process.
        base = make_retro(
            game=args.game,
            state=args.state,
            scenario=args.scenario,
            render_mode="rgb_array",
        )
        return wrap_deepmind_retro(base)

    # One worker, 40 stacked frames, transposed to channels-first for the CNN.
    venv = VecTransposeImage(VecFrameStack(SubprocVecEnv([make_env] * 1), n_stack=40))
    model = PPO(
        policy="CnnPolicy",
        env=venv,
        learning_rate=lambda f: f * 2.5e-4,  # linearly decayed with remaining progress
        n_steps=256,
        batch_size=128,
        n_epochs=3,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.4,
        ent_coef=0.013,
        verbose=2,
    )
    model.learn(
        total_timesteps=18_000_000,
        log_interval=1,
        progress_bar=True,
    )
    model.save("N12")


# Entry-point guard: required so SubprocVecEnv's forkserver/spawn workers can
# re-import this module without re-running the training script.
if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants