Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Problem on multiprocessing with TD3 #1659

Closed
fireyan8 opened this issue Aug 28, 2023 · 6 comments
Closed

Problem on multiprocessing with TD3 #1659

fireyan8 opened this issue Aug 28, 2023 · 6 comments
Labels
custom gym env Issue related to Custom Gym Env duplicate This issue or pull request already exists

Comments

@fireyan8
Copy link

🐛 Bug

"You must use only one env when doing episodic training." from error report

The minimal example needs some setup before it can run, so it is fairly long.

Code example

import numpy as np
import pybamm
import numbers
import gymnasium as gym
from gymnasium import spaces




def make_new_model(model, param, disc, update_input):
    """Return a parameterised and discretised model built from copies.

    The overrides are applied to ``param.copy()`` and the processing runs on
    ``model.new_copy()``, both with ``inplace=False``.

    Args:
        model: object exposing ``new_copy()``.
        param: parameter set exposing ``copy()``, ``update()`` and
            ``process_model()``.
        disc: discretisation exposing ``process_model()``.
        update_input: mapping of parameter overrides, or ``None`` to keep
            the copied parameters as they are.

    Returns:
        The result of ``disc.process_model`` (called with
        ``check_model=True``) on the parameterised model copy.
    """
    model_copy = model.new_copy()
    param_copy = param.copy()

    # Overrides are optional; None means "use the parameters unchanged".
    if update_input is not None:
        param_copy.update(update_input)

    parameterised = param_copy.process_model(model_copy, inplace=False)
    return disc.process_model(model=parameterised, inplace=False, check_model=True)

def update_model_step(inputparam, model, param, disc, solutions):
    """Advance the simulation by one solver step from the latest solution.

    A fresh model is built with ``make_new_model`` using ``inputparam`` as
    the parameter overrides, then stepped with a new ``CasadiSolver``
    starting from ``solutions[-1].last_state``.

    Args:
        inputparam: parameter overrides forwarded to ``make_new_model``.
        model: base model forwarded to ``make_new_model``.
        param: parameter set forwarded to ``make_new_model``.
        disc: discretisation forwarded to ``make_new_model``.
        solutions: non-empty sequence of previous solutions; only the last
            entry is read.

    Returns:
        The solution object returned by ``solver.step``.
    """
    stepped_model = make_new_model(model, param, disc, inputparam)
    casadi_solver = pybamm.CasadiSolver(mode="safe", atol=1e-6, rtol=1e-3)
    previous_state = solutions[-1].last_state

    # Each step uses dt=1 (1 s per the original author's note) with 3 points.
    return casadi_solver.step(
        previous_state,
        stepped_model,
        1,
        npts=3,
        save=False,
    )

def update_input(current):
    """Wrap ``current`` in the parameter-override mapping used for a step.

    Args:
        current: value to store under ``"Current function [A]"``.

    Returns:
        A new dict with the single key ``"Current function [A]"``.
    """
    return {"Current function [A]": current}


class Pybammenv(gym.Env):
    """Gymnasium environment stepping a PyBaMM DFN model one solver step at a time.

    The single action is the value written to ``"Current function [A]"``.
    Observations hold the latest current and terminal voltage, divided by
    fixed scale factors (4 and 4.2 respectively).
    """

    metadata = {"render_modes": ["human"], "render_fps": 4}

    # Fixed divisors applied to raw solver outputs when building observations.
    _VOLTAGE_SCALE = 4.2
    _CURRENT_SCALE = 4

    def __init__(self, Vmax=4.2, Tmax=330, initial_soc=0.2, seed=None):
        """Store the limits and declare the observation/action spaces.

        Args:
            Vmax: voltage limit; an episode terminates once the terminal
                voltage reaches ``Vmax - 0.1``.
            Tmax: cell-temperature limit used for the truncation check.
            initial_soc: value passed to ``set_initial_stoichiometries``.
            seed: stored on the instance as ``self.seed``; not otherwise
                used by this class.
        """
        self.Vmax = Vmax
        self.Tmax = Tmax
        self.initial_soc = initial_soc
        self.seed = seed

        # Observations are dictionaries holding only the two quantities of
        # interest (the original author notes the model has 468 variables).
        self.observation_space = spaces.Dict(
            {
                "Current function [A]": spaces.Box(-1, 1, shape=(1,), dtype=np.float64),
                "Terminal voltage [V]": spaces.Box(0, 1, shape=(1,), dtype=np.float64),
            }
        )

        # One action: the current.
        self.action_space = spaces.Box(-1, 1, shape=(), dtype=np.float32)

    def _build_observation(self):
        """Return the scaled observation dict for the latest solution."""
        latest = self.solutions[-1]
        return {
            "Current function [A]": np.array([latest["Current [A]"].data[-1]]) / self._CURRENT_SCALE,
            "Terminal voltage [V]": np.array([latest["Terminal voltage [V]"].data[-1]]) / self._VOLTAGE_SCALE,
        }

    def _build_info(self):
        """Return the info dict exposing the model, parameters, discretisation and solutions."""
        return {"model": self.model, "param": self.params, "disc": self.disc, "solutions": self.solutions}

    def reset(self, options=None, seed=None):
        """Rebuild the model and return the initial observation.

        Args:
            options: PyBaMM model options; ``None`` selects
                ``{"thermal": "x-full"}``.
            seed: forwarded to ``gym.Env.reset`` to seed ``np_random``.

        Returns:
            ``(observation, info)``.
        """
        super().reset(seed=seed)

        # Build the default per call: avoids a shared mutable default, and
        # keeps the default when a wrapper forwards an explicit options=None.
        if options is None:
            options = {"thermal": "x-full"}

        self.reward = 0
        self.terminted = False
        self.truncated = False

        self.model = pybamm.lithium_ion.DFN(options=options)
        self.params = pybamm.ParameterValues("Ecker2015").copy()
        self.params.update({"Current function [A]": 0.0})
        self.params.set_initial_stoichiometries(self.initial_soc)
        parameterised_model = self.params.process_model(self.model, inplace=False)

        # Geometry and mesh from the model defaults.
        geometry = self.model.default_geometry
        submesh_types = self.model.default_submesh_types
        var_pts = self.model.default_var_pts
        self.params.process_geometry(geometry)
        mesh = pybamm.Mesh(geometry, submesh_types, var_pts)

        spatial_methods = self.model.default_spatial_methods
        self.disc = pybamm.Discretisation(mesh, spatial_methods)

        self.solver = pybamm.CasadiSolver(mode="safe", atol=1e-6, rtol=1e-3)

        # Initial solution: one step of dt=1 from the model's initial state.
        init_model = self.disc.process_model(model=parameterised_model, inplace=False)
        initial_solution = self.solver.step(model=init_model, dt=1, old_solution=None)
        self.solutions = [initial_solution]

        self.observation = self._build_observation()
        return self.observation, self._build_info()

    def step(self, action):
        """Apply ``action`` as the current for one solver step.

        Args:
            action: single-element array-like (or scalar) current command.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        # Collapse the (possibly 0-d or 1-element) action to a plain float.
        # NOTE(review): observations divide the current by 4, so the action
        # may have been meant to be rescaled before use - confirm.
        scalar_action = float(np.asarray(action).item())
        self.solutions.append(
            update_model_step(
                update_input(scalar_action),
                self.model,
                self.params,
                self.disc,
                self.solutions,
            )
        )
        self.observation = self._build_observation()

        latest = self.solutions[-1]
        voltage = latest["Terminal voltage [V]"].data

        # Termination could also be detected from the solver's own
        # termination information (original author's note).
        if (voltage >= self.Vmax - 0.1).any():
            self.reward += 100000
            self.terminted = True

        if (latest["Cell temperature [K]"].data >= self.Tmax).any() or (voltage <= 2.501).any():
            self.reward += -1000
            self.truncated = True

        # NOTE(review): self.reward accumulates across steps and the running
        # total is returned as the step reward - confirm this is intended.
        previous_voltage = self.solutions[-2]["Terminal voltage [V]"].data[-1]
        self.reward += (voltage[-1] - previous_voltage) * 10000

        return self.observation, self.reward, self.terminted, self.truncated, self._build_info()

    def render(self):
        """Open a PyBaMM ``QuickPlot`` dynamic plot of the stored solutions."""
        plot = pybamm.QuickPlot(self.solutions)
        plot.dynamic_plot()

    def close(self):
        """No resources to release."""
        pass

The code that uses multiprocessing (after all the imports):

# Build 4 copies of the registered env.
# NOTE: with more than one env, TD3's default train_freq is episodic, so
# model.learn() below fails with "You must use only one env when doing
# episodic training."; passing train_freq=1 to TD3 (and possibly adapting
# gradient_steps) avoids the assertion.
vec_env = make_vec_env('Pybammenv_v0', n_envs=4, seed=0)


model = TD3("MultiInputPolicy", vec_env,
             verbose=1, device="cuda",batch_size=64)
model.learn(total_timesteps=1000, log_interval=10, progress_bar=False)
model.save("td3_pybamm_vec")

Relevant log output / Error message

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[10], line 7
      3 # The noise objects for TD3 
      5 model = TD3("MultiInputPolicy", vec_env,
      6              verbose=1, device="cuda",batch_size=64)
----> 7 model.learn(total_timesteps=1000, log_interval=10, progress_bar=False)
      8 model.save("td3_pybamm_vec")

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\td3\td3.py:222](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/td3/td3.py:222), in TD3.learn(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)
    213 def learn(
    214     self: SelfTD3,
    215     total_timesteps: int,
   (...)
    220     progress_bar: bool = False,
    221 ) -> SelfTD3:
--> 222     return super().learn(
    223         total_timesteps=total_timesteps,
    224         callback=callback,
    225         log_interval=log_interval,
    226         tb_log_name=tb_log_name,
    227         reset_num_timesteps=reset_num_timesteps,
    228         progress_bar=progress_bar,
    229     )

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\off_policy_algorithm.py:312](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/off_policy_algorithm.py:312), in OffPolicyAlgorithm.learn(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)
    309 callback.on_training_start(locals(), globals())
    311 while self.num_timesteps < total_timesteps:
--> 312     rollout = self.collect_rollouts(
    313         self.env,
    314         train_freq=self.train_freq,
    315         action_noise=self.action_noise,
    316         callback=callback,
    317         learning_starts=self.learning_starts,
    318         replay_buffer=self.replay_buffer,
    319         log_interval=log_interval,
    320     )
    322     if rollout.continue_training is False:
    323         break

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\off_policy_algorithm.py:524](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/off_policy_algorithm.py:524), in OffPolicyAlgorithm.collect_rollouts(self, env, callback, train_freq, replay_buffer, action_noise, learning_starts, log_interval)
    521 assert train_freq.frequency > 0, "Should at least collect one step or episode."
    523 if env.num_envs > 1:
--> 524     assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
    526 # Vectorize action noise if needed
    527 if action_noise is not None and env.num_envs > 1 and not isinstance(action_noise, VectorizedActionNoise):

AssertionError: You must use only one env when doing episodic training.


### System Info

pip
- OS: Windows-10-10.0.23531-SP0 10.0.23531
- Python: 3.11.3
- Stable-Baselines3: 2.0.0
- PyTorch: 2.1.0.dev20230819+cu121
- GPU Enabled: True
- Numpy: 1.25.2
- Cloudpickle: 2.2.1
- Gymnasium: 0.28.1
- OpenAI Gym: 0.26.0

### Checklist

- [X] I have checked that there is no similar [issue](https://github.com/DLR-RM/stable-baselines3/issues) in the repo
- [X] I have read the [documentation](https://stable-baselines3.readthedocs.io/en/master/)
- [X] I have provided a [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) example to reproduce the bug
- [X] I have checked my env using the env checker
- [X] I've used the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
@fireyan8 fireyan8 added the custom gym env Issue related to Custom Gym Env label Aug 28, 2023
@araffin
Copy link
Member

araffin commented Aug 28, 2023

Hello,

"You must use only one env when doing episodic training." from error report

the error is pretty explicit and duplicate of #1032 (comment) and #1034

In short, change the default to train_freq=1 if you use multiple envs (you might need to adapt gradient_steps too).
This is something that I would like to change in the future (change the default), as it creates confusion.

@araffin araffin added the duplicate This issue or pull request already exists label Aug 28, 2023
@fireyan8
Copy link
Author

Hello,

"You must use only one env when doing episodic training." from error report

the error is pretty explicit and duplicate of #1032 (comment) and #1034

In short, change the default to train_freq=1 if you use multiple envs (you might need to adapt gradient_steps too). This is something that I would like to change in the future (change the default), as it creates confusion.
Thanks very much, it works.

@fireyan8
Copy link
Author

Then I encountered another problem when using the trained model:
Code

# Evaluate the model that was trained on the vectorized env.
model = TD3.load("./td3_pybamm_vec.zip")
# test_env = gymnasium.make("Pybammenv_v0")
test_env = make_vec_env('Pybammenv_v0', n_envs=1, seed=0)
# env = gymnasium.wrappers.FlattenObservation(test_env)
# NOTE: a vectorized env's reset() returns only obs (no info), so this
# two-value unpacking does not yield (obs, info); presumably `obs` ends up
# as something other than the observation dict, which would explain
# predict() failing below - confirm.
obs,info = test_env.reset()
ep = 0
print("start simulation")
while True:
    # Reset on every iteration, so "10ep time" below covers only the current step.
    start_time = time.time()
    action, _states = model.predict(obs, deterministic=True)
    # NOTE: `env` is not defined anywhere in this snippet; only `test_env` is.
    obs, rewards, done, _, info = env.step(action)
    ep += 1
    if ep % 10 == 0:
        ep_time = time.time() - start_time
        print("Step {}".format(ep + 1), "action", action, "obs", obs, "reward", rewards, "done", done)
        print("10ep time", ep_time)
        start_time = time.time()
    # `_` holds the fourth value returned by step() and is used as a stop flag here.
    if done or _:
        env.render()
        print("Episode finished after {} timesteps".format(ep+1),done,_)
        break

error:

IndexError                                Traceback (most recent call last)
Cell In[8], line 4
      2 while True:
      3     start_time = time.time()
----> 4     action, _states = model.predict(obs, deterministic=True)
      5     obs, rewards, done, _, info = env.step(action)
      6     ep += 1

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\base_class.py:555](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/base_class.py:555), in BaseAlgorithm.predict(self, observation, state, episode_start, deterministic)
    535 def predict(
    536     self,
    537     observation: Union[np.ndarray, Dict[str, np.ndarray]],
   (...)
    540     deterministic: bool = False,
    541 ) -> Tuple[np.ndarray, Optional[Tuple[np.ndarray, ...]]]:
    542     """
    543     Get the policy action from an observation (and optional hidden state).
    544     Includes sugar-coating to handle different observations (e.g. normalizing images).
   (...)
    553         (used in recurrent policies)
    554     """
--> 555     return self.policy.predict(observation, state, episode_start, deterministic)

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\policies.py:346](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/policies.py:346), in BasePolicy.predict(self, observation, state, episode_start, deterministic)
    343 # Switch to eval mode (this affects batch norm / dropout)
    344 self.set_training_mode(False)
--> 346 observation, vectorized_env = self.obs_to_tensor(observation)
    348 with th.no_grad():
    349     actions = self._predict(observation, deterministic=deterministic)

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\policies.py:264](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/policies.py:264), in BaseModel.obs_to_tensor(self, observation)
    260     observation = np.array(observation)
    262 if not isinstance(observation, dict):
    263     # Dict obs need to be handled separately
--> 264     vectorized_env = is_vectorized_observation(observation, self.observation_space)
    265     # Add batch dimension if needed
    266     observation = observation.reshape((-1, *self.observation_space.shape))

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\utils.py:399](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/utils.py:399), in is_vectorized_observation(observation, observation_space)
    397 for space_type, is_vec_obs_func in is_vec_obs_func_dict.items():
    398     if isinstance(observation_space, space_type):
--> 399         return is_vec_obs_func(observation, observation_space)
    400 else:
    401     # for-else happens if no break is called
...
--> 349     if observation[key].shape != subspace.shape:
    350         all_non_vectorized = False
    351         break

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

I can run the code with the commented out lines:

# test_env = gymnasium.make("Pybammenv_v0")
# test_env = gymnasium.wrappers.FlattenObservation(test_env)

What is the difference between the code with and without multiprocessing?

@qgallouedec
Copy link
Collaborator

Your code doesn't seem to work. You use env but it is not even defined. Also, please note that gymnasium wrappers are not compatible with vectorized environments. If you want to use a gymnasium wrapper, use the wrapper_class of make_vec_env.

@fireyan8
Copy link
Author

Hallo,
Code like this works; td3_pybamm was trained without multiprocessing and with the FlattenObservation wrapper:

# Evaluate the single-env model on the flattened-observation env until the
# episode ends, printing progress every 10 steps.
model = TD3.load("./td3_pybamm.zip")
test_env = gymnasium.make("Pybammenv_v0")
# test_env = make_vec_env('Pybammenv_v0', n_envs=1,seed=0)
test_env = gymnasium.wrappers.FlattenObservation(test_env)
obs, info = test_env.reset()
ep = 0  # number of env steps taken so far
print("start simulation")
# Started once here and restarted after each report, so the printed time
# really covers 10 steps rather than only the latest one.
start_time = time.time()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, truncated, info = test_env.step(action)
    ep += 1
    if ep % 10 == 0:
        ep_time = time.time() - start_time
        print("Step {}".format(ep), "action", action, "obs", obs, "reward", rewards, "done", done)
        print("10ep time", ep_time)
        start_time = time.time()
    if done or truncated:
        # Render through the env that was actually stepped; this script
        # defines no `env` name.
        test_env.render()
        print("Episode finished after {} timesteps".format(ep), done, truncated)
        break

As for make_vec_env, I was just trying to figure out whether the way I define the env is wrong, because I trained the model with the FlattenObservation wrapper.
With the code above, just changing td3_pybamm to td3_pybamm_vec (trained with multiprocessing) makes the code fail with:

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

@fireyan8
Copy link
Author

Thank you guys I found where it goes different:
model = TD3.load("./td3_pybamm_vec.zip")

test_env = make_vec_env('Pybammenv_v0', n_envs=1,seed=0)

# The vectorized env's reset() returns only the observations (no info),
# so the result is assigned directly instead of being unpacked as (obs, info).
obs = test_env.reset()

After make_vec_env(), .reset() returns only obs and drops my info.
The same thing happens with .step() as well.

The vectorized-env wrapper's API is slightly different from the newer Gymnasium API.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
custom gym env Issue related to Custom Gym Env duplicate This issue or pull request already exists
Projects
None yet
Development

No branches or pull requests

3 participants