Problem on multiprocessing with TD3 #1659

fireyan8 · 2023-08-28T09:36:04Z

🐛 Bug

"You must use only one env when doing episodic training." from error report

The minimal code is fairly long, because the environment has to be set up before it can run.

Code example

import numpy as np
import pybamm
import numbers
import gymnasium as gym
from gymnasium import spaces




def make_new_model(model, param, disc, update_input):
    """Build a processed and discretised model from copies of the inputs.

    Works on ``model.new_copy()`` and ``param.copy()``. When ``update_input``
    is not ``None`` it is applied to the parameter copy via ``update`` before
    the model is processed.

    Args:
        model: Object providing ``new_copy()``.
        param: Object providing ``copy()``; the copy must provide ``update``
            and ``process_model(model, inplace=...)``.
        disc: Object providing
            ``process_model(model=..., inplace=..., check_model=...)``.
        update_input: Parameter overrides, or ``None`` to leave the copied
            parameters unchanged.

    Returns:
        The result of ``disc.process_model`` on the parameterised model.
    """
    model_copy = model.new_copy()
    param_copy = param.copy()

    if update_input is not None:
        param_copy.update(update_input)

    parameterised = param_copy.process_model(model_copy, inplace=False)
    return disc.process_model(model=parameterised, inplace=False, check_model=True)

def update_model_step(inputparam, model, param, disc, solutions):
    """Advance the simulation by one step from the last stored solution.

    A model is rebuilt with ``make_new_model`` using ``inputparam`` as the
    parameter overrides, then a new ``pybamm.CasadiSolver`` steps it starting
    from ``solutions[-1].last_state``.

    Args:
        inputparam: Parameter overrides forwarded to ``make_new_model``.
        model: Base model forwarded to ``make_new_model``.
        param: Parameter values forwarded to ``make_new_model``.
        disc: Discretisation forwarded to ``make_new_model``.
        solutions: Sequence of earlier solutions; only the last one is read.

    Returns:
        The solution object returned by the solver's ``step`` call.
    """
    rebuilt_model = make_new_model(model, param, disc, inputparam)
    casadi_solver = pybamm.CasadiSolver(mode="safe", atol=1e-6, rtol=1e-3)

    # Each step is 1 s, evaluated at 3 points.
    return casadi_solver.step(
        solutions[-1].last_state,
        rebuilt_model,
        1,
        npts=3,
        save=False,
    )

def update_input(current):
    """Return a parameter-update dict that sets "Current function [A]" to ``current``."""
    return {"Current function [A]": current}


class Pybammenv(gym.Env):
    """Gymnasium environment around a PyBaMM DFN lithium-ion cell simulation.

    Each call to :meth:`step` applies the action as the cell current and
    advances the simulation by one second. Observations hold the last current
    and terminal voltage of the newest solution, divided by 4 and 4.2
    respectively.
    """

    metadata = {"render_modes": ["human"], "render_fps": 4}

    def __init__(self, Vmax=4.2, Tmax=330, initial_soc=0.2, seed=None):
        """Store the episode limits and declare observation/action spaces.

        Args:
            Vmax: Voltage limit; :meth:`step` terminates the episode once the
                terminal voltage reaches ``Vmax - 0.1``.
            Tmax: Cell temperature [K] at or above which :meth:`step` sets
                ``truncated``.
            initial_soc: Value passed to ``set_initial_stoichiometries`` on
                reset.
            seed: Stored on the instance; not otherwise used by this class.
        """
        self.Vmax = Vmax
        self.Tmax = Tmax
        self.initial_soc = initial_soc
        self.seed = seed

        # Observations are dictionaries holding only the two quantities of
        # interest (the model itself exposes several hundred variables).
        self.observation_space = spaces.Dict(
            {
                "Current function [A]": spaces.Box(-1, 1, shape=(1,), dtype=np.float64),
                "Terminal voltage [V]": spaces.Box(0, 1, shape=(1,), dtype=np.float64),
            }
        )

        # One action: the current.
        self.action_space = spaces.Box(-1, 1, shape=(), dtype=np.float32)

    def _get_obs(self):
        """Build the observation dict from the most recent solution."""
        latest = self.solutions[-1]
        return {
            "Current function [A]": np.array([latest["Current [A]"].data[-1]]) / 4,
            "Terminal voltage [V]": np.array([latest["Terminal voltage [V]"].data[-1]]) / 4.2,
        }

    def _get_info(self):
        """Return the info dict exposing the simulation objects."""
        return {
            "model": self.model,
            "param": self.params,
            "disc": self.disc,
            "solutions": self.solutions,
        }

    def reset(self, options=None, seed=None):
        """Rebuild the simulation and take an initial 1 s step at zero current.

        Args:
            options: PyBaMM model options for ``pybamm.lithium_ion.DFN``.
                ``None`` selects ``{"thermal": "x-full"}``.
            seed: Forwarded to ``gym.Env.reset``.

        Returns:
            Tuple ``(observation, info)``.
        """
        super().reset(seed=seed)
        if options is None:
            # A fresh dict per call avoids sharing one mutable default object.
            options = {"thermal": "x-full"}

        self.reward = 0
        self.terminted = False
        self.truncated = False

        self.model = pybamm.lithium_ion.DFN(options=options)
        self.params = pybamm.ParameterValues("Ecker2015").copy()
        self.params.update({"Current function [A]": 0.0})
        self.params.set_initial_stoichiometries(self.initial_soc)
        parameterised = self.params.process_model(self.model, inplace=False)

        # Geometry and mesh from the model defaults.
        geometry = self.model.default_geometry
        submesh_types = self.model.default_submesh_types
        var_pts = self.model.default_var_pts
        self.params.process_geometry(geometry)
        mesh = pybamm.Mesh(geometry, submesh_types, var_pts)

        spatial_methods = self.model.default_spatial_methods
        self.disc = pybamm.Discretisation(mesh, spatial_methods)

        self.solver = pybamm.CasadiSolver(mode="safe", atol=1e-6, rtol=1e-3)

        # Initial solution: one 1 s step with the zero current set above.
        self.solutions = []
        init_model = self.disc.process_model(model=parameterised, inplace=False)
        initial_solution = self.solver.step(model=init_model, dt=1, old_solution=None)
        self.solutions.append(initial_solution)

        self.observation = self._get_obs()
        return self.observation, self._get_info()

    def step(self, action):
        """Apply ``action`` as the cell current and advance the simulation 1 s.

        Args:
            action: Scalar (or single-element array) current value.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
        """
        # The original passed an undefined name here; use the action itself.
        # NOTE(review): the action is forwarded unscaled, while observations
        # divide the current by 4 -- confirm whether a rescale is intended.
        current = float(np.asarray(action).squeeze())
        self.solutions.append(
            update_model_step(
                update_input(current),
                self.model,
                self.params,
                self.disc,
                self.solutions,
            )
        )
        self.observation = self._get_obs()

        latest = self.solutions[-1]
        voltage = latest["Terminal voltage [V]"].data

        # Target reached: voltage within 0.1 V of the limit.
        if (voltage >= self.Vmax - 0.1).any():
            self.reward += 100000
            self.terminted = True

        # Temperature at or above Tmax, or voltage at or below 2.501 V.
        if (latest["Cell temperature [K]"].data >= self.Tmax).any() or (voltage <= 2.501).any():
            self.reward += -1000
            self.truncated = True

        # NOTE(review): self.reward is only zeroed in reset(), so the value
        # returned here accumulates over the episode -- confirm this is intended.
        previous_voltage = self.solutions[-2]["Terminal voltage [V]"].data[-1]
        self.reward += (voltage[-1] - previous_voltage) * 10000

        return self.observation, self.reward, self.terminted, self.truncated, self._get_info()

    def render(self):
        """Show a PyBaMM dynamic quick-plot of all stored solutions."""
        plot = pybamm.QuickPlot(self.solutions)
        plot.dynamic_plot()

    def close(self):
        """No resources to release."""
        pass

The code that uses multiple environments (after all the imports):

# Create a vectorized env made of four 'Pybammenv_v0' instances.
vec_env = make_vec_env('Pybammenv_v0', n_envs=4, seed=0)


# TD3 is created here without a train_freq argument. With more than one env,
# the reported run fails inside learn() with
# "You must use only one env when doing episodic training."
# The maintainer's reply in this thread suggests passing train_freq=1
# (and possibly adapting gradient_steps).
model = TD3("MultiInputPolicy", vec_env,
             verbose=1, device="cuda",batch_size=64)
model.learn(total_timesteps=1000, log_interval=10, progress_bar=False)
model.save("td3_pybamm_vec")

Relevant log output / Error message

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[10], line 7
      3 # The noise objects for TD3 
      5 model = TD3("MultiInputPolicy", vec_env,
      6              verbose=1, device="cuda",batch_size=64)
----> 7 model.learn(total_timesteps=1000, log_interval=10, progress_bar=False)
      8 model.save("td3_pybamm_vec")

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\td3\td3.py:222](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/td3/td3.py:222), in TD3.learn(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)
    213 def learn(
    214     self: SelfTD3,
    215     total_timesteps: int,
   (...)
    220     progress_bar: bool = False,
    221 ) -> SelfTD3:
--> 222     return super().learn(
    223         total_timesteps=total_timesteps,
    224         callback=callback,
    225         log_interval=log_interval,
    226         tb_log_name=tb_log_name,
    227         reset_num_timesteps=reset_num_timesteps,
    228         progress_bar=progress_bar,
    229     )

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\off_policy_algorithm.py:312](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/off_policy_algorithm.py:312), in OffPolicyAlgorithm.learn(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)
    309 callback.on_training_start(locals(), globals())
    311 while self.num_timesteps < total_timesteps:
--> 312     rollout = self.collect_rollouts(
    313         self.env,
    314         train_freq=self.train_freq,
    315         action_noise=self.action_noise,
    316         callback=callback,
    317         learning_starts=self.learning_starts,
    318         replay_buffer=self.replay_buffer,
    319         log_interval=log_interval,
    320     )
    322     if rollout.continue_training is False:
    323         break

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\off_policy_algorithm.py:524](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/off_policy_algorithm.py:524), in OffPolicyAlgorithm.collect_rollouts(self, env, callback, train_freq, replay_buffer, action_noise, learning_starts, log_interval)
    521 assert train_freq.frequency > 0, "Should at least collect one step or episode."
    523 if env.num_envs > 1:
--> 524     assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
    526 # Vectorize action noise if needed
    527 if action_noise is not None and env.num_envs > 1 and not isinstance(action_noise, VectorizedActionNoise):

AssertionError: You must use only one env when doing episodic training.



### System Info

pip
- OS: Windows-10-10.0.23531-SP0 10.0.23531
- Python: 3.11.3
- Stable-Baselines3: 2.0.0
- PyTorch: 2.1.0.dev20230819+cu121
- GPU Enabled: True
- Numpy: 1.25.2
- Cloudpickle: 2.2.1
- Gymnasium: 0.28.1
- OpenAI Gym: 0.26.0

### Checklist

- [X] I have checked that there is no similar [issue](https://github.com/DLR-RM/stable-baselines3/issues) in the repo
- [X] I have read the [documentation](https://stable-baselines3.readthedocs.io/en/master/)
- [X] I have provided a [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) example to reproduce the bug
- [X] I have checked my env using the env checker
- [X] I've used the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.

The text was updated successfully, but these errors were encountered:

araffin · 2023-08-28T10:22:18Z

Hello,

"You must use only one env when doing episodic training." from error report

the error is pretty explicit and duplicate of #1032 (comment) and #1034

In short, change the default to train_freq=1 if you use multiple envs (you might need to adapt gradient_steps too).
This is something that I would like to change in the future (change the default), as it creates confusion.

fireyan8 · 2023-08-28T11:23:29Z

Hello,

"You must use only one env when doing episodic training." from error report

the error is pretty explicit and duplicate of #1032 (comment) and #1034

In short, change the default to train_freq=1 if you use multiple envs (you might need to adapt gradient_steps too). This is something that I would like to change in the future (change the default), as it creates confusion.
Thanks very much, it works.

fireyan8 · 2023-08-28T11:38:37Z

Then I encountered another problem when using the trained model:
Code

# Evaluation loop for the model trained on the vectorized env.
model = TD3.load("./td3_pybamm_vec.zip")
# test_env = gymnasium.make("Pybammenv_v0")
test_env = make_vec_env('Pybammenv_v0', n_envs=1, seed=0)
# env = gymnasium.wrappers.FlattenObservation(test_env)
# NOTE(review): later in this thread the author reports that reset() on the env
# returned by make_vec_env gives back only obs, so this two-value unpacking
# does not match what is returned.
obs,info = test_env.reset()
ep = 0
print("start simulation")
while True:
    start_time = time.time()
    action, _states = model.predict(obs, deterministic=True)
    # NOTE(review): `env` is not defined in this snippet (the env created
    # above is `test_env`); the author also reports that step() on the
    # vectorized env differs from the gymnasium five-value return.
    obs, rewards, done, _, info = env.step(action)
    ep += 1
    if ep % 10 == 0:
        ep_time = time.time() - start_time
        print("Step {}".format(ep + 1), "action", action, "obs", obs, "reward", rewards, "done", done)
        print("10ep time", ep_time)
        start_time = time.time()
    # `_` holds the fourth value returned by step() and is used as a stop flag.
    if done or _:
        env.render()
        print("Episode finished after {} timesteps".format(ep+1),done,_)
        break

error:

IndexError                                Traceback (most recent call last)
Cell In[8], line 4
      2 while True:
      3     start_time = time.time()
----> 4     action, _states = model.predict(obs, deterministic=True)
      5     obs, rewards, done, _, info = env.step(action)
      6     ep += 1

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\base_class.py:555](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/base_class.py:555), in BaseAlgorithm.predict(self, observation, state, episode_start, deterministic)
    535 def predict(
    536     self,
    537     observation: Union[np.ndarray, Dict[str, np.ndarray]],
   (...)
    540     deterministic: bool = False,
    541 ) -> Tuple[np.ndarray, Optional[Tuple[np.ndarray, ...]]]:
    542     """
    543     Get the policy action from an observation (and optional hidden state).
    544     Includes sugar-coating to handle different observations (e.g. normalizing images).
   (...)
    553         (used in recurrent policies)
    554     """
--> 555     return self.policy.predict(observation, state, episode_start, deterministic)

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\policies.py:346](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/policies.py:346), in BasePolicy.predict(self, observation, state, episode_start, deterministic)
    343 # Switch to eval mode (this affects batch norm / dropout)
    344 self.set_training_mode(False)
--> 346 observation, vectorized_env = self.obs_to_tensor(observation)
    348 with th.no_grad():
    349     actions = self._predict(observation, deterministic=deterministic)

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\policies.py:264](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/policies.py:264), in BaseModel.obs_to_tensor(self, observation)
    260     observation = np.array(observation)
    262 if not isinstance(observation, dict):
    263     # Dict obs need to be handled separately
--> 264     vectorized_env = is_vectorized_observation(observation, self.observation_space)
    265     # Add batch dimension if needed
    266     observation = observation.reshape((-1, *self.observation_space.shape))

File [f:\Program_Files\Anaconda\envs\pybamm_env\Lib\site-packages\stable_baselines3\common\utils.py:399](file:///F:/Program_Files/Anaconda/envs/pybamm_env/Lib/site-packages/stable_baselines3/common/utils.py:399), in is_vectorized_observation(observation, observation_space)
    397 for space_type, is_vec_obs_func in is_vec_obs_func_dict.items():
    398     if isinstance(observation_space, space_type):
--> 399         return is_vec_obs_func(observation, observation_space)
    400 else:
    401     # for-else happens if no break is called
...
--> 349     if observation[key].shape != subspace.shape:
    350         all_non_vectorized = False
    351         break

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

I can run the code with the commented out lines:

# test_env = gymnasium.make("Pybammenv_v0")
# test_env = gymnasium.wrappers.FlattenObservation(test_env)

What is the difference between the code with and without multiprocessing?

qgallouedec · 2023-08-28T12:46:42Z

Your code doesn't seem to work. You use env but it is not even defined. Also, please note that gymnasium wrappers are not compatible with vectorized environments. If you want to use a gymnasium wrapper, use the wrapper_class of make_vec_env.

fireyan8 · 2023-08-28T13:01:43Z

Hello,
code like this works; td3_pybamm was trained without multiprocessing and with FlattenObservation:

# Evaluate the model trained on a single env with flattened observations.
model = TD3.load("./td3_pybamm.zip")
test_env = gymnasium.make("Pybammenv_v0")
# test_env = make_vec_env('Pybammenv_v0', n_envs=1,seed=0)
test_env = gymnasium.wrappers.FlattenObservation(test_env)
obs, info = test_env.reset()
ep = 0
print("start simulation")
# Started once, outside the loop, so that "10ep time" covers all steps since
# the previous report instead of only the most recent step.
start_time = time.time()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, truncated, info = test_env.step(action)
    ep += 1
    if ep % 10 == 0:
        ep_time = time.time() - start_time
        print("Step {}".format(ep + 1), "action", action, "obs", obs, "reward", rewards, "done", done)
        print("10ep time", ep_time)
        start_time = time.time()
    if done or truncated:
        # Render the env that was actually stepped (`env` was never defined).
        test_env.render()
        print("Episode finished after {} timesteps".format(ep+1),done,truncated)
        break

As for make_vec_env, I was just trying to figure out whether the way I define the env is wrong, because I trained the model with FlattenObservation.
With the code above, just changing td3_pybamm to td3_pybamm_vec (trained with multiprocessing) makes the code fail with:

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

fireyan8 · 2023-08-28T13:20:56Z

Thank you guys I found where it goes different:
# Load the model that was trained on the vectorized env.
model = TD3.load("./td3_pybamm_vec.zip")

# Single-env vectorized wrapper around 'Pybammenv_v0'.
test_env = make_vec_env('Pybammenv_v0', n_envs=1,seed=0)

# Only obs is unpacked here: as reported in this comment, reset() on this env
# returns just the observation, without the info dict.
obs = test_env.reset()

After make_vec_env(), .reset() returns only obs and drops my info.
The same thing happens with .step() as well.

The vectorized-env wrapper's API is slightly different from the newer gymnasium API.

fireyan8 added the custom gym env Issue related to Custom Gym Env label Aug 28, 2023

araffin added the duplicate This issue or pull request already exists label Aug 28, 2023

fireyan8 closed this as completed Aug 28, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Problem on multiprocessing with TD3 #1659

Problem on multiprocessing with TD3 #1659

fireyan8 commented Aug 28, 2023

araffin commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

qgallouedec commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

Problem on multiprocessing with TD3 #1659

Problem on multiprocessing with TD3 #1659

Comments

fireyan8 commented Aug 28, 2023

🐛 Bug

Code example

Relevant log output / Error message

araffin commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

qgallouedec commented Aug 28, 2023

fireyan8 commented Aug 28, 2023

fireyan8 commented Aug 28, 2023