diff --git a/python/ray/rllib/examples/saving_experiences.py b/python/ray/rllib/examples/saving_experiences.py
index 7a29b0fe7b0d..d2de88302d23 100644
--- a/python/ray/rllib/examples/saving_experiences.py
+++ b/python/ray/rllib/examples/saving_experiences.py
@@ -7,6 +7,7 @@
 import gym
 import numpy as np
 
+from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
 from ray.rllib.offline.json_writer import JsonWriter
 
@@ -18,6 +19,12 @@
     # simulator is available, but let's do it anyways for example purposes:
     env = gym.make("CartPole-v0")
 
+    # RLlib uses preprocessors to implement transforms such as one-hot encoding
+    # and flattening of tuple and dict observations. For CartPole a no-op
+    # preprocessor is used, but this may be relevant for more complex envs.
+    prep = get_preprocessor(env.observation_space)(env.observation_space)
+    print("The preprocessor is", prep)
+
     for eps_id in range(100):
         obs = env.reset()
         prev_action = np.zeros_like(env.action_space.sample())
@@ -31,7 +38,7 @@
                 t=t,
                 eps_id=eps_id,
                 agent_index=0,
-                obs=obs,
+                obs=prep.transform(obs),
                 actions=action,
                 action_prob=1.0,  # put the true action probability here
                 rewards=rew,
@@ -39,7 +46,7 @@
                 prev_rewards=prev_reward,
                 dones=done,
                 infos=info,
-                new_obs=new_obs)
+                new_obs=prep.transform(new_obs))
             obs = new_obs
             prev_action = action
             prev_reward = rew
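
The comment added in the second hunk explains that preprocessors one-hot encode and flatten observations for non-trivial spaces. As a minimal illustration of the same get_preprocessor/transform calls used in the patch (this snippet is not part of the patch; the Discrete space and the printed value are illustrative assumptions, not output from the example):

    # Illustrative sketch, assuming get_preprocessor returns a preprocessor
    # class for the given space, as in the example above.
    import gym
    from ray.rllib.models.preprocessors import get_preprocessor

    space = gym.spaces.Discrete(4)          # hypothetical observation space
    prep = get_preprocessor(space)(space)   # preprocessor chosen for this space
    print(prep.transform(2))                # expected to be a one-hot vector, e.g. [0., 0., 1., 0.]

For CartPole's Box observation space this preprocessing is a no-op, which is why the diff's transform calls do not change the CartPole example's behavior but keep it correct for more complex spaces.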