87 changes: 47 additions & 40 deletions ci/jenkins_tests/run_rllib_tests.sh
@@ -2,267 +2,274 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env PongDeterministic-v0 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
[Review comment, Contributor Author: Shorten to 1 to try to speed up jenkins.]
--config '{"num_workers": 2}'

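For context on the review comment above, here is a minimal sketch of how the `--stop` JSON acts as a stopping condition, assuming Tune-style semantics (the helper name and internals below are illustrative, not train.py's actual code). Training halts once any reported metric reaches its threshold, so `"training_iteration": 1` ends a run after a single train() call instead of two.

```python
# Hypothetical sketch of the per-iteration stop check; names are
# illustrative, not the actual internals of train.py or tune.
def should_stop(result, stop_criteria):
    # stop_criteria comes from --stop, e.g. {"training_iteration": 1};
    # result is the dict reported by one agent.train() call.
    return any(result.get(key, 0) >= threshold
               for key, threshold in stop_criteria.items())

# With the new setting, one iteration satisfies the criterion:
assert should_stop({"training_iteration": 1}, {"training_iteration": 1})
# Under the old setting, the same result would not stop training yet:
assert not should_stop({"training_iteration": 1}, {"training_iteration": 2})
```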
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pong-ram-v4 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env PongDeterministic-v0 \
--run A2C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"free_log_std": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0.1}' \
--ray-num-gpus 1

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"remote_worker_envs": true, "num_envs_per_worker": 2, "num_workers": 1, "train_batch_size": 100, "sgd_minibatch_size": 50}'

+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+/ray/python/ray/rllib/tests/run_silent.sh train.py \
+--env Pendulum-v0 \
+--run APPO \
+--stop '{"training_iteration": 1}' \
+--config '{"num_workers": 2, "num_gpus": 0}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pendulum-v0 \
--run ES \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pong-v0 \
--run ES \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run APEX \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "timesteps_per_iteration": 1000, "num_gpus": 0, "min_iter_time_s": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env FrozenLake-v0 \
--run DQN \
--stop '{"training_iteration": 2}'
--stop '{"training_iteration": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env FrozenLake-v0 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_sgd_iter": 10, "sgd_minibatch_size": 64, "train_batch_size": 1000, "num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env PongDeterministic-v4 \
--run DQN \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env MontezumaRevenge-v0 \
--run PPO \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "use_pytorch": true}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pong-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env FrozenLake-v0 \
--run PG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pendulum-v0 \
--run DDPG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "model": {"use_lstm": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0, "model": {"use_lstm": true}}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env MountainCarContinuous-v0 \
--run DDPG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env MountainCarContinuous-v0 \
--run DDPG \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pendulum-v0 \
--run APEX_DDPG \
--ray-num-cpus 8 \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env Pendulum-v0 \
--run APEX_DDPG \
--ray-num-cpus 8 \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1, "batch_mode": "complete_episodes", "parameter_noise": true}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run MARWIL \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"input": "/ray/python/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "shuffle_buffer_size": 10}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"input": "/ray/python/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "soft_q": true}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
@@ -375,14 +382,14 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env PongDeterministic-v4 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "use_pytorch": true, "sample_async": false, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84}, "preprocessor_pref": "rllib"}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/tests/run_silent.sh train.py \
--env CartPole-v1 \
--run A3C \
--stop '{"training_iteration": 2}' \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
[diff truncated]
30 changes: 19 additions & 11 deletions python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py
@@ -50,12 +50,13 @@ def __init__(self, observation_space, action_space, config):
             tf.float32, [None] + list(observation_space.shape))
         dist_class, logit_dim = ModelCatalog.get_action_dist(
             action_space, self.config["model"])
-        prev_actions = ModelCatalog.get_action_placeholder(action_space)
-        prev_rewards = tf.placeholder(tf.float32, [None], name="prev_reward")
+        self.prev_actions = ModelCatalog.get_action_placeholder(action_space)
+        self.prev_rewards = tf.placeholder(
+            tf.float32, [None], name="prev_reward")
         self.model = ModelCatalog.get_model({
             "obs": self.observations,
-            "prev_actions": prev_actions,
-            "prev_rewards": prev_rewards,
+            "prev_actions": self.prev_actions,
+            "prev_rewards": self.prev_rewards,
             "is_training": self._get_is_training_placeholder(),
         }, observation_space, logit_dim, self.config["model"])
         action_dist = dist_class(self.model.outputs)
@@ -83,8 +84,8 @@ def __init__(self, observation_space, action_space, config):
         loss_in = [
             ("obs", self.observations),
             ("actions", actions),
-            ("prev_actions", prev_actions),
-            ("prev_rewards", prev_rewards),
+            ("prev_actions", self.prev_actions),
+            ("prev_rewards", self.prev_rewards),
             ("advantages", advantages),
             ("value_targets", self.v_target),
         ]
@@ -103,8 +104,8 @@ def __init__(self, observation_space, action_space, config):
             loss_inputs=loss_in,
             state_inputs=self.model.state_in,
             state_outputs=self.model.state_out,
-            prev_action_input=prev_actions,
-            prev_reward_input=prev_rewards,
+            prev_action_input=self.prev_actions,
+            prev_reward_input=self.prev_rewards,
             seq_lens=self.model.seq_lens,
             max_seq_len=self.config["model"]["max_seq_len"])
 
@@ -138,7 +139,9 @@ def postprocess_trajectory(self,
             next_state = []
             for i in range(len(self.model.state_in)):
                 next_state.append([sample_batch["state_out_{}".format(i)][-1]])
-            last_r = self._value(sample_batch["new_obs"][-1], *next_state)
+            last_r = self._value(sample_batch["new_obs"][-1],
+                                 sample_batch["actions"][-1],
+                                 sample_batch["rewards"][-1], *next_state)
         return compute_advantages(sample_batch, last_r, self.config["gamma"],
                                   self.config["lambda"])
 
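The signature change above matters because `last_r` seeds the advantage bootstrap when a rollout is truncated mid-episode: the value estimate for the final observation is now computed with the last action and reward, which models that consume `prev_actions`/`prev_rewards` (e.g. LSTM policies) require. Below is a minimal numpy sketch of the standard GAE recurrence that `compute_advantages` implements; an illustrative reimplementation under that assumption, not rllib's exact code:

```python
import numpy as np

def gae_advantages(rewards, vf_preds, last_r, gamma=0.99, lam=1.0):
    # rewards, vf_preds: 1-D arrays for one (possibly truncated) rollout;
    # last_r: V(s_T) from _value(), or 0.0 if the episode terminated.
    vpred_t = np.concatenate([vf_preds, [last_r]])
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    deltas = rewards + gamma * vpred_t[1:] - vpred_t[:-1]
    advantages = np.zeros_like(rewards, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        # A_t = delta_t + gamma * lam * A_{t+1}
        running = deltas[t] + gamma * lam * running
        advantages[t] = running
    return advantages

# Example: a 3-step truncated rollout bootstrapped with last_r = 0.5.
print(gae_advantages(np.array([1.0, 1.0, 1.0]),
                     np.array([0.4, 0.5, 0.6]), last_r=0.5))
```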
@@ -159,8 +162,13 @@ def extra_compute_action_fetches(self):
             TFPolicyGraph.extra_compute_action_fetches(self),
             **{"vf_preds": self.vf})
 
-    def _value(self, ob, *args):
-        feed_dict = {self.observations: [ob], self.model.seq_lens: [1]}
+    def _value(self, ob, prev_action, prev_reward, *args):
+        feed_dict = {
+            self.observations: [ob],
+            self.prev_actions: [prev_action],
+            self.prev_rewards: [prev_reward],
+            self.model.seq_lens: [1]
+        }
         assert len(args) == len(self.model.state_in), \
             (args, self.model.state_in)
         for k, v in zip(self.model.state_in, args):
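Why `_value` must feed the new placeholders: in TF1 graph mode, as used in this file, fetching a tensor that depends on an unfed placeholder raises `InvalidArgumentError`, so once the model graph consumes `prev_actions`/`prev_rewards`, every call site has to supply them. Here is a self-contained toy repro of that behavior (an illustrative graph, not the rllib model):

```python
import tensorflow as tf  # TF1-style graph mode, as in this file

obs = tf.placeholder(tf.float32, [None, 4], name="obs")
prev_reward = tf.placeholder(tf.float32, [None], name="prev_reward")
# Toy "value output" that consumes both inputs, like a policy fed with
# prev-reward features would.
value_out = tf.reduce_sum(obs, axis=1) + prev_reward

with tf.Session() as sess:
    try:
        # The old _value behavior: prev_reward left unfed -> error.
        sess.run(value_out, feed_dict={obs: [[1., 2., 3., 4.]]})
    except tf.errors.InvalidArgumentError:
        print("unfed placeholder raises, as expected")
    # The fixed behavior: feed every input the model depends on.
    print(sess.run(value_out,
                   feed_dict={obs: [[1., 2., 3., 4.]], prev_reward: [0.5]}))
```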
[diff truncated]