[CI, BugFix] Fix CI warnings and errors #1100

Merged · 12 commits · Apr 27, 2023
19 changes: 19 additions & 0 deletions .circleci/unittest/linux_examples/scripts/run_local.sh
@@ -0,0 +1,19 @@
+ #!/bin/bash
+
+ set -e
+
+ # Read script from line 29
+ filename=".circleci/unittest/linux_examples/scripts/run_test.sh"
+ start_line=29
+ script=$(tail -n +$start_line "$filename")
+
+ # Replace "cuda:0" with "cpu"
+ script="${script//cuda:0/cpu}"
+
+ # Remove any instances of ".circleci/unittest/helpers/coverage_run_parallel.py"
+ script="${script//.circleci\/unittest\/helpers\/coverage_run_parallel.py}"
+ script="${script//coverage combine}"
+ script="${script//coverage xml -i}"
+
+ # Execute the modified script
+ echo "$script" | bash
42 changes: 24 additions & 18 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
@@ -35,7 +35,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -47,15 +47,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
  collector.collector_device=cuda:0 \
  logger.backend= \
  logger.log_interval=4 \
- optim.lr_scheduler=False
+ optim.lr_scheduler=False \
+ optim.device=cuda:0
  python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
  total_frames=48 \
  init_random_frames=10 \
  batch_size=10 \
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -67,7 +68,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -79,7 +80,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -89,6 +90,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
  collector.total_frames=48 \
  collector.frames_per_batch=16 \
  collector.collector_device=cuda:0 \
+ optim.device=cuda:0 \
  loss.mini_batch_size=10 \
  loss.ppo_epochs=1 \
  logger.backend= \
@@ -101,7 +103,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
  frames_per_batch=200 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -114,16 +116,17 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
- mode=offline
+ collector_device=cuda:0 \
+ mode=offline
  python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
  total_frames=48 \
  batch_size=10 \
  frames_per_batch=16 \
  num_workers=4 \
  env_per_collector=2 \
- collector_devices=cuda:0 \
- mode=offline
+ collector_device=cuda:0 \
+ device=cuda:0 \
+ mode=offline

  # With single envs
  python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
@@ -133,7 +136,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py
  frames_per_batch=16 \
  num_workers=2 \
  env_per_collector=1 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -145,15 +148,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
  collector.collector_device=cuda:0 \
  logger.backend= \
  logger.log_interval=4 \
- optim.lr_scheduler=False
+ optim.lr_scheduler=False \
+ optim.device=cuda:0
  python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
  total_frames=48 \
  init_random_frames=10 \
  batch_size=10 \
  frames_per_batch=16 \
  num_workers=2 \
  env_per_collector=1 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -165,7 +169,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py
  frames_per_batch=16 \
  num_workers=2 \
  env_per_collector=1 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -177,7 +181,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
  frames_per_batch=16 \
  num_workers=2 \
  env_per_collector=1 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -187,6 +191,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
  collector.total_frames=48 \
  collector.frames_per_batch=16 \
  collector.collector_device=cuda:0 \
+ optim.device=cuda:0 \
  loss.mini_batch_size=10 \
  loss.ppo_epochs=1 \
  logger.backend= \
@@ -199,7 +204,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
  frames_per_batch=200 \
  num_workers=2 \
  env_per_collector=1 \
- collector_devices=cuda:0 \
+ collector_device=cuda:0 \
  optim_steps_per_batch=1 \
  record_video=True \
  record_frames=4 \
@@ -213,15 +218,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
  num_workers=2 \
  env_per_collector=1 \
  mode=offline \
- collector_devices=cuda:0
+ collector_device=cuda:0
  python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
  total_frames=48 \
  batch_size=10 \
  frames_per_batch=16 \
  num_workers=2 \
  env_per_collector=1 \
  mode=offline \
- collector_devices=cuda:0
+ device=cuda:0 \
+ collector_device=cuda:0

  python .circleci/unittest/helpers/coverage_run_parallel.py examples/bandits/dqn.py --n_steps=100
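The example entry points read Hydra-style key=value overrides, so any of the CI invocations above can be reproduced by hand. A sketch of a CPU-only smoke test mirroring the PPO run from this script (same config keys as in the diff; the tiny frame counts are deliberate, and this is illustrative rather than part of the PR):

  python examples/ppo/ppo.py \
    collector.total_frames=48 \
    collector.frames_per_batch=16 \
    collector.collector_device=cpu \
    optim.device=cpu \
    loss.mini_batch_size=10 \
    loss.ppo_epochs=1 \
    logger.backend=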
9 changes: 5 additions & 4 deletions examples/a2c/utils.py
@@ -11,6 +11,7 @@
      CatTensors,
      DoubleToFloat,
      EnvCreator,
+     ExplorationType,
      GrayScale,
      NoopResetEnv,
      ObservationNorm,
@@ -261,7 +262,7 @@ def make_a2c_models(cfg):
              value_operator=value_module,
          )
          actor = actor_critic.get_policy_operator()
-         critic = actor_critic.get_value_operator()
+         critic = actor_critic.get_value_head()  # to avoid duplicate params
      else:
          actor = policy_module
          critic = value_module
@@ -326,7 +327,7 @@ def make_a2c_modules_state(proof_environment):
          distribution_class=distribution_class,
          distribution_kwargs=distribution_kwargs,
          return_log_prob=True,
-         default_interaction_mode="random",
+         default_interaction_type=ExplorationType.RANDOM,
      )

      # Define the value net
@@ -412,7 +413,7 @@ def make_a2c_modules_pixels(proof_environment):
          distribution_class=distribution_class,
          distribution_kwargs=distribution_kwargs,
          return_log_prob=True,
-         default_interaction_mode="random",
+         default_interaction_type=ExplorationType.RANDOM,
      )

      # Define another head for the value
@@ -451,8 +452,8 @@ def make_loss(loss_cfg, actor_network, value_network):
          entropy_coef=loss_cfg.entropy_coef,
          critic_coef=loss_cfg.critic_coef,
          entropy_bonus=True,
-         gamma=loss_cfg.gamma,
      )
+     loss_module.make_value_estimator(gamma=loss_cfg.gamma)
      return loss_module, advantage_module

5 changes: 3 additions & 2 deletions examples/bandits/dqn.py
@@ -75,15 +75,16 @@
      actor(env.reset())
      loss = DistributionalDQNLoss(
          actor,
-         gamma=0.0,
      )
+     loss.make_value_estimator(gamma=0.9)
  else:
      model = MLP(
          out_features=n_actions, depth=3, num_cells=n_cells, activation_class=nn.Tanh
      )
      actor = QValueActor(model, action_space="categorical")
      actor(env.reset())
-     loss = DQNLoss(actor, gamma=0.0, loss_function="smooth_l1")
+     loss = DQNLoss(actor, loss_function="smooth_l1", action_space=env.action_spec)
+     loss.make_value_estimator(gamma=0.0)
  policy = EGreedyWrapper(
      actor, eps_greedy, 0.0, annealing_num_steps=n_steps, spec=env.action_spec
  )
3 changes: 1 addition & 2 deletions examples/ddpg/config.yaml
@@ -18,8 +18,7 @@ init_random_frames: 25000
  activation: elu
  gSDE: 0
  from_pixels: 0
- #collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
- collector_devices: [cpu,cpu,cpu,cpu]
+ collector_device: cpu
  env_per_collector: 8
  num_workers: 32
  lr_scheduler: ""
2 changes: 1 addition & 1 deletion examples/discrete_sac/discrete_sac.py
@@ -174,10 +174,10 @@ def env_factory(num_workers):
          qvalue_network=model[1],
          num_actions=num_actions,
          num_qvalue_nets=2,
-         gamma=cfg.gamma,
          target_entropy_weight=cfg.target_entropy_weight,
          loss_function="smooth_l1",
      )
+     loss_module.make_value_estimator(gamma=cfg.gamma)

      # Define Target Network Updater
      target_net_updater = SoftUpdate(loss_module, cfg.target_update_polyak)
2 changes: 1 addition & 1 deletion examples/distributed/collectors/multi_nodes/ray_train.py
@@ -154,9 +154,9 @@
      entropy_coef=entropy_eps,  # these keys match by default but we set this for completeness
      value_target_key=advantage_module.value_target_key,
      critic_coef=1.0,
-     gamma=0.99,
      loss_critic_type="smooth_l1",
  )
+ loss_module.make_value_estimator(gamma=0.99)

  # 7. Define optimizer
  optim = torch.optim.Adam(loss_module.parameters(), lr)
3 changes: 1 addition & 2 deletions examples/dqn/config.yaml
@@ -16,8 +16,7 @@ lr: 3e-4
  multi_step: 1
  init_random_frames: 25000
  from_pixels: 1
- #collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
- collector_devices: [cpu,cpu,cpu,cpu]
+ collector_device: cpu
  env_per_collector: 8
  num_workers: 32
  lr_scheduler: ""
3 changes: 1 addition & 2 deletions examples/dreamer/config.yaml
@@ -14,8 +14,7 @@ from_pixels: True
  # we want 50 frames / traj in the replay buffer. Given the frame_skip=2 this makes each traj 100 steps long
  env_per_collector: 8
  num_workers: 8
- # collector_devices: [cuda:1]
- collector_devices: cuda:1 # [cpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu]
+ collector_device: cuda:1
  frames_per_batch: 800
  optim_steps_per_batch: 80
  record_interval: 30
6 changes: 1 addition & 5 deletions examples/dreamer/dreamer.py
@@ -177,10 +177,6 @@ def main(cfg: "DictConfig"):  # noqa: F821
          make_env=create_env_fn,
          actor_model_explore=exploration_policy,
          cfg=cfg,
-         # make_env_kwargs=[
-         #     {"device": device}
-         #     for device in cfg.collector_devices
-         # ],
      )
      print("collector:", collector)

@@ -190,7 +186,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
          record_frames=cfg.record_frames,
          frame_skip=cfg.frame_skip,
          policy_exploration=policy,
-         recorder=make_recorder_env(
+         environment=make_recorder_env(
              cfg=cfg,
              video_tag=video_tag,
              obs_norm_state_dict=obs_norm_state_dict,
10 changes: 5 additions & 5 deletions examples/dreamer/dreamer_utils.py
@@ -195,13 +195,13 @@ def make_transformed_env(**kwargs) -> TransformedEnv:
      from_pixels = cfg.from_pixels

      if custom_env is None and custom_env_maker is None:
-         if isinstance(cfg.collector_devices, str):
-             device = cfg.collector_devices
-         elif isinstance(cfg.collector_devices, Sequence):
-             device = cfg.collector_devices[0]
+         if isinstance(cfg.collector_device, str):
+             device = cfg.collector_device
+         elif isinstance(cfg.collector_device, Sequence):
+             device = cfg.collector_device[0]
          else:
              raise ValueError(
-                 "collector_devices must be either a string or a sequence of strings"
+                 "collector_device must be either a string or a sequence of strings"
              )
          env_kwargs = {
              "env_name": env_name,
12 changes: 3 additions & 9 deletions examples/iql/iql_online.py
@@ -73,13 +73,7 @@ def make_replay_buffer(
  @hydra.main(version_base=None, config_path=".", config_name="online_config")
  def main(cfg: "DictConfig"):  # noqa: F821

-     device = (
-         torch.device("cuda:0")
-         if torch.cuda.is_available()
-         and torch.cuda.device_count() > 0
-         and cfg.device == "cuda:0"
-         else torch.device("cpu")
-     )
+     device = torch.device(cfg.device)

      exp_name = generate_exp_name("Online_IQL", cfg.exp_name)
      logger = get_logger(
@@ -199,11 +193,11 @@ def env_factory(num_workers):
          qvalue_network=model[1],
          value_network=model[2],
          num_qvalue_nets=2,
-         gamma=cfg.gamma,
          temperature=cfg.temperature,
          expectile=cfg.expectile,
          loss_function="smooth_l1",
      )
+     loss_module.make_value_estimator(gamma=cfg.gamma)

      # Define Target Network Updater
      target_net_updater = SoftUpdate(loss_module, cfg.target_update_polyak)
@@ -216,7 +210,7 @@ def env_factory(num_workers):
          frames_per_batch=cfg.frames_per_batch,
          max_frames_per_traj=cfg.max_frames_per_traj,
          total_frames=cfg.total_frames,
-         device=cfg.device,
+         device=cfg.collector_device,
      )
      collector.set_seed(cfg.seed)

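The online IQL example now distinguishes the training device (cfg.device) from the data-collection device (cfg.collector_device). A hypothetical CPU invocation exercising both knobs, using the same keys that appear in run_test.sh above (a sketch, not part of this diff):

  python examples/iql/iql_online.py \
    total_frames=48 \
    device=cpu \
    collector_device=cpu \
    mode=offline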
3 changes: 1 addition & 2 deletions examples/iql/online_config.yaml
@@ -23,8 +23,7 @@ default_policy_scale: 1.0
  scale_lb: 0.1
  activation: elu
  from_pixels: 0
- #collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
- collector_devices: [cpu]
+ collector_device: cuda:0
  env_per_collector: 5
  frames_per_batch: 1000 # 5*200
  max_frames_per_traj: 200