From bbce0eaeafc93238430025e373a494819d6f0128 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Mon, 2 Sep 2024 07:53:10 +0000
Subject: [PATCH 1/3] moved make optimizer and scheduler function to inside policy

---
 lerobot/common/policies/act/modeling_act.py  | 24 ++++++++
 .../policies/diffusion/modeling_diffusion.py | 30 ++++++++++
 .../common/policies/tdmpc/modeling_tdmpc.py  |  7 +++
 lerobot/scripts/train.py                     | 55 +------------------
 4 files changed, 62 insertions(+), 54 deletions(-)

diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py
index 3427c48299..ea8fdc2d61 100644
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -160,6 +160,30 @@ def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
 
         return loss_dict
 
+    def make_optimizer_and_scheduler(self, **kwargs):
+        """Create the optimizer and learning rate scheduler for ACT"""
+        lr, lr_backbone, weight_decay = kwargs["lr"], kwargs["lr_backbone"], kwargs["weight_decay"]
+        optimizer_params_dicts = [
+            {
+                "params": [
+                    p
+                    for n, p in self.named_parameters()
+                    if not n.startswith("model.backbone") and p.requires_grad
+                ]
+            },
+            {
+                "params": [
+                    p
+                    for n, p in self.named_parameters()
+                    if n.startswith("model.backbone") and p.requires_grad
+                ],
+                "lr": lr_backbone,
+            },
+        ]
+        optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=lr, weight_decay=weight_decay)
+        lr_scheduler = None
+        return optimizer, lr_scheduler
+
 
 class ACTTemporalEnsembler:
     def __init__(self, temporal_ensemble_coeff: float, chunk_size: int) -> None:
diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py
index 308a8be3c7..0093e451cd 100644
--- a/lerobot/common/policies/diffusion/modeling_diffusion.py
+++ b/lerobot/common/policies/diffusion/modeling_diffusion.py
@@ -156,6 +156,36 @@ def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
         loss = self.diffusion.compute_loss(batch)
         return {"loss": loss}
 
+    def make_optimizer_and_scheduler(self, **kwargs):
+        """Create the optimizer and learning rate scheduler for Diffusion policy"""
+        lr, adam_betas, adam_eps, adam_weight_decay = (
+            kwargs["lr"],
+            kwargs["adam_betas"],
+            kwargs["adam_eps"],
+            kwargs["adam_weight_decay"],
+        )
+        lr_scheduler_name, lr_warmup_steps, offline_steps = (
+            kwargs["lr_scheduler"],
+            kwargs["lr_warmup_steps"],
+            kwargs["offline_steps"],
+        )
+        optimizer = torch.optim.Adam(
+            self.diffusion.parameters(),
+            lr,
+            adam_betas,
+            adam_eps,
+            adam_weight_decay,
+        )
+        from diffusers.optimization import get_scheduler
+
+        lr_scheduler = get_scheduler(
+            lr_scheduler_name,
+            optimizer=optimizer,
+            num_warmup_steps=lr_warmup_steps,
+            num_training_steps=offline_steps,
+        )
+        return optimizer, lr_scheduler
+
 
 def _make_noise_scheduler(name: str, **kwargs: dict) -> DDPMScheduler | DDIMScheduler:
     """
diff --git a/lerobot/common/policies/tdmpc/modeling_tdmpc.py b/lerobot/common/policies/tdmpc/modeling_tdmpc.py
index d97c4824c4..9e988c207d 100644
--- a/lerobot/common/policies/tdmpc/modeling_tdmpc.py
+++ b/lerobot/common/policies/tdmpc/modeling_tdmpc.py
@@ -534,6 +534,13 @@ def update(self):
         # we update every step and adjust the decay parameter `alpha` accordingly (0.99 -> 0.995)
         update_ema_parameters(self.model_target, self.model, self.config.target_model_momentum)
 
+    def make_optimizer_and_scheduler(self, **kwargs):
+        """Create the optimizer and learning rate scheduler for TD-MPC"""
+        lr = kwargs["lr"]
+        optimizer = torch.optim.Adam(self.parameters(), lr)
+        lr_scheduler = None
+        return optimizer, lr_scheduler
+
 
 class TDMPCTOLD(nn.Module):
     """Task-Oriented Latent Dynamics (TOLD) model used in TD-MPC."""
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 45807503f5..e2cf55d6f8 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -51,59 +51,6 @@
 from lerobot.scripts.eval import eval_policy
 
 
-def make_optimizer_and_scheduler(cfg, policy):
-    if cfg.policy.name == "act":
-        optimizer_params_dicts = [
-            {
-                "params": [
-                    p
-                    for n, p in policy.named_parameters()
-                    if not n.startswith("model.backbone") and p.requires_grad
-                ]
-            },
-            {
-                "params": [
-                    p
-                    for n, p in policy.named_parameters()
-                    if n.startswith("model.backbone") and p.requires_grad
-                ],
-                "lr": cfg.training.lr_backbone,
-            },
-        ]
-        optimizer = torch.optim.AdamW(
-            optimizer_params_dicts, lr=cfg.training.lr, weight_decay=cfg.training.weight_decay
-        )
-        lr_scheduler = None
-    elif cfg.policy.name == "diffusion":
-        optimizer = torch.optim.Adam(
-            policy.diffusion.parameters(),
-            cfg.training.lr,
-            cfg.training.adam_betas,
-            cfg.training.adam_eps,
-            cfg.training.adam_weight_decay,
-        )
-        from diffusers.optimization import get_scheduler
-
-        lr_scheduler = get_scheduler(
-            cfg.training.lr_scheduler,
-            optimizer=optimizer,
-            num_warmup_steps=cfg.training.lr_warmup_steps,
-            num_training_steps=cfg.training.offline_steps,
-        )
-    elif policy.name == "tdmpc":
-        optimizer = torch.optim.Adam(policy.parameters(), cfg.training.lr)
-        lr_scheduler = None
-    elif cfg.policy.name == "vqbet":
-        from lerobot.common.policies.vqbet.modeling_vqbet import VQBeTOptimizer, VQBeTScheduler
-
-        optimizer = VQBeTOptimizer(policy, cfg)
-        lr_scheduler = VQBeTScheduler(optimizer, cfg)
-    else:
-        raise NotImplementedError()
-
-    return optimizer, lr_scheduler
-
-
 def update_policy(
     policy,
     batch,
@@ -334,7 +281,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
     assert isinstance(policy, nn.Module)
     # Create optimizer and scheduler
     # Temporary hack to move optimizer out of policy
-    optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
+    optimizer, lr_scheduler = policy.make_optimizer_and_scheduler(**cfg.training)
     grad_scaler = GradScaler(enabled=cfg.use_amp)
 
     step = 0  # number of policy updates (forward + backward + optim)

From 3034272229e18152629e0f0ac4a6251a6f87cdb2 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Mon, 2 Sep 2024 08:04:56 +0000
Subject: [PATCH 2/3] modified tests dirs

---
 tests/scripts/save_policy_to_safetensors.py | 3 +--
 tests/test_policies.py                      | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/scripts/save_policy_to_safetensors.py b/tests/scripts/save_policy_to_safetensors.py
index 5236b7ae55..7287ed7304 100644
--- a/tests/scripts/save_policy_to_safetensors.py
+++ b/tests/scripts/save_policy_to_safetensors.py
@@ -22,7 +22,6 @@
 from lerobot.common.datasets.factory import make_dataset
 from lerobot.common.policies.factory import make_policy
 from lerobot.common.utils.utils import init_hydra_config, set_global_seed
-from lerobot.scripts.train import make_optimizer_and_scheduler
 from tests.utils import DEFAULT_CONFIG_PATH
 
 
@@ -40,7 +39,7 @@ def get_policy_stats(env_name, policy_name, extra_overrides):
     dataset = make_dataset(cfg)
     policy = make_policy(cfg, dataset_stats=dataset.stats)
     policy.train()
-    optimizer, _ = make_optimizer_and_scheduler(cfg, policy)
+    optimizer, _ = policy.make_optimizer_and_scheduler(**cfg.training)
 
     dataloader = torch.utils.data.DataLoader(
         dataset,
diff --git a/tests/test_policies.py b/tests/test_policies.py
index d90f00716b..6926166101 100644
--- a/tests/test_policies.py
+++ b/tests/test_policies.py
@@ -37,7 +37,6 @@
 from lerobot.common.policies.normalize import Normalize, Unnormalize
 from lerobot.common.policies.policy_protocol import Policy
 from lerobot.common.utils.utils import init_hydra_config, seeded_context
-from lerobot.scripts.train import make_optimizer_and_scheduler
 from tests.scripts.save_policy_to_safetensors import get_policy_stats
 from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, require_cpu, require_env, require_x86_64_kernel
 
@@ -214,7 +213,7 @@ def test_act_backbone_lr():
     dataset = make_dataset(cfg)
     policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)
 
-    optimizer, _ = make_optimizer_and_scheduler(cfg, policy)
+    optimizer, _ = policy.make_optimizer_and_scheduler(**cfg.training)
     assert len(optimizer.param_groups) == 2
     assert optimizer.param_groups[0]["lr"] == cfg.training.lr
     assert optimizer.param_groups[1]["lr"] == cfg.training.lr_backbone

From 06fc9b89e17976d4a8e96b8e5421e448df40c630 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Mon, 2 Sep 2024 08:20:17 +0000
Subject: [PATCH 3/3] pass entire config to make_optimizer

---
 lerobot/common/policies/act/modeling_act.py  |  9 ++++---
 .../policies/diffusion/modeling_diffusion.py | 27 ++++++-------------
 .../common/policies/tdmpc/modeling_tdmpc.py  |  5 ++--
 .../common/policies/vqbet/modeling_vqbet.py  |  6 +++++
 lerobot/scripts/train.py                     |  2 +-
 tests/scripts/save_policy_to_safetensors.py  |  2 +-
 tests/test_policies.py                       |  2 +-
 7 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py
index ea8fdc2d61..70654d8800 100644
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -160,9 +160,8 @@ def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
 
         return loss_dict
 
-    def make_optimizer_and_scheduler(self, **kwargs):
+    def make_optimizer_and_scheduler(self, cfg):
         """Create the optimizer and learning rate scheduler for ACT"""
-        lr, lr_backbone, weight_decay = kwargs["lr"], kwargs["lr_backbone"], kwargs["weight_decay"]
         optimizer_params_dicts = [
             {
                 "params": [
@@ -177,10 +176,12 @@ def make_optimizer_and_scheduler(self, cfg):
                     for n, p in self.named_parameters()
                     if n.startswith("model.backbone") and p.requires_grad
                 ],
-                "lr": lr_backbone,
+                "lr": cfg.training.lr_backbone,
             },
         ]
-        optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=lr, weight_decay=weight_decay)
+        optimizer = torch.optim.AdamW(
+            optimizer_params_dicts, lr=cfg.training.lr, weight_decay=cfg.training.weight_decay
+        )
         lr_scheduler = None
         return optimizer, lr_scheduler
 
diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py
index 0093e451cd..6d276fa45a 100644
--- a/lerobot/common/policies/diffusion/modeling_diffusion.py
+++ b/lerobot/common/policies/diffusion/modeling_diffusion.py
@@ -156,33 +156,22 @@ def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
         loss = self.diffusion.compute_loss(batch)
         return {"loss": loss}
 
-    def make_optimizer_and_scheduler(self, **kwargs):
+    def make_optimizer_and_scheduler(self, cfg):
         """Create the optimizer and learning rate scheduler for Diffusion policy"""
-        lr, adam_betas, adam_eps, adam_weight_decay = (
-            kwargs["lr"],
-            kwargs["adam_betas"],
-            kwargs["adam_eps"],
-            kwargs["adam_weight_decay"],
-        )
-        lr_scheduler_name, lr_warmup_steps, offline_steps = (
-            kwargs["lr_scheduler"],
-            kwargs["lr_warmup_steps"],
-            kwargs["offline_steps"],
-        )
         optimizer = torch.optim.Adam(
             self.diffusion.parameters(),
-            lr,
-            adam_betas,
-            adam_eps,
-            adam_weight_decay,
+            cfg.training.lr,
+            cfg.training.adam_betas,
+            cfg.training.adam_eps,
+            cfg.training.adam_weight_decay,
         )
         from diffusers.optimization import get_scheduler
 
         lr_scheduler = get_scheduler(
-            lr_scheduler_name,
+            cfg.training.lr_scheduler,
             optimizer=optimizer,
-            num_warmup_steps=lr_warmup_steps,
-            num_training_steps=offline_steps,
+            num_warmup_steps=cfg.training.lr_warmup_steps,
+            num_training_steps=cfg.training.offline_steps,
         )
         return optimizer, lr_scheduler
 
diff --git a/lerobot/common/policies/tdmpc/modeling_tdmpc.py b/lerobot/common/policies/tdmpc/modeling_tdmpc.py
index 9e988c207d..169f67a0ae 100644
--- a/lerobot/common/policies/tdmpc/modeling_tdmpc.py
+++ b/lerobot/common/policies/tdmpc/modeling_tdmpc.py
@@ -534,10 +534,9 @@ def update(self):
         # we update every step and adjust the decay parameter `alpha` accordingly (0.99 -> 0.995)
         update_ema_parameters(self.model_target, self.model, self.config.target_model_momentum)
 
-    def make_optimizer_and_scheduler(self, **kwargs):
+    def make_optimizer_and_scheduler(self, cfg):
         """Create the optimizer and learning rate scheduler for TD-MPC"""
-        lr = kwargs["lr"]
-        optimizer = torch.optim.Adam(self.parameters(), lr)
+        optimizer = torch.optim.Adam(self.parameters(), cfg.training.lr)
         lr_scheduler = None
         return optimizer, lr_scheduler
 
diff --git a/lerobot/common/policies/vqbet/modeling_vqbet.py b/lerobot/common/policies/vqbet/modeling_vqbet.py
index 87cf59f195..18cf4491eb 100644
--- a/lerobot/common/policies/vqbet/modeling_vqbet.py
+++ b/lerobot/common/policies/vqbet/modeling_vqbet.py
@@ -152,6 +152,12 @@ def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
 
         return loss_dict
 
+    def make_optimizer_and_scheduler(self, cfg):
+        """Create the optimizer and learning rate scheduler for VQ-BeT"""
+        optimizer = VQBeTOptimizer(self, cfg)
+        scheduler = VQBeTScheduler(optimizer, cfg)
+        return optimizer, scheduler
+
 
 class SpatialSoftmax(nn.Module):
     """
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index e2cf55d6f8..0c048cfb62 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -281,7 +281,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
     assert isinstance(policy, nn.Module)
     # Create optimizer and scheduler
     # Temporary hack to move optimizer out of policy
-    optimizer, lr_scheduler = policy.make_optimizer_and_scheduler(**cfg.training)
+    optimizer, lr_scheduler = policy.make_optimizer_and_scheduler(cfg)
     grad_scaler = GradScaler(enabled=cfg.use_amp)
 
     step = 0  # number of policy updates (forward + backward + optim)
diff --git a/tests/scripts/save_policy_to_safetensors.py b/tests/scripts/save_policy_to_safetensors.py
index 7287ed7304..0336387743 100644
--- a/tests/scripts/save_policy_to_safetensors.py
+++ b/tests/scripts/save_policy_to_safetensors.py
@@ -39,7 +39,7 @@ def get_policy_stats(env_name, policy_name, extra_overrides):
     dataset = make_dataset(cfg)
     policy = make_policy(cfg, dataset_stats=dataset.stats)
     policy.train()
-    optimizer, _ = policy.make_optimizer_and_scheduler(**cfg.training)
+    optimizer, _ = policy.make_optimizer_and_scheduler(cfg)
 
     dataloader = torch.utils.data.DataLoader(
         dataset,
diff --git a/tests/test_policies.py b/tests/test_policies.py
index 6926166101..76a056d243 100644
--- a/tests/test_policies.py
+++ b/tests/test_policies.py
@@ -213,7 +213,7 @@ def test_act_backbone_lr():
     dataset = make_dataset(cfg)
     policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)
 
-    optimizer, _ = policy.make_optimizer_and_scheduler(**cfg.training)
+    optimizer, _ = policy.make_optimizer_and_scheduler(cfg)
     assert len(optimizer.param_groups) == 2
     assert optimizer.param_groups[0]["lr"] == cfg.training.lr
     assert optimizer.param_groups[1]["lr"] == cfg.training.lr_backbone