Commit

fixes
awaelchli committed Jan 29, 2023
1 parent 5f4f161 commit 973c197
Showing 4 changed files with 119 additions and 132 deletions.
2 changes: 1 addition & 1 deletion src/pytorch_lightning/core/module.py
@@ -1406,7 +1406,7 @@ def untoggle_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer]) ->
Args:
optimizer: The optimizer to untoggle.
"""
for opt in enumerate(self.trainer.optimizers):
for opt in self.trainer.optimizers:
# TODO: handle comparison when LightningOptimizer
if opt != optimizer:
for group in opt.param_groups:
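For context on the one-line fix in this hunk: the old loop iterated `enumerate(self.trainer.optimizers)`, which yields `(index, optimizer)` tuples, so `opt != optimizer` always held and the subsequent `opt.param_groups` access hit a tuple rather than an optimizer. A standalone sketch in plain PyTorch (not Lightning code) of the two patterns:

```python
# Sketch of the bug fixed above: enumerate() yields (index, optimizer) tuples,
# so the inequality check always passes and the later attribute access fails.
import torch
from torch.optim import SGD

optimizers = [
    SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1),
    SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1),
]
current = optimizers[0]

# Buggy iteration: every element is a tuple, never equal to `current`.
buggy = [opt for opt in enumerate(optimizers) if opt != current]
assert len(buggy) == 2
assert not hasattr(buggy[0], "param_groups")  # a tuple has no .param_groups

# Fixed iteration: only the genuinely different optimizer is selected.
fixed = [opt for opt in optimizers if opt != current]
assert fixed == [optimizers[1]]
assert hasattr(fixed[0], "param_groups")
```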
2 changes: 1 addition & 1 deletion src/pytorch_lightning/tuner/lr_finder.py
@@ -128,7 +128,7 @@ def _exchange_scheduler(self, trainer: "pl.Trainer") -> None:
scheduler = cast(LRScheduler, scheduler)

trainer.strategy.optimizers = [optimizer]
trainer.strategy.lr_scheduler_configs = [LRSchedulerConfig(scheduler, interval="step", opt_idx=0)]
trainer.strategy.lr_scheduler_configs = [LRSchedulerConfig(scheduler, interval="step")]
_validate_optimizers_attached(trainer.optimizers, trainer.lr_scheduler_configs)

def plot(self, suggest: bool = False, show: bool = False, ax: Optional["Axes"] = None) -> Optional["plt.Figure"]:
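This hunk drops the removed `opt_idx` argument when the learning-rate finder installs its temporary per-step scheduler; with multi-optimizer indices gone, the config needs only the scheduler and the interval. A hedged sketch of building such a config outside the tuner, assuming `LRSchedulerConfig` is importable from `pytorch_lightning.utilities.types` (the import path is an assumption, not shown in this diff):

```python
# Hedged sketch: constructing a per-step scheduler config without an optimizer index.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR
from pytorch_lightning.utilities.types import LRSchedulerConfig  # assumed import path

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = SGD(params, lr=0.1)
scheduler = LambdaLR(optimizer, lr_lambda=lambda step: 1.0)

# Previously this call also required opt_idx=0; now scheduler + interval suffice.
config = LRSchedulerConfig(scheduler, interval="step")
```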
208 changes: 105 additions & 103 deletions tests/tests_pytorch/core/test_lightning_module.py
@@ -115,80 +115,80 @@ def test_1_optimizer_toggle_model():

assert not model._param_requires_grad_state
# toggle optimizer was failing with a single optimizer
model.toggle_optimizer(optimizer, 0)
model.toggle_optimizer(optimizer)
assert model._param_requires_grad_state
model.untoggle_optimizer(0)
model.untoggle_optimizer(optimizer)
assert not model._param_requires_grad_state


def test_toggle_untoggle_2_optimizers_no_shared_parameters(tmpdir):
class TestModel(BoringModel):
def training_step(self, batch, batch_idx, optimizer_idx=None):
return super().training_step(batch, batch_idx)

def __init__(self):
super().__init__()
self.automatic_optimization = False
self.layer_1 = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32))

self.layer_2 = nn.Sequential(
nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 2)
)

# set some weights to False to check untoggle works as expected.
# set some weights to require no gradient to check that toggle/untoggle works as expected.
self.layer_1[2].weight.requires_grad = False
self.layer_1[4].weight.requires_grad = False

self.layer_2[1].weight.requires_grad = False
self.layer_2[3].weight.requires_grad = False

def training_step(self, batch, batch_idx):
opt1, opt2 = self.optimizers()

# Use the first optimizer, toggle it
self.toggle_optimizer(opt1)
loss = self.step(batch)
self.manual_backward(loss)
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is False
opt1.step()
opt1.zero_grad()
self.untoggle_optimizer(opt1)

# Use the second optimizer, toggle it
self.toggle_optimizer(opt2)
loss = self.step(batch)
self.manual_backward(loss)
assert self.layer_1[0].weight.requires_grad is False
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True
opt2.step()
opt2.zero_grad()
self.untoggle_optimizer(opt2)

def configure_optimizers(self):
optimizer = SGD(self.layer_1.parameters(), lr=0.1)
optimizer_1 = SGD(self.layer_1.parameters(), lr=0.1)
optimizer_2 = Adam(self.layer_2.parameters(), lr=0.1)
return [optimizer, optimizer_2]

def optimizer_step(
self,
current_epoch,
batch_nb,
optimizer,
optimizer_idx,
closure,
on_tpu=False,
using_lbfgs=False,
):
if optimizer_idx == 0:
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is False

if optimizer_idx == 1:
assert self.layer_1[0].weight.requires_grad is False
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True

optimizer.step(closure=closure)
return [optimizer_1, optimizer_2]

model = TestModel()
model.training_epoch_end = None

trainer = Trainer(
max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8, accumulate_grad_batches=2, limit_val_batches=0
)
trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8, limit_val_batches=0)
trainer.fit(model)
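
The rewritten test exercises the same checks through manual optimization: `toggle_optimizer(opt)` turns off `requires_grad` for every parameter that does not belong to `opt`, and `untoggle_optimizer(opt)` restores the flags recorded at toggle time; no optimizer index is passed anywhere. A condensed, hypothetical sketch of that pattern (module and layer names are illustrative, not the test's):

```python
import torch.nn as nn
import pytorch_lightning as pl
from torch.optim import SGD, Adam


class ToggleDemo(pl.LightningModule):  # hypothetical module, not the test's TestModel
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # manual optimization, as in the test
        self.encoder = nn.Linear(32, 32)
        self.head = nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        opt_enc, opt_head = self.optimizers()

        # While opt_enc is toggled, only the encoder's parameters require grad.
        self.toggle_optimizer(opt_enc)
        loss = self.encoder(batch).sum()
        self.manual_backward(loss)
        opt_enc.step()
        opt_enc.zero_grad()
        self.untoggle_optimizer(opt_enc)  # restore the previous requires_grad flags

        # Same dance for the second optimizer; no optimizer index anywhere.
        self.toggle_optimizer(opt_head)
        loss = self.head(self.encoder(batch)).sum()
        self.manual_backward(loss)
        opt_head.step()
        opt_head.zero_grad()
        self.untoggle_optimizer(opt_head)

    def configure_optimizers(self):
        return [SGD(self.encoder.parameters(), lr=0.1), Adam(self.head.parameters(), lr=0.1)]
```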


def test_toggle_untoggle_3_optimizers_shared_parameters(tmpdir):
class TestModel(BoringModel):
def __init__(self):
super().__init__()
self.automatic_optimization = False
self.layer_1 = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32))

self.layer_2 = nn.Sequential(
@@ -199,7 +199,7 @@ def __init__(self):
nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 2)
)

# set some weights to False to check untoggle works as expected.
# set some weights to require no gradient to check that toggle/untoggle works as expected.
self.layer_1[2].weight.requires_grad = False
self.layer_1[4].weight.requires_grad = False

@@ -209,61 +209,65 @@ def __init__(self):
self.layer_3[1].weight.requires_grad = False
self.layer_3[5].weight.requires_grad = False

def optimizer_step(
self,
current_epoch,
batch_nb,
optimizer,
optimizer_idx,
closure,
on_tpu=False,
using_lbfgs=False,
):
if optimizer_idx == 0:
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is False
assert self.layer_3[5].weight.requires_grad is False

if optimizer_idx == 1:
assert self.layer_1[0].weight.requires_grad is False
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is True
assert self.layer_3[5].weight.requires_grad is False

if optimizer_idx == 2:
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is False

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is True
assert self.layer_3[5].weight.requires_grad is False

optimizer.step(closure=closure)

def training_step(self, batch, batch_idx, optimizer_idx=None):
loss = super().training_step(batch, batch_idx)
# make sure the model is untoggle when returning None
return loss if batch_idx % 2 == 0 else None
def training_step(self, batch, batch_idx):
opt1, opt2, opt3 = self.optimizers()

# Use the first optimizer, toggle it
self.toggle_optimizer(opt1)
loss = self.step(batch)
self.manual_backward(loss)
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is False
assert self.layer_3[5].weight.requires_grad is False
opt1.step()
opt1.zero_grad()
self.untoggle_optimizer(opt1)

# Use the second optimizer, toggle it
self.toggle_optimizer(opt2)
loss = self.step(batch)
self.manual_backward(loss)
assert self.layer_1[0].weight.requires_grad is False
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is True

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is True
assert self.layer_3[5].weight.requires_grad is False
opt2.step()
opt2.zero_grad()
self.untoggle_optimizer(opt2)

# Use the third optimizer, toggle it
self.toggle_optimizer(opt3)
loss = self.step(batch)
self.manual_backward(loss)
assert self.layer_1[0].weight.requires_grad is True
assert self.layer_1[2].weight.requires_grad is False
assert self.layer_1[4].weight.requires_grad is False

assert self.layer_2[1].weight.requires_grad is False
assert self.layer_2[3].weight.requires_grad is False
assert self.layer_2[5].weight.requires_grad is False

assert self.layer_3[1].weight.requires_grad is False
assert self.layer_3[3].weight.requires_grad is True
assert self.layer_3[5].weight.requires_grad is False
opt3.step()
opt3.zero_grad()
self.untoggle_optimizer(opt3)

@staticmethod
def combine_generators(gen_1, gen_2):
@@ -278,9 +282,7 @@ def configure_optimizers(self):

model = TestModel()
model.training_epoch_end = None

trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8, accumulate_grad_batches=2)

trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8)
trainer.fit(model)


@@ -356,7 +358,7 @@ class TestModel(BoringModel):
has_validated_gradients = False
custom_gradient_clip_val = 1e-2

def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm):
def configure_gradient_clipping(self, optimizer, gradient_clip_val, gradient_clip_algorithm):
assert gradient_clip_val == self.trainer.gradient_clip_val
assert gradient_clip_algorithm == self.trainer.gradient_clip_algorithm

@@ -385,7 +387,7 @@ def test_lightning_module_configure_gradient_clipping_different_argument_values(
class TestModel(BoringModel):
custom_gradient_clip_val = 1e-2

def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm):
def configure_gradient_clipping(self, optimizer, gradient_clip_val, gradient_clip_algorithm):
self.clip_gradients(optimizer, gradient_clip_val=self.custom_gradient_clip_val)

model = TestModel()
@@ -401,7 +403,7 @@ def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_va
class TestModel(BoringModel):
custom_gradient_clip_algorithm = "foo"

def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm):
def configure_gradient_clipping(self, optimizer, gradient_clip_val, gradient_clip_algorithm):
self.clip_gradients(optimizer, gradient_clip_algorithm=self.custom_gradient_clip_algorithm)

model = TestModel()
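The last three hunks switch `configure_gradient_clipping` overrides to the new signature without `optimizer_idx`. A minimal, hypothetical override in that style (the clip value mirrors the tests' `custom_gradient_clip_val` and is otherwise arbitrary):

```python
import pytorch_lightning as pl


class ClipDemo(pl.LightningModule):  # hypothetical module name
    def configure_gradient_clipping(self, optimizer, gradient_clip_val, gradient_clip_algorithm):
        # Delegate to the built-in helper with a custom value, as the updated tests do.
        self.clip_gradients(optimizer, gradient_clip_val=1e-2)
```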