[feat] Named Parameter Groups in LearningRateMonitor #7987

Merged
merged 20 commits into from
Jun 17, 2021
Changes from 9 commits
Commits
20 commits
7c4ddf4
let LearningRateMonitor use `parameter_group` names
AffineParameter Jun 14, 2021
cb0b9e3
update tests
AffineParameter Jun 14, 2021
5177dcf
update docs
AffineParameter Jun 15, 2021
cce0041
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 15, 2021
d9e717e
clean up after linter
AffineParameter Jun 15, 2021
71fdf78
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 15, 2021
3d34575
rename the new unit test :facepalm:
AffineParameter Jun 15, 2021
df7fd3a
attempt to head-fake the auto-linter
AffineParameter Jun 15, 2021
2decdb6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 15, 2021
44ac6af
next attempt to satisfy the linter
AffineParameter Jun 15, 2021
5ca983b
Respond to review comments & adjust if/else
AffineParameter Jun 16, 2021
666e303
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 16, 2021
41c301f
Update CHANGELOG.md
AffineParameter Jun 16, 2021
d46fd5e
Update pytorch_lightning/callbacks/lr_monitor.py
kaushikb11 Jun 16, 2021
0d14952
Update pytorch_lightning/callbacks/lr_monitor.py
AffineParameter Jun 16, 2021
ff59320
Update pytorch_lightning/callbacks/lr_monitor.py
AffineParameter Jun 16, 2021
c13ae59
Update tests/callbacks/test_lr_monitor.py
AffineParameter Jun 16, 2021
40181c8
remove unnecessary training/val loops during unit tests
AffineParameter Jun 16, 2021
c729971
revert suggested simplification & use 1-indexing in enumerate
AffineParameter Jun 16, 2021
9d1af7e
Update pytorch_lightning/callbacks/lr_monitor.py
AffineParameter Jun 16, 2021
64 changes: 53 additions & 11 deletions pytorch_lightning/callbacks/lr_monitor.py
@@ -20,7 +20,7 @@

"""
from collections import defaultdict
from typing import Any, DefaultDict, Dict, List, Optional, Type
from typing import Any, DefaultDict, Dict, List, Optional, Set, Type

from torch.optim.optimizer import Optimizer

@@ -55,7 +55,9 @@ class LearningRateMonitor(Callback):
In case of multiple optimizers of the same type, they will be named ``Adam``,
``Adam-1`` etc. If an optimizer has multiple parameter groups, they will
be named ``Adam/pg1``, ``Adam/pg2`` etc. To control naming, pass in a
``name`` keyword in the construction of the learning rate schedulers
``name`` keyword in the construction of the learning rate schedulers.
A ``name`` keyword can also be used for parameter groups in the
construction of the optimizer.

Example::

@@ -67,6 +69,21 @@ def configure_optimizer(self):
}
return [optimizer], [lr_scheduler]

Example::

def configure_optimizers(self):
optimizer = torch.optim.SGD(
[
{
'params': [p for p in self.parameters()],
'name': 'my_parameter_group_name'
}
],
lr=0.1
)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, ...)
return [optimizer], [lr_scheduler]

"""

def __init__(self, logging_interval: Optional[str] = None, log_momentum: bool = False):
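
As an illustration of the naming behaviour described in the updated docstring above, here is a minimal sketch (not part of the diff) of a LightningModule that names its parameter groups; the class, the attributes `encoder`/`head`, and the group names are hypothetical:

import torch
from pytorch_lightning import LightningModule

class NamedGroupsModule(LightningModule):
    # Hypothetical module: two named parameter groups on a single SGD optimizer.
    def __init__(self):
        super().__init__()
        self.encoder = torch.nn.Linear(32, 16)
        self.head = torch.nn.Linear(16, 2)

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(
            [
                {'params': self.encoder.parameters(), 'name': 'encoder', 'lr': 0.1},
                {'params': self.head.parameters(), 'name': 'head', 'lr': 0.01},
            ],
            lr=0.1,
        )
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        return [optimizer], [lr_scheduler]

With `LearningRateMonitor()` in the Trainer callbacks, the logged keys would then be expected to read 'lr-SGD/encoder' and 'lr-SGD/head' instead of the positional 'lr-SGD/pg1' and 'lr-SGD/pg2'.
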
@@ -150,11 +167,11 @@ def _extract_stats(self, trainer, interval: str) -> Dict[str, float]:
use_betas = 'betas' in opt.defaults

for i, pg in enumerate(param_groups):
suffix = f'/pg{i + 1}' if len(param_groups) > 1 else ''
lr = self._extract_lr(pg, f'{name}{suffix}')
name_and_suffix = self._add_suffix(name, param_groups, i)
lr = self._extract_lr(pg, name_and_suffix)
latest_stat.update(lr)
momentum = self._extract_momentum(
param_group=pg, name=f'{name}-momentum{suffix}', use_betas=use_betas
param_group=pg, name=name_and_suffix.replace(name, f'{name}-momentum'), use_betas=use_betas
)
latest_stat.update(momentum)

@@ -192,6 +209,28 @@ def _add_prefix(
count = seen_optimizer_types[optimizer_cls]
return name + f'-{count - 1}' if count > 1 else name

def _add_suffix(self, name: str, param_groups: List[Dict], param_group_index: int, use_names: bool = True) -> str:
if len(param_groups) > 1:
if not use_names:
return f'{name}/pg{param_group_index+1}'
else:
pg_name = param_groups[param_group_index].get('name', f'pg{param_group_index+1}')
return f'{name}/{pg_name}'
else:
if not use_names:
return name
else:
pg_name = param_groups[param_group_index].get('name')
return f'{name}/{pg_name}' if pg_name else name

def _duplicate_param_group_names(self, param_groups: List[Dict]) -> Set[str]:
names = [pg.get('name', f'pg{i+1}') for i, pg in enumerate(param_groups)]
unique = set(names)
if len(names) == len(unique):
return set()
else:
return set(n for n in names if names.count(n) > 1)

def _find_names(self, lr_schedulers: List, add_lr_sch_names: bool = True) -> List[str]:
# Create unique names in the case we have multiple of the same learning
# rate scheduler + multiple parameter groups
@@ -212,15 +251,18 @@ def _find_names(self, lr_schedulers: List, add_lr_sch_names: bool = True) -> List[str]:

# Multiple param groups for the same scheduler
param_groups = sch.optimizer.param_groups
duplicates = self._duplicate_param_group_names(param_groups)
if duplicates:
raise MisconfigurationException(
'A single `Optimizer` cannot have multiple parameter groups with identical '
f'`name` values. {name} has duplicated parameter group names {duplicates}'
)

name = self._add_prefix(name, optimizer_cls, seen_optimizer_types)

if len(param_groups) != 1:
for i in range(len(param_groups)):
temp = f'{name}/pg{i + 1}'
names.append(temp)
else:
names.append(name)
for i in range(len(param_groups)):
name_and_suffix = self._add_suffix(name, param_groups, i)
names.append(name_and_suffix)

if add_lr_sch_names:
self.lr_sch_names.append(name)
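
As a side note, the naming rule added by `_add_suffix` can be summarised with a small self-contained sketch (illustrative only, not the library API); the function name `add_suffix` and the sample inputs are assumptions:

from typing import Dict, List

def add_suffix(name: str, param_groups: List[Dict], index: int, use_names: bool = True) -> str:
    # Prefer an explicit 'name' key on the parameter group; otherwise fall back to
    # a 1-indexed positional 'pgN' suffix, and omit the suffix for a single unnamed group.
    if len(param_groups) > 1:
        if not use_names:
            return f'{name}/pg{index + 1}'
        pg_name = param_groups[index].get('name', f'pg{index + 1}')
        return f'{name}/{pg_name}'
    if not use_names:
        return name
    pg_name = param_groups[index].get('name')
    return f'{name}/{pg_name}' if pg_name else name

# A single unnamed group keeps the bare optimizer name.
assert add_suffix('lr-SGD', [{'lr': 0.1}], 0) == 'lr-SGD'
# Multiple unnamed groups fall back to positional, 1-indexed suffixes.
assert add_suffix('lr-SGD', [{'lr': 0.1}, {'lr': 0.01}], 1) == 'lr-SGD/pg2'
# An explicit 'name' key wins over the positional suffix.
assert add_suffix('lr-SGD', [{'lr': 0.1, 'name': 'encoder'}, {'lr': 0.01}], 0) == 'lr-SGD/encoder'
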
71 changes: 71 additions & 0 deletions tests/callbacks/test_lr_monitor.py
@@ -283,6 +283,77 @@ def configure_optimizers(self):
assert lr_monitor.lr_sch_names == list(lr_monitor.lrs.keys()) == ['my_logging_name']


def test_lr_monitor_custom_pg_name(tmpdir):

class TestModel(BoringModel):

def configure_optimizers(self):
optimizer = torch.optim.SGD([{'params': [p for p in self.layer.parameters()], 'name': 'linear'}], lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
return [optimizer], [lr_scheduler]

lr_monitor = LearningRateMonitor()
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=2,
limit_val_batches=0.1,
limit_train_batches=0.5,
callbacks=[lr_monitor],
progress_bar_refresh_rate=0,
weights_summary=None,
)
trainer.fit(TestModel())
assert lr_monitor.lr_sch_names == ['lr-SGD']
assert list(lr_monitor.lrs.keys()) == ['lr-SGD/linear']


def test_lr_monitor_duplicate_custom_pg_names(tmpdir):
tutils.reset_seed()

class TestModel(BoringModel):

def __init__(self):
super().__init__()
self.linear_a = torch.nn.Linear(32, 16)
self.linear_b = torch.nn.Linear(16, 2)

def forward(self, x):
x = self.linear_a(x)
x = self.linear_b(x)
return x

def configure_optimizers(self):
optimizer = torch.optim.SGD([
{
'params': [p for p in self.linear_a.parameters()],
'name': 'linear'
},
{
'params': [p for p in self.linear_b.parameters()],
'name': 'linear'
},
],
lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
return [optimizer], [lr_scheduler]

lr_monitor = LearningRateMonitor()
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=2,
limit_val_batches=0.1,
limit_train_batches=0.5,
callbacks=[lr_monitor],
progress_bar_refresh_rate=0,
weights_summary=None,
)

with pytest.raises(
MisconfigurationException, match='A single `Optimizer` cannot have multiple parameter groups with identical'
):
trainer.fit(TestModel())


def test_multiple_optimizers_basefinetuning(tmpdir):

class TestModel(BoringModel):
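
For completeness, a companion sketch (again illustrative, not the library code) of the duplicate-name check that raises the `MisconfigurationException` exercised by `test_lr_monitor_duplicate_custom_pg_names`:

from typing import Dict, List, Set

def duplicate_param_group_names(param_groups: List[Dict]) -> Set[str]:
    # Resolve each group's name, falling back to the positional 'pgN' label,
    # and report any name that occurs more than once.
    names = [pg.get('name', f'pg{i + 1}') for i, pg in enumerate(param_groups)]
    if len(names) == len(set(names)):
        return set()
    return {n for n in names if names.count(n) > 1}

# Two groups sharing the name 'linear' mirror the failing test case above.
assert duplicate_param_group_names([{'name': 'linear'}, {'name': 'linear'}]) == {'linear'}
assert duplicate_param_group_names([{'name': 'encoder'}, {'name': 'head'}]) == set()
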