Rename log_save_interval, row_log_interval #3748

Merged · 18 commits · Oct 6, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -57,6 +57,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Deprecated

- Renamed Trainer arguments `row_log_interval` >> `log_every_n_steps` and `log_save_interval` >> `flush_logs_every_n_steps` ([#3748](https://github.com/PyTorchLightning/pytorch-lightning/pull/3748))

### Removed

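For readers updating existing code, here is a minimal before/after sketch of the rename, using the defaults documented in this PR (the deprecated spellings keep working until v0.11.0 but emit a `DeprecationWarning`):

```python
from pytorch_lightning import Trainer

# Old spelling: deprecated in v0.10.0, removed in v0.11.0
trainer = Trainer(row_log_interval=50, log_save_interval=100)

# New spelling introduced by this PR (same defaults)
trainer = Trainer(log_every_n_steps=50, flush_logs_every_n_steps=100)
```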
12 changes: 6 additions & 6 deletions pytorch_lightning/trainer/__init__.py
@@ -601,15 +601,15 @@ def world_size(self):

.. note:: Might slow performance because it uses the output of nvidia-smi.

log_save_interval
^^^^^^^^^^^^^^^^^
flush_logs_every_n_steps
^^^^^^^^^^^^^^^^^^^^^^^^

Writes logs to disk this often.

.. testcode::

# default used by the Trainer
trainer = Trainer(log_save_interval=100)
trainer = Trainer(flush_logs_every_n_steps=100)

See Also:
- :ref:`Experiment Reporting <experiment_reporting>`
@@ -936,15 +936,15 @@ def world_size(self):
# resume from a specific checkpoint
trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt')

row_log_interval
^^^^^^^^^^^^^^^^
log_every_n_steps
^^^^^^^^^^^^^^^^^

How often to add logging rows (does not write to disk)

.. testcode::

# default used by the Trainer
trainer = Trainer(row_log_interval=50)
trainer = Trainer(log_every_n_steps=50)

See Also:
- :ref:`Experiment Reporting <experiment_reporting>`
10 changes: 6 additions & 4 deletions pytorch_lightning/trainer/connectors/logger_connector.py
@@ -34,11 +34,13 @@ def __init__(self, trainer):
self.progress_bar_metrics = {}
self.eval_loop_results = []

def on_trainer_init(self, logger, log_save_interval, row_log_interval):
def on_trainer_init(self, logger, flush_logs_every_n_steps, log_every_n_steps):
# logging
self.configure_logger(logger)
self.trainer.log_save_interval = log_save_interval
self.trainer.row_log_interval = row_log_interval
# todo: the IDE complains because these should be initialized in the Trainer init, at least as placeholders,
# and only assigned their actual values here
self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps
self.trainer.log_every_n_steps = log_every_n_steps

def configure_logger(self, logger):
if logger is True:
@@ -510,7 +512,7 @@ def __gather_result_across_time_and_optimizers(self, epoch_output):
def log_train_step_metrics(self, batch_output):
# when metrics should be logged
should_log_metrics = (
(self.trainer.global_step + 1) % self.trainer.row_log_interval == 0 or self.trainer.should_stop
(self.trainer.global_step + 1) % self.trainer.log_every_n_steps == 0 or self.trainer.should_stop
)
if should_log_metrics or self.trainer.fast_dev_run:
# logs user requested information to logger
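The connector now stores the renamed attribute, and `log_train_step_metrics` gates on it with the same step-based check as before. A standalone sketch of that condition (the helper function is illustrative; only the attribute semantics come from the diff):

```python
def should_log_metrics(global_step: int, log_every_n_steps: int, should_stop: bool) -> bool:
    # Mirrors log_train_step_metrics: log on every Nth step (global_step is
    # 0-based, hence the +1), or immediately once the trainer is told to stop.
    return (global_step + 1) % log_every_n_steps == 0 or should_stop


# With the default log_every_n_steps=50, steps 49, 99, 149, ... trigger logging.
assert [s for s in range(150) if should_log_metrics(s, 50, False)] == [49, 99, 149]
```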
39 changes: 39 additions & 0 deletions pytorch_lightning/trainer/deprecated_api.py
@@ -13,3 +13,42 @@
# limitations under the License.

"""Mirroring deprecated API"""
from abc import ABC

from pytorch_lightning.utilities import rank_zero_warn


class TrainerDeprecatedAPITillVer0_11(ABC):
flush_logs_every_n_steps: int
log_every_n_steps: int

def __init__(self):
super().__init__() # mixin calls super too

@property
def log_save_interval(self) -> int:
"""Back compatibility, will be removed in v0.11.0"""
rank_zero_warn("Attribute `log_save_interval` is now set by `flush_logs_every_n_steps` since v0.10.0"
" and this method will be removed in v0.11.0", DeprecationWarning)
return self.flush_logs_every_n_steps

@log_save_interval.setter
def log_save_interval(self, val: int):
"""Back compatibility, will be removed in v0.11.0"""
rank_zero_warn("Attribute `log_save_interval` is now set by `flush_logs_every_n_steps` since v0.10.0"
" and this method will be removed in v0.11.0", DeprecationWarning)
self.flush_logs_every_n_steps = val

@property
def row_log_interval(self) -> int:
"""Back compatibility, will be removed in v0.10.0"""
rank_zero_warn("Attribute `row_log_interval` is now set by `log_every_n_steps` since v0.10.0"
" and this method will be removed in v0.11.0", DeprecationWarning)
return self.log_every_n_steps

@row_log_interval.setter
def row_log_interval(self, val: int):
"""Back compatibility, will be removed in v0.10.0"""
rank_zero_warn("Attribute `row_log_interval` is now set by `log_every_n_steps` since v0.10.0"
" and this method will be removed in v0.11.0", DeprecationWarning)
self.log_every_n_steps = val
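The mixin above is a property-based backward-compatibility shim: the old attribute names stay readable and writable but warn and delegate to the new ones. A self-contained sketch of the same pattern (class and attribute names here are illustrative, not part of the PR):

```python
import warnings


class RenamedAttributeShim:
    """Expose a deprecated attribute name as a warning alias for its replacement."""

    new_value: int = 100  # the canonical attribute

    @property
    def old_value(self) -> int:
        warnings.warn("`old_value` was renamed to `new_value`", DeprecationWarning)
        return self.new_value

    @old_value.setter
    def old_value(self, val: int) -> None:
        warnings.warn("`old_value` was renamed to `new_value`", DeprecationWarning)
        self.new_value = val


obj = RenamedAttributeShim()
obj.old_value = 5          # warns, then writes through to the new attribute
assert obj.new_value == 5  # reads and writes stay in sync
```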
67 changes: 46 additions & 21 deletions pytorch_lightning/trainer/trainer.py
@@ -29,6 +29,7 @@
from pytorch_lightning.trainer.callback_hook import TrainerCallbackHookMixin
from pytorch_lightning.trainer.configuration_validator import ConfigValidator
from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_11
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
from pytorch_lightning.trainer.optimizers import TrainerOptimizersMixin
@@ -78,6 +79,7 @@ class Trainer(
TrainerLoggingMixin,
TrainerTrainingTricksMixin,
TrainerDataLoadingMixin,
TrainerDeprecatedAPITillVer0_11,
):
def __init__(
self,
@@ -108,8 +110,8 @@ def __init__(
limit_val_batches: Union[int, float] = 1.0,
limit_test_batches: Union[int, float] = 1.0,
val_check_interval: Union[int, float] = 1.0,
log_save_interval: int = 100,
row_log_interval: int = 50,
flush_logs_every_n_steps: int = 100,
log_every_n_steps: int = 50,
distributed_backend: Optional[str] = None,
sync_batchnorm: bool = False,
precision: int = 32,
@@ -129,8 +131,10 @@ def __init__(
prepare_data_per_node: bool = True,
cluster_environment: ClusterEnvironment = None,
amp_backend: str = 'native',
amp_level: str = 'O2', # backward compatible, todo: remove in v1.0.0
amp_level: str = 'O2',
overfit_pct: float = None, # backward compatible, todo: remove in v1.0.0
log_save_interval: Optional[int] = None, # backward compatible, todo: remove in 0.11
row_log_interval: Optional[int] = None, # backward compatible, todo: remove in 0.11
):
r"""
Customize every aspect of training via flags
@@ -178,10 +182,14 @@ def __init__(
distributed_backend: The distributed backend to use (dp, ddp, ddp2, ddp_spawn, ddp_cpu)

early_stop_callback (:class:`pytorch_lightning.callbacks.EarlyStopping`).
Deprecated since v0.10.0 and will be removed in v1.0.
.. warning:: .. deprecated:: 0.10.0

Will be removed in v1.0.

fast_dev_run: runs 1 batch of train, test and val to find any bugs (ie: a sort of unit test).

flush_logs_every_n_steps: How often to flush logs to disk (defaults to every 100 steps).

gpus: number of gpus to train on (int) or which GPUs to train on (list or str) applied per node

gradient_clip_val: 0 means don't clip.
@@ -196,7 +204,12 @@

log_gpu_memory: None, 'min_max', 'all'. Might slow performance

log_save_interval: Writes logs to disk this often
log_every_n_steps: How often to log within steps (defaults to every 50 steps).

log_save_interval: How often to flush logs to disk.
.. warning:: .. deprecated:: 0.10.0

Use `flush_logs_every_n_steps` instead. Will be removed in v0.11.0.

prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
@@ -235,7 +248,10 @@ def __init__(
resume_from_checkpoint: To resume training from a specific checkpoint pass in the path here.
This can be a URL.

row_log_interval: How often to add logging rows (does not write to disk)
row_log_interval: How often to log within steps.
.. warning:: .. deprecated:: 0.10.0

Use `log_every_n_steps` instead. Will be removed in v0.11.0.

sync_batchnorm: Synchronize batch norm layers between process groups/whole world.

@@ -262,6 +278,19 @@
"""
super().__init__()

# deprecation warnings
if row_log_interval is not None:
warnings.warn("Argument `row_log_interval` is deprecated in v0.10, use `log_every_n_steps` instead."
" It will be removed in v0.11.0.", DeprecationWarning)
log_every_n_steps = row_log_interval

if log_save_interval is not None:
warnings.warn(
"Argument `log_save_interval` is deprecated in v0.10, use `flush_logs_every_n_steps` instead."
" It will be removed in v0.11.0.", DeprecationWarning
)
flush_logs_every_n_steps = log_save_interval

# init connectors
self.dev_debugger = InternalDebugger(self)
self.config_validator = ConfigValidator(self)
@@ -299,7 +328,7 @@ def __init__(
process_position,
default_root_dir,
weights_save_path,
resume_from_checkpoint
resume_from_checkpoint,
)

# hook
@@ -310,18 +339,12 @@ def __init__(

# init data flags
self.data_connector.on_trainer_init(
check_val_every_n_epoch,
reload_dataloaders_every_epoch,
prepare_data_per_node
check_val_every_n_epoch, reload_dataloaders_every_epoch, prepare_data_per_node
)

# init training tricks
self.training_tricks_connector.on_trainer_init(
gradient_clip_val,
track_grad_norm,
accumulate_grad_batches,
truncated_bptt_steps,
terminate_on_nan
gradient_clip_val, track_grad_norm, accumulate_grad_batches, truncated_bptt_steps, terminate_on_nan
)

# init accelerator related flags
@@ -351,7 +374,7 @@ def __init__(
self.profile_connector.on_trainer_init(profiler)

# init logger flags
self.logger_connector.on_trainer_init(logger, log_save_interval, row_log_interval)
self.logger_connector.on_trainer_init(logger, flush_logs_every_n_steps, log_every_n_steps)

# init debugging flags
self.debugging_connector.on_init_start(
@@ -361,7 +384,7 @@
limit_test_batches,
val_check_interval,
overfit_batches,
fast_dev_run
fast_dev_run,
)

# set precision
@@ -511,13 +534,15 @@ def train(self):
met_min_steps = self.global_step >= self.min_steps if self.min_steps else True

if self.should_stop:
if (met_min_epochs and met_min_steps):
if met_min_epochs and met_min_steps:
self.train_loop.on_train_end()
return
else:
log.info('Trainer was signaled to stop but required minimum epochs'
f' ({self.min_epochs}) or minimum steps ({self.min_steps}) has'
' not been met. Training will continue...')
log.info(
'Trainer was signaled to stop but required minimum epochs'
f' ({self.min_epochs}) or minimum steps ({self.min_steps}) has'
' not been met. Training will continue...'
)

# hook
self.train_loop.on_train_end()
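Because the deprecated keyword arguments are remapped at the top of `__init__`, callers that still pass the old names get the new attributes populated plus a warning. A quick sketch of what a user would observe (assuming a pytorch-lightning build containing this PR):

```python
import warnings

from pytorch_lightning import Trainer

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer = Trainer(row_log_interval=8)  # deprecated spelling

# The value lands on the renamed attribute...
assert trainer.log_every_n_steps == 8
# ...and a DeprecationWarning was emitted for the old name.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```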
4 changes: 2 additions & 2 deletions pytorch_lightning/trainer/training_loop.py
@@ -449,7 +449,7 @@ def on_before_backward(self, batch_idx, optimizer):

def _track_gradient_norm(self):
grad_norm_dict = {}
if (self.trainer.global_step + 1) % self.trainer.row_log_interval == 0:
if (self.trainer.global_step + 1) % self.trainer.log_every_n_steps == 0:
if float(self.trainer.track_grad_norm) > 0:
model = self.trainer.get_model()
grad_norm_dict = model.grad_norm(self.trainer.track_grad_norm)
@@ -788,7 +788,7 @@ def build_train_args(self, batch, batch_idx, opt_idx, hiddens):
def save_loggers_on_train_batch_end(self):
# when loggers should save to disk
should_save_log = (
(self.trainer.global_step + 1) % self.trainer.log_save_interval == 0 or self.trainer.should_stop
(self.trainer.global_step + 1) % self.trainer.flush_logs_every_n_steps == 0 or self.trainer.should_stop
)
if should_save_log or self.trainer.fast_dev_run:
if self.trainer.is_global_zero and self.trainer.logger is not None:
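The disk-flush gate in `save_loggers_on_train_batch_end` has the same shape as the metrics gate, just driven by `flush_logs_every_n_steps`; a stop signal flushes whatever partial interval is pending. A small sketch of that check (the helper name is illustrative):

```python
def should_flush_logs(global_step: int, flush_logs_every_n_steps: int, should_stop: bool) -> bool:
    # Mirrors save_loggers_on_train_batch_end: flush every Nth step, or right
    # away if training is about to stop mid-interval.
    return (global_step + 1) % flush_logs_every_n_steps == 0 or should_stop


assert should_flush_logs(global_step=99, flush_logs_every_n_steps=100, should_stop=False)
assert should_flush_logs(global_step=30, flush_logs_every_n_steps=100, should_stop=True)
assert not should_flush_logs(global_step=30, flush_logs_every_n_steps=100, should_stop=False)
```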
10 changes: 5 additions & 5 deletions tests/models/test_grad_norm.py
@@ -59,7 +59,7 @@ def test_grad_tracking(tmpdir, norm_type, rtol=5e-3):
default_root_dir=tmpdir,
max_epochs=3,
track_grad_norm=norm_type,
row_log_interval=1, # request grad_norms every batch
log_every_n_steps=1, # request grad_norms every batch
)
result = trainer.fit(model)

Expand All @@ -76,20 +76,20 @@ def test_grad_tracking(tmpdir, norm_type, rtol=5e-3):
assert np.allclose(log, mod, rtol=rtol)


@pytest.mark.parametrize("row_log_interval", [1, 2, 3])
def test_grad_tracking_interval(tmpdir, row_log_interval):
@pytest.mark.parametrize("log_every_n_steps", [1, 2, 3])
def test_grad_tracking_interval(tmpdir, log_every_n_steps):
""" Test that gradient norms get tracked in the right interval and that everytime the same keys get logged. """
trainer = Trainer(
default_root_dir=tmpdir,
track_grad_norm=2,
row_log_interval=row_log_interval,
log_every_n_steps=log_every_n_steps,
max_steps=10,
)

with patch.object(trainer.logger, "log_metrics") as mocked:
model = EvalModelTemplate()
trainer.fit(model)
expected = trainer.global_step // row_log_interval
expected = trainer.global_step // log_every_n_steps
grad_norm_dicts = []
for _, kwargs in mocked.call_args_list:
metrics = kwargs.get("metrics", {})
2 changes: 1 addition & 1 deletion tests/models/test_tpu.py
@@ -263,7 +263,7 @@ def test_result_obj_on_tpu(tmpdir):
default_root_dir=tmpdir,
max_epochs=epochs,
callbacks=[EarlyStopping()],
row_log_interval=2,
log_every_n_steps=2,
limit_train_batches=batches,
weights_summary=None,
tpu_cores=8
16 changes: 14 additions & 2 deletions tests/test_deprecated.py
@@ -18,13 +18,25 @@ def _soft_unimport_module(str_module):

def test_tbd_remove_in_v0_11_0_trainer():
with pytest.deprecated_call(match='will be removed in v0.11.0'):
lr_logger = LearningRateLogger()
LearningRateLogger()

with pytest.deprecated_call(match='will be removed in v0.11.0'):
trainer = Trainer(row_log_interval=8)
assert trainer.log_every_n_steps == 8
with pytest.deprecated_call(match='will be removed in v0.11.0'):
assert trainer.row_log_interval == 8

with pytest.deprecated_call(match='will be removed in v0.11.0'):
trainer = Trainer(log_save_interval=9)
assert trainer.flush_logs_every_n_steps == 9
with pytest.deprecated_call(match='will be removed in v0.11.0'):
assert trainer.log_save_interval == 9


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_tbd_remove_in_v0_11_0_trainer_gpu():
with pytest.deprecated_call(match='will be removed in v0.11.0'):
gpu_usage = GpuUsageLogger()
GpuUsageLogger()


class ModelVer0_6(EvalModelTemplate):
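The updated test exercises the deprecated constructor arguments and the property getters; a complementary sketch (hypothetical, not part of this PR) that would also cover the property setters added in `deprecated_api.py`:

```python
import pytest

from pytorch_lightning import Trainer


def test_deprecated_log_interval_setters_round_trip():
    trainer = Trainer(log_every_n_steps=50, flush_logs_every_n_steps=100)

    with pytest.deprecated_call(match='will be removed in v0.11.0'):
        trainer.row_log_interval = 4          # setter warns and writes through
    assert trainer.log_every_n_steps == 4

    with pytest.deprecated_call(match='will be removed in v0.11.0'):
        trainer.log_save_interval = 200
    assert trainer.flush_logs_every_n_steps == 200
```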