Set Activation Checkpointing Defaults #7404

Merged 5 commits on Sep 9, 2023
12 changes: 10 additions & 2 deletions nemo/collections/nlp/modules/common/megatron/transformer.py
@@ -981,14 +981,14 @@ def __init__(
                 elif self.activations_checkpoint_method == 'block':
                     logging.info(
                         (
-                            f'Using block activation checkpointing requires activations_checkpoint_num_layers to be set.'
-                            f'Got: {self.activations_checkpoint_num_layers}. Setting to 1 by default.'
+                            f'Using block activation checkpointing with granularity selective forces all layers to use checkpointing.'
                         )
                     )
                 else:
                     raise ValueError(
                         f'activations_checkpoint_method should be "uniform" or "block" when using granularity selective.'
                     )
+                self.activations_checkpoint_num_layers = num_layers  # forcing all layers
             elif self.activations_checkpoint_granularity == 'full':
                 if self.activations_checkpoint_method in ['uniform', 'block']:
                     if not self.activations_checkpoint_num_layers:
@@ -998,6 +998,7 @@ def __init__(
                                 f'Got: {self.activations_checkpoint_num_layers}. Setting to 1 by default.'
                             )
                         )
+                        self.activations_checkpoint_num_layers = 1  # keeping the old default
Collaborator Author: This code should not execute, since TransformerConfig enforces that recompute_num_layers is not None, so the older default of 1 is kept here instead of being changed.

                 else:
                     raise ValueError(
                         f'activations_checkpoint_method should be "uniform" or "block" when using granularity full.'
@@ -1047,6 +1048,13 @@ def __init__(
         # TODO: Add similar assert for encoder-decoder.
 
         self.num_layers = self.get_num_layers(num_layers)
 
+        if (
+            self.activations_checkpoint_num_layers is not None
+            and self.activations_checkpoint_num_layers > self.num_layers
+        ):
+            self.activations_checkpoint_num_layers = self.num_layers
+
         # Transformer layers.
         def build_layer(layer_number):
             if isinstance(layer_type, list):
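
For reference, here is a minimal standalone sketch of how the checkpointing default appears to resolve after this change. The helper name resolve_checkpoint_num_layers and its signature are hypothetical (not part of NeMo); it only mirrors the three branches touched by the diff: 'selective' granularity forces all layers to be checkpointed, 'full' granularity falls back to 1 layer when nothing is configured, and the new clamp caps the value at the actual layer count.

# Hypothetical sketch of the default resolution introduced by this PR; not NeMo code.
from typing import Optional


def resolve_checkpoint_num_layers(
    granularity: Optional[str],            # 'selective', 'full', or None
    method: Optional[str],                 # 'uniform' or 'block'
    checkpoint_num_layers: Optional[int],  # user-configured value, may be unset
    num_layers: int,                       # layers actually built for this rank
) -> Optional[int]:
    if granularity == 'selective':
        if method not in ('uniform', 'block'):
            raise ValueError('activations_checkpoint_method should be "uniform" or "block".')
        # Selective granularity forces all layers to use checkpointing.
        checkpoint_num_layers = num_layers
    elif granularity == 'full':
        if method not in ('uniform', 'block'):
            raise ValueError('activations_checkpoint_method should be "uniform" or "block".')
        if not checkpoint_num_layers:
            # Keep the old default of one layer when nothing was configured.
            checkpoint_num_layers = 1

    # Never checkpoint more layers than the model actually has.
    if checkpoint_num_layers is not None and checkpoint_num_layers > num_layers:
        checkpoint_num_layers = num_layers
    return checkpoint_num_layers


# Selective granularity on a 24-layer model checkpoints all 24 layers.
assert resolve_checkpoint_num_layers('selective', 'block', None, 24) == 24
# Full granularity with nothing set falls back to 1.
assert resolve_checkpoint_num_layers('full', 'uniform', None, 24) == 1
# An oversized setting is clamped to the layer count.
assert resolve_checkpoint_num_layers('full', 'block', 48, 24) == 24

Per the author's comment above, the 'full' fallback branch is effectively unreachable because TransformerConfig already rejects a missing recompute_num_layers; the sketch keeps the old default of 1 only to mirror the diff.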