
Commit

Merge branch 'r1.6.1' of github.com:NVIDIA/NeMo into r1.6.1
ericharper committed Feb 2, 2022
2 parents 6692a25 + 7a8c050 commit acf6bf4
Showing 22 changed files with 161 additions and 60 deletions.
2 changes: 1 addition & 1 deletion nemo/collections/asr/models/label_models.py
@@ -432,7 +432,7 @@ def verify_speakers(self, path2audio_file1, path2audio_file2, threshold=0.7):
embs2 = self.get_embedding(path2audio_file2).squeeze()
# Length Normalize
X = embs1 / torch.linalg.norm(embs1)
- Y = embs1 / torch.linalg.norm(embs2)
+ Y = embs2 / torch.linalg.norm(embs2)
# Score
similarity_score = torch.dot(X, Y) / ((torch.dot(X, X) * torch.dot(Y, Y)) ** 0.5)
similarity_score = (similarity_score + 1) / 2
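The one-line fix above normalizes each embedding by its own L2 norm before scoring (the old line built Y from embs1 using the norm of embs2). For illustration only — not part of the commit — here is a self-contained sketch of the corrected scoring, with random stand-in tensors in place of NeMo's get_embedding outputs:

import torch

def verification_score(embs1: torch.Tensor, embs2: torch.Tensor) -> float:
    # Length-normalize each embedding by its own norm, then map cosine
    # similarity from [-1, 1] to [0, 1], mirroring the fixed code above.
    x = embs1 / torch.linalg.norm(embs1)
    y = embs2 / torch.linalg.norm(embs2)
    score = torch.dot(x, y) / ((torch.dot(x, x) * torch.dot(y, y)) ** 0.5)
    return ((score + 1) / 2).item()

a, b = torch.randn(192), torch.randn(192)  # stand-ins for speaker embeddings
print(verification_score(a, b))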
@@ -38,13 +38,22 @@

import numpy as np
import torch
- from apex.transformer import parallel_state

from nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset import BlendableDataset
from nemo.collections.nlp.data.language_modeling.megatron.indexed_dataset import make_dataset as make_indexed_dataset
from nemo.utils import logging
from nemo.utils.get_rank import is_global_rank_zero

+ try:
+ from apex.transformer import parallel_state
+
+ HAVE_APEX = True
+
+ except (ImportError, ModuleNotFoundError):
+
+ HAVE_APEX = False


DSET_TYPE_BERT = 'standard_bert'
DSET_TYPE_ICT = 'ict'
DSET_TYPE_T5 = 't5'
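The recurring change in this commit is the guarded-import pattern shown above: Apex is imported inside try/except, availability is recorded in a module-level HAVE_APEX flag, and the hard failure is deferred to the code paths that actually need Apex. A generic sketch of the idiom, not taken from NeMo (the some_optional_dep name is a placeholder):

# Module stays importable even when the optional dependency is missing.
try:
    import some_optional_dep as opt  # placeholder for apex.transformer etc.

    HAVE_OPT = True
except (ImportError, ModuleNotFoundError):
    HAVE_OPT = False


def feature_needing_opt():
    # Defer the failure to the feature that really needs the dependency.
    if not HAVE_OPT:
        raise ImportError("some_optional_dep is required for this feature; please install it.")
    return opt.do_something()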
@@ -19,7 +19,6 @@

import numpy as np
import torch
- from apex.transformer import parallel_state

from nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset import BlendableDataset
from nemo.collections.nlp.data.language_modeling.megatron.dataset_utils import (
@@ -30,6 +29,15 @@
from nemo.collections.nlp.data.language_modeling.megatron.megatron_dataset import MegatronDataset
from nemo.utils import logging

+ try:
+ from apex.transformer import parallel_state
+
+ HAVE_APEX = True
+
+ except (ImportError, ModuleNotFoundError):
+
+ HAVE_APEX = False


def build_train_valid_test_datasets(
cfg, trainer, data_prefix, data_impl, splits_string, train_valid_test_num_samples, seq_length, seed, skip_warmup
@@ -155,6 +163,10 @@ def get_indexed_dataset_(data_prefix, data_impl, skip_warmup):

class GPTDataset(MegatronDataset):
def __init__(self, cfg, trainer, name, data_prefix, documents, indexed_dataset, num_samples, seq_length, seed):
+ if not HAVE_APEX:
+ raise ImportError(
+ "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
+ )

super().__init__(cfg, trainer=trainer)
self.name = name
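Pairing the module-level flag with the constructor check above turns a missing Apex install into an immediate, actionable ImportError rather than a NameError deep inside parallel-state calls, while the module itself stays importable. A small illustrative sketch of the same fail-fast idiom — the class below is hypothetical, not NeMo code:

HAVE_APEX = False  # imagine the guarded import above found no Apex


class GPTDatasetLike:
    # Hypothetical stand-in for a class that needs Apex at construction time.
    def __init__(self, name: str):
        if not HAVE_APEX:
            raise ImportError(
                "Apex was not found. Please see the NeMo README for installation "
                "instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
            )
        self.name = name


try:
    GPTDatasetLike("train")  # importing the module succeeded; only instantiation fails
except ImportError as err:
    print(err)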
@@ -15,9 +15,6 @@
"""BERT model."""

import torch
- from apex.transformer import parallel_state, tensor_parallel
- from apex.transformer.enums import AttnMaskType
- from apex.transformer.tensor_parallel.layers import set_tensor_model_parallel_attributes

from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model, parallel_lm_logits
from nemo.collections.nlp.modules.common.megatron.module import MegatronModule
@@ -29,6 +26,16 @@
openai_gelu,
scaled_init_method_normal,
)
+ from nemo.utils import logging
+
+ try:
+ from apex.transformer import parallel_state, tensor_parallel
+ from apex.transformer.enums import AttnMaskType
+ from apex.transformer.tensor_parallel.layers import set_tensor_model_parallel_attributes
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def bert_extended_attention_mask(attention_mask):
@@ -15,12 +15,19 @@
"""GPT-2 model."""

import torch
- from apex.transformer import tensor_parallel
- from apex.transformer.enums import AttnMaskType

from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model, parallel_lm_logits
from nemo.collections.nlp.modules.common.megatron.module import MegatronModule
from nemo.collections.nlp.modules.common.megatron.utils import init_method_normal, scaled_init_method_normal
+ from nemo.utils import logging
+
+ try:
+ from apex.transformer import tensor_parallel
+ from apex.transformer.enums import AttnMaskType
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def post_language_model_processing(
@@ -89,6 +96,7 @@ def __init__(
num_prompt_tokens=10,
prompt_tags=None,
):

super(GPTModel, self).__init__()

self.parallel_output = parallel_output
12 changes: 10 additions & 2 deletions nemo/collections/nlp/models/language_modeling/megatron/t5_model.py
@@ -15,12 +15,19 @@
"""T5 model."""

import torch
- from apex.transformer import tensor_parallel
- from apex.transformer.enums import AttnMaskType

from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model, parallel_lm_logits
from nemo.collections.nlp.modules.common.megatron.module import MegatronModule
from nemo.collections.nlp.modules.common.megatron.utils import init_method_normal, scaled_init_method_normal
+ from nemo.utils import logging
+
+ try:
+ from apex.transformer import tensor_parallel
+ from apex.transformer.enums import AttnMaskType
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def t5_attention_mask_func(attention_scores, attention_mask):
@@ -103,6 +110,7 @@ def __init__(
openai_gelu=False,
onnx_safe=False,
):

super(T5Model, self).__init__()

self.parallel_output = parallel_output
@@ -18,7 +18,6 @@

import torch
import torch.nn.functional as F
- from apex.transformer import parallel_state, tensor_parallel
from omegaconf.dictconfig import DictConfig
from omegaconf.omegaconf import open_dict
from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
@@ -44,13 +43,24 @@
from nemo.core.optim import MasterOptimizerWrapper, prepare_lr_scheduler
from nemo.utils import AppState, logging

+ try:
+ from apex.transformer import parallel_state, tensor_parallel
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


class MegatronGPTModel(NLPModel):
"""
Megatron GPT pretraining and prompt tuning
"""

def __init__(self, cfg: DictConfig, trainer: Trainer):
+ if not HAVE_APEX:
+ raise ImportError(
+ "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
+ )
super().__init__(cfg, trainer=trainer)
self.cfg = cfg

@@ -18,7 +18,6 @@

import torch
import torch.nn as nn
- from apex.transformer import parallel_state, tensor_parallel
from omegaconf.dictconfig import DictConfig
from pytorch_lightning.trainer.trainer import Trainer

@@ -35,13 +34,24 @@
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
from nemo.utils import AppState, logging

+ try:
+ from apex.transformer import parallel_state, tensor_parallel
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


class MegatronT5Model(NLPModel):
"""
Megatron T5 pretraining
"""

def __init__(self, cfg: DictConfig, trainer: Trainer):
+ if not HAVE_APEX:
+ raise ImportError(
+ "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
+ )
super().__init__(cfg, trainer=trainer)
self.cfg = cfg

12 changes: 0 additions & 12 deletions nemo/collections/nlp/models/nlp_model.py
@@ -34,14 +34,6 @@
from nemo.core.classes.exportable import Exportable
from nemo.utils import AppState, logging

- try:
- import apex
-
- HAVE_APEX = True
-
- except (ImportError, ModuleNotFoundError):
- HAVE_APEX = False


__all__ = ['NLPModel']

@@ -59,10 +51,6 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None):
# handles model parallel save and restore logic
self._save_restore_connector = NLPSaveRestoreConnector()
self.set_world_size(trainer)
- if not HAVE_APEX:
- logging.warning(
- "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
- )

def register_artifact(
self, config_path: str, src: str, verify_src_exists: bool = False,
12 changes: 9 additions & 3 deletions nemo/collections/nlp/modules/common/megatron/clip_grads.py
@@ -16,13 +16,19 @@

import amp_C
import torch
- from apex.multi_tensor_apply import multi_tensor_applier
- from apex.transformer import parallel_state
- from apex.transformer.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate
from torch._six import inf

from nemo.collections.nlp.modules.common.megatron.module import param_is_not_shared

+ try:
+ from apex.multi_tensor_apply import multi_tensor_applier
+ from apex.transformer import parallel_state
+ from apex.transformer.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def clip_grad_norm_fp32(parameters, max_norm, norm_type=2):
"""Clips gradient norm of an iterable of parameters whose gradients
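The truncated docstring above belongs to clip_grad_norm_fp32, which computes a global gradient norm and rescales gradients when it exceeds max_norm; judging by the imports in this hunk, the real implementation also filters shared and tensor-parallel-duplicate parameters and reduces the norm across model-parallel ranks via parallel_state. As a rough, single-process sketch of the core idea only (plain PyTorch, not NeMo's implementation):

import torch

def clip_grad_norm_simple(parameters, max_norm: float, norm_type: float = 2.0):
    # Compute the combined norm of all gradients, then scale them down
    # in place if that norm exceeds max_norm.
    grads = [p.grad for p in parameters if p.grad is not None]
    if not grads:
        return torch.tensor(0.0)
    total_norm = torch.norm(torch.stack([torch.norm(g.detach(), norm_type) for g in grads]), norm_type)
    clip_coef = max_norm / (total_norm + 1.0e-6)
    if clip_coef < 1.0:
        for g in grads:
            g.detach().mul_(clip_coef)
    return total_norm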
@@ -15,7 +15,13 @@


import torch
- from apex._autocast_utils import _cast_if_autocast_enabled

+ try:
+ from apex._autocast_utils import _cast_if_autocast_enabled
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def bias_dropout_add(x, bias, residual, prob, training):
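The guarded import above only affects the autocast helper used by the fused variants; bias_dropout_add itself is ordinary PyTorch. Its body is not shown in this hunk, but a typical unfused equivalent of the operation — dropout over x + bias followed by a residual add — looks like the sketch below (illustrative, not copied from NeMo):

import torch
import torch.nn.functional as F

def bias_dropout_add_reference(x, bias, residual, prob, training):
    # dropout(x + bias) followed by a residual connection
    out = F.dropout(x + bias, p=prob, training=training)
    return residual + out

x = torch.randn(4, 2, 8)
out = bias_dropout_add_reference(x, torch.zeros(8), torch.randn(4, 2, 8), prob=0.1, training=True)
print(out.shape)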
@@ -14,7 +14,13 @@
# limitations under the License.

import torch
- from apex._autocast_utils import _cast_if_autocast_enabled

+ try:
+ from apex._autocast_utils import _cast_if_autocast_enabled
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False

###### BIAS GELU FUSION/ NO AUTOGRAD ################
# 1/sqrt(2*pi)-> 0.3989423
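The comment above notes 1/sqrt(2*pi) ≈ 0.3989423, one of the Gaussian-related constants listed alongside this file's fused bias-GELU code. For orientation, a plain-PyTorch sketch of the forward bias + GELU using the standard tanh approximation (0.79788456 ≈ sqrt(2/pi)); this is an illustration, not the fused/JIT implementation:

import torch

def bias_gelu_reference(bias: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Unfused bias add followed by tanh-approximate GELU.
    x = bias + y
    return x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x)))

print(bias_gelu_reference(torch.zeros(3), torch.tensor([-1.0, 0.0, 1.0])))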
11 changes: 8 additions & 3 deletions nemo/collections/nlp/modules/common/megatron/fused_layer_norm.py
@@ -13,9 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

- import torch
- from apex.contrib.layer_norm.layer_norm import FastLayerNorm
- from apex.normalization.fused_layer_norm import MixedFusedLayerNorm

+ try:
+ from apex.contrib.layer_norm.layer_norm import FastLayerNorm
+ from apex.normalization.fused_layer_norm import MixedFusedLayerNorm
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def get_layer_norm(hidden_size, eps=1e-5, persist_layer_norm=False):
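get_layer_norm in this file appears to select between Apex's FastLayerNorm and MixedFusedLayerNorm, so it still needs Apex even after the guarded import; the HAVE_APEX flag only keeps the module importable. As an illustration of what a graceful fallback could look like — a sketch under that assumption, not what NeMo does here:

import torch

def get_layer_norm_with_fallback(hidden_size: int, eps: float = 1e-5):
    # Prefer Apex's fused LayerNorm when available, otherwise fall back
    # to the plain PyTorch implementation.
    try:
        from apex.normalization.fused_layer_norm import MixedFusedLayerNorm
        return MixedFusedLayerNorm(hidden_size, eps=eps)
    except (ImportError, ModuleNotFoundError):
        return torch.nn.LayerNorm(hidden_size, eps=eps)

ln = get_layer_norm_with_fallback(16)
print(ln(torch.randn(2, 16)).shape)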
10 changes: 8 additions & 2 deletions nemo/collections/nlp/modules/common/megatron/language_model.py
@@ -19,8 +19,6 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
- from apex.transformer import parallel_state, tensor_parallel
- from apex.transformer.enums import AttnMaskType, LayerType

from nemo.collections.nlp.modules.common.megatron.module import MegatronModule
from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer
@@ -30,6 +28,14 @@
scaled_init_method_normal,
)

+ try:
+ from apex.transformer import parallel_state, tensor_parallel
+ from apex.transformer.enums import AttnMaskType, LayerType
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def parallel_lm_logits(input_, word_embeddings_weight, parallel_output, bias=None):
"""LM logits using word embedding weights."""
22 changes: 14 additions & 8 deletions nemo/collections/nlp/modules/common/megatron/megatron_init.py
@@ -16,18 +16,24 @@

import numpy as np
import torch
- from apex.transformer import tensor_parallel
- from apex.transformer.parallel_state import (
- get_pipeline_model_parallel_rank,
- set_pipeline_model_parallel_rank,
- set_pipeline_model_parallel_world_size,
- set_tensor_model_parallel_rank,
- set_tensor_model_parallel_world_size,
- )

from nemo.collections.nlp.modules.common.megatron.megatron_utils import compute_model_parallel_rank
from nemo.utils import AppState

+ try:
+ from apex.transformer import tensor_parallel
+ from apex.transformer.parallel_state import (
+ get_pipeline_model_parallel_rank,
+ set_pipeline_model_parallel_rank,
+ set_pipeline_model_parallel_world_size,
+ set_tensor_model_parallel_rank,
+ set_tensor_model_parallel_world_size,
+ )
+
+ HAVE_APEX = True
+ except (ImportError, ModuleNotFoundError):
+ HAVE_APEX = False


def initialize_model_parallel_for_nemo(
world_size, global_rank, local_rank, tensor_model_parallel_size=1, seed=1234,