Skip to content

Commit

Permalink
Merge branch 'main' into megatron-bart
Browse files Browse the repository at this point in the history
  • Loading branch information
ericharper authored Mar 30, 2022
2 parents 1c94524 + e4a1b20 commit 81e5065
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo
from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
from nemo.core.classes.common import PretrainedModelInfo
from nemo.core.neural_types import ChannelType, MaskType, NeuralType
from nemo.utils import AppState, logging

Expand Down Expand Up @@ -393,8 +394,40 @@ def configure_gradient_clipping(self, *args, **kwargs):
parameters = self.model.parameters()
clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val)

def list_available_models(self):
    """Return None — no NGC pre-trained checkpoints are registered for this model."""
    return None
@classmethod
def list_available_models(cls) -> "list[PretrainedModelInfo]":
    """
    Return descriptors of pre-trained models that can be instantiated directly
    from NVIDIA's NGC cloud.

    Returns:
        List of PretrainedModelInfo entries (NGC model name, download URL,
        human-readable description).
    """
    result = []
    # Vanilla Megatron BERT 345M checkpoints (cased / uncased vocab).
    for vocab in ['cased', 'uncased']:
        result.append(
            PretrainedModelInfo(
                pretrained_model_name=f"megatron_bert_345m_{vocab}",
                location=f"https://api.ngc.nvidia.com/v2/models/nvidia/nemo/megatron_bert_345m_{vocab}/versions/1/files/megatron_bert_345m_{vocab}.nemo",
                description=f"345M parameter BERT Megatron model with {vocab} vocab.",
            )
        )
    # BioMegatron variants trained with a dedicated biomedical vocabulary.
    for vocab_size in ['50k', '30k']:
        for vocab in ['cased', 'uncased']:
            result.append(
                PretrainedModelInfo(
                    pretrained_model_name=f"biomegatron345m_biovocab_{vocab_size}_{vocab}",
                    location=f"https://api.ngc.nvidia.com/v2/models/nvidia/nemo/biomegatron345m_biovocab_{vocab_size}_{vocab}/versions/1/files/BioMegatron345m-biovocab-{vocab_size}_{vocab}.nemo",
                    # BUG FIX: this description had {vocab_size}/{vocab} placeholders
                    # but was not an f-string, so the literal braces were shown to users.
                    description=f"Megatron 345m parameters model with biomedical vocabulary ({vocab_size} size) {vocab}, pre-trained on PubMed biomedical text corpus.",
                )
            )
    # BioMegatron variants reusing the original BERT vocab. NOTE: the model name
    # intentionally has no separator before {vocab} — kept as published on NGC.
    for vocab in ['cased', 'uncased']:
        result.append(
            PretrainedModelInfo(
                pretrained_model_name=f"biomegatron345m{vocab}",
                location=f"https://api.ngc.nvidia.com/v2/models/nvidia/nemo/megatron_bert_345m_{vocab}/versions/1/files/megatron_bert_345m_{vocab}.nemo",
                description=f"Megatron pretrained on {vocab} biomedical dataset PubMed with 345 million parameters.",
            )
        )
    return result

def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size):
"""Pad vocab size so it is divisible by model parallel size and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
)
from nemo.collections.nlp.parts.nlp_overrides import GradScaler
from nemo.collections.nlp.parts.utils_funcs import get_last_rank
from nemo.core.classes.common import PretrainedModelInfo
from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler
from nemo.utils import AppState, logging

Expand Down Expand Up @@ -1233,3 +1234,20 @@ def compute_logprobs(self, request: Dict, positions: List):
response[index] = item

return response

@classmethod
def list_available_models(cls) -> "list[PretrainedModelInfo]":
    """
    Return descriptors of pre-trained models that can be instantiated directly
    from NVIDIA's NGC cloud.

    Returns:
        List of PretrainedModelInfo entries (NGC model name, download URL,
        human-readable description).  The original annotation claimed
        Optional[PretrainedModelInfo], but a list is always returned.
    """
    return [
        PretrainedModelInfo(
            pretrained_model_name="megatron_gpt_345m",
            location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/megatron_gpt_345m/versions/1/files/megatron_gpt_345m.nemo",
            description="345M parameter GPT generative Megatron model.",
        )
    ]

0 comments on commit 81e5065

Please sign in to comment.