Commit 85a4249

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jan 11, 2024
1 parent cfd83e2 commit 85a4249
Showing 2 changed files with 10 additions and 7 deletions.
@@ -17,7 +17,7 @@
import queue
import warnings
from dataclasses import fields
-from functools import partial, cache
+from functools import cache, partial
from typing import Any, Dict, Iterator, List, Optional, Union

import torch
@@ -103,18 +103,21 @@
except (ImportError, ModuleNotFoundError):
    HAVE_TE = False

+
@cache
def mcore_supports_moe() -> bool:
    global HAVE_MEGATRON_CORE
    if not HAVE_MEGATRON_CORE:
        return False
    try:
        from megatron.core.transformer.moe.base_moe_layer import MoETokenDispatcher
+
        return True
    except ImportError:
        return False
    return False

+
def get_specs(spec_name, num_experts=None):
    if spec_name == '':
        if num_experts is not None:
@@ -1561,9 +1564,11 @@ def build_transformer_config(self) -> TransformerConfig:
            'num_moe_experts': self.cfg.get('num_moe_experts', None),
            'moe_router_type': self.cfg.get('moe_router_type', None),
        }
-        if model_specific_configs['num_moe_experts'] is not None or \
-            model_specific_configs['moe_router_type'] is not None:
-            assert mcore_supports_moe(), 'Megatron-core >= v0.5.0 is required for MoE'
+        if (
+            model_specific_configs['num_moe_experts'] is not None
+            or model_specific_configs['moe_router_type'] is not None
+        ):
+            assert mcore_supports_moe(), 'Megatron-core >= v0.5.0 is required for MoE'
        elif not mcore_supports_moe():
            del model_specific_configs['num_moe_experts']
            del model_specific_configs['moe_router_type']
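
For context, the reformatted if/elif above is a feature gate: a cached import probe (mcore_supports_moe) decides whether the MoE-specific config keys are honored or silently dropped. Below is a minimal, standalone sketch of that pattern, not the NeMo source; the helper name filter_moe_configs and its bare-dict argument are illustrative assumptions.

# Minimal sketch of the pattern above; filter_moe_configs is a hypothetical helper.
from functools import cache


@cache
def mcore_supports_moe() -> bool:
    # Probe for Megatron-core's MoE token dispatcher; the cached result means
    # the import is attempted at most once per process.
    try:
        from megatron.core.transformer.moe.base_moe_layer import MoETokenDispatcher  # noqa: F401

        return True
    except (ImportError, ModuleNotFoundError):
        return False


def filter_moe_configs(model_specific_configs: dict) -> dict:
    # Require a MoE-capable Megatron-core when MoE options are requested;
    # otherwise drop the unused keys so downstream config building stays clean.
    if (
        model_specific_configs.get('num_moe_experts') is not None
        or model_specific_configs.get('moe_router_type') is not None
    ):
        assert mcore_supports_moe(), 'Megatron-core >= v0.5.0 is required for MoE'
    elif not mcore_supports_moe():
        model_specific_configs.pop('num_moe_experts', None)
        model_specific_configs.pop('moe_router_type', None)
    return model_specific_configs

For example, on a build without MoE support, filter_moe_configs({'num_moe_experts': None, 'moe_router_type': None}) returns an empty dict, while passing num_moe_experts=8 there trips the assertion.
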
4 changes: 1 addition & 3 deletions scripts/nlp_language_modeling/convert_hf_mixtral_to_nemo.py
@@ -25,14 +25,14 @@
import os
from argparse import ArgumentParser
from collections import OrderedDict
-from transformers import AutoModelForCausalLM, AutoTokenizer

import torch
import torch.nn
from omegaconf import OmegaConf
from pytorch_lightning.core.saving import _load_state as ptl_load_state
from pytorch_lightning.trainer.trainer import Trainer
from sentencepiece import SentencePieceProcessor
+from transformers import AutoModelForCausalLM, AutoTokenizer

from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
from nemo.collections.nlp.parts.nlp_overrides import (
@@ -228,8 +228,6 @@ def convert(args):
    if mcore_gpt:
        assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.'

-
-
    for l in range(int(num_layers)):
        print(f"converting layer {l}")
        old_tensor_shape = ckpt[f'model.layers.{l}.self_attn.q_proj.weight'].size()
