Commit d926b1a

Mixtral-converters: use set_cpu_expert_model_parallel_world_size to specify MoE world size.

Signed-off-by: Alexandros Koumparoulis <[email protected]>
akoumpa committed Jan 22, 2024
1 parent a49a99a commit d926b1a
Showing 2 changed files with 4 additions and 2 deletions.
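Both converters run as a single CPU process, so the MoE expert-parallel world size must be pinned before any Megatron modules are built. Below is a minimal sketch of the pattern this commit introduces, assuming a Megatron-core build that exposes set_cpu_expert_model_parallel_world_size (the call both diffs add); convert() is a hypothetical stand-in for either script's entry point:

import megatron.core.parallel_state as parallel_state

def convert():
    ...  # hypothetical stand-in for either converter's checkpoint logic

if __name__ == '__main__':
    # Pin the MoE expert-parallel world size to 1 before building any
    # Megatron modules: conversion runs in one CPU process without
    # torch.distributed, so all experts must live in that single process.
    parallel_state.set_cpu_expert_model_parallel_world_size(1)
    convert()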
3 changes: 2 additions & 1 deletion scripts/nlp_language_modeling/convert_hf_mixtral_to_nemo.py
@@ -42,7 +42,7 @@
     PipelineMixedPrecisionPlugin,
 )
 from nemo.utils import logging
-
+import megatron.core.parallel_state as parallel_state
 
 def get_args():
     parser = ArgumentParser()
@@ -340,4 +340,5 @@ def convert(args):
 
 if __name__ == '__main__':
     args = get_args()
+    parallel_state.set_cpu_expert_model_parallel_world_size(1)
     convert(args)
3 changes: 2 additions & 1 deletion scripts/nlp_language_modeling/convert_nemo_mixtral_to_hf.py
@@ -32,7 +32,7 @@
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
 from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
 from nemo.utils import logging
-
+import megatron.core.parallel_state as parallel_state
 
 def get_args():
     parser = ArgumentParser()
@@ -231,6 +231,7 @@ def convert(in_file, precision=None) -> None:
 
 if __name__ == '__main__':
     args = get_args()
+    parallel_state.set_cpu_expert_model_parallel_world_size(1)
     hf_state_dict, nemo_config = convert(args.in_file, args.precision)
 
     config = load_config(args.hf_model_name, nemo_config)
