diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml deleted file mode 100644 index a1c5f774cd11..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml +++ /dev/null @@ -1,173 +0,0 @@ -name: megatron_virtual_prompt_gpt - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: 3 # min 25 recommended - max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 # frequency with which training steps are logged - val_check_interval: 1.0 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch - gradient_clip_val: 1.0 - benchmark: False - - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 2 - mode: min - save_nemo_on_train_end: True - filename: 'megatron_gpt_prompt_tune--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training. - - -model: - seed: 1234 - nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'p-tuning' # one of 'prompt-tuning', 'p-tuning', or 'inference' - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: 'accuracy' - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. - existing_tasks: ['boolq', 'intent_and_slot'] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ['rte'] # List of new tasknames to be prompt-tuned - - - - ## Sequence Parallelism - # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. 
- sequence_parallel: False - - ## Activation Checkpoint - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - # 'uniform' divides the total number of transformer layers and checkpoints the input activation - # of each chunk at the specified granularity - # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity - activations_checkpoint_num_layers: null # not used with 'selective' - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "<|VIRTUAL_PROMPT_0|> Passage: {passage} <|VIRTUAL_PROMPT_1|> \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 30 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [20, 10] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. - answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "<|VIRTUAL_PROMPT_0|> intent options: {intent_options} <|VIRTUAL_PROMPT_1|> slot options: {slot_options} <|VIRTUAL_PROMPT_2|> {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 30 - answer_only_loss: False - virtual_token_splits: [15, 10, 5] - truncate_field: null - - - taskname: "rte" - prompt_template: "<|VIRTUAL_PROMPT_0|>{premise}\n{hypothesis}\nAnswer: {answer}" - total_virtual_tokens: 9 - virtual_token_splits: [9] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "<|VIRTUAL_PROMPT_0|> context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 10 - virtual_token_splits: [10] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "taskname" - prompt_template: "<|VIRTUAL_PROMPT_0|> {prompt} {completion}" - total_virtual_tokens: 100 - virtual_token_splits: [100] - truncate_field: null - answer_only_loss: True - answer_field: "completion" - - prompt_tuning: # Prompt tunin specific params - new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new tasks - new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random - - p_tuning: # P-tuning specific params - encoder_type: "tpmlp" # ['tpmlp', 'lstm', 'biglstm', 'mlp'] - dropout: 0.0 - num_layers: 2 # number of layers for MLP or LSTM layers. Note, it has no effect for tpmlp currently as it always assumes it is two layers. 
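The `task_templates` comments above state two invariants: the entries of `virtual_token_splits` must sum to `total_virtual_tokens`, and each split corresponds to one `<|VIRTUAL_PROMPT_#|>` position in `prompt_template`. A minimal standalone sketch of that check, applied to the `boolq` entry — the `check_task_template` helper is illustrative only, not a NeMo API:

```python
import re

def check_task_template(template: dict) -> None:
    # virtual_token_splits must sum to total_virtual_tokens, and the template
    # should contain one <|VIRTUAL_PROMPT_#|> marker per split.
    splits = template["virtual_token_splits"]
    total = template["total_virtual_tokens"]
    markers = re.findall(r"<\|VIRTUAL_PROMPT_\d+\|>", template["prompt_template"])
    assert sum(splits) == total, f"splits {splits} must sum to {total}"
    assert len(markers) == len(splits), "expected one marker per virtual token split"

# The 'boolq' entry above: 20 + 10 virtual tokens at two marker positions.
check_task_template(
    {
        "prompt_template": (
            "<|VIRTUAL_PROMPT_0|> Passage: {passage} <|VIRTUAL_PROMPT_1|> "
            "\nQuestion: {question} \nAnswer: {answer}"
        ),
        "total_virtual_tokens": 30,
        "virtual_token_splits": [20, 10],
    }
)
```

The same invariant holds for the other entries, e.g. the `rte` template uses a single split of 9 tokens at its one marker.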
- encoder_hidden: 2048 # encoder hidden for biglstm and tpmlp - init_std: 0.023 # init std for tpmlp layers - - data: - train_ds: [data/rte_train.jsonl,] - validation_ds: [data/rte_val.jsonl,] - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - train_cache_data_path: null # the path to the train cache data - validation_cache_data_path: null # the path to the validation cache data - test_cache_data_path: null # the path to the test cache data - load_cache: False # whether to load from the cache data - max_seq_length: 1024 # filter out training and validation examples longer than 1024 tokens. Set to None will default to model's encoder length. - min_seq_length: 1 # filter out training and validation examples less than 1 token long. - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning when pipeline parallel > 1 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - monitor: val_loss - reduce_on_plateau: false - - # required for reporting validation metrics - inference: - greedy: False - top_k: 0 - top_p: 0.9 - temperature: 1.0 - tokens_to_generate: 30 - repetition_penalty: 1.2 - min_tokens_to_generate: 0 diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml deleted file mode 100644 index 33ca3f06ddfe..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml +++ /dev/null @@ -1,30 +0,0 @@ -inference: - greedy: False # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - batch_size: 1 - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -gpt_model_file: null # GPT nemo file path -virtual_prompt_model_file: ??? # path to a MegatronGPTPromptLearningModel model if you want to use soft prompts -pred_file_path: ??? # Path will model predictions will be written -max_seq_length: 8192 # this will filter out inputs whose length is longer than the set value form the generation process. 
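On the sampling settings repeated in these inference configs: `top_k: 0` disables top-k filtering, and `top_p: 0.9` applies nucleus filtering as described in the comment ("only the most probable tokens with probabilities that add up to top_p or higher are kept"). A plain-Python, NeMo-independent illustration of that rule:

```python
# Keep only the most probable tokens whose cumulative probability reaches top_p;
# sampling is then restricted to this reduced set.
def top_p_filter(probs: dict, top_p: float = 0.9) -> dict:
    kept, cumulative = {}, 0.0
    for token, p in sorted(probs.items(), key=lambda kv: kv[1], reverse=True):
        kept[token] = p
        cumulative += p
        if cumulative >= top_p:
            break
    return kept

print(top_p_filter({"a": 0.5, "b": 0.3, "c": 0.15, "d": 0.05}, top_p=0.9))
# -> {'a': 0.5, 'b': 0.3, 'c': 0.15}
```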
-data_paths: # paths to .jsonl files you want to perform inference on -num_workers: 8 - \ No newline at end of file diff --git a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml b/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml deleted file mode 100644 index b966ad0eb631..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml +++ /dev/null @@ -1,108 +0,0 @@ -name: p_tuning_squad_t5 - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: 10 - max_steps: -1 - log_every_n_steps: 10 - val_check_interval: 1.0 - gradient_clip_val: 1.0 - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: PromptLearning-T5 - name: ${name} - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 2 - mode: min - save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below - filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - -model: - seed: 1234 - nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: "p-tuning" # one of 'prompt-tuning', 'p-tuning', or 'inference' - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 8 - micro_batch_size: 8 # micro batch size should equal global batch size when pipeline parallel = 1 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. 
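A quick worked check of the batch-size bookkeeping referenced in the comments (`consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches`), assuming this single-GPU T5 example where tensor and pipeline parallel size are 1 and no gradient accumulation is used:

```python
# With one GPU and TP = PP = 1, data_parallel_size = devices * num_nodes = 1.
micro_batch_size = 8
data_parallel_size = 1
accumulate_grad_batches = 1

global_batch_size = micro_batch_size * data_parallel_size * accumulate_grad_batches
assert global_batch_size == 8  # matches model.global_batch_size above

global_step = 1000
consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches
print(consumed_samples)  # 8000: global_batch_size samples per optimizer step
```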
- existing_tasks: [] - new_tasks: ["squad"] - - - task_templates: - - taskname: "squad" - prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}" - total_virtual_tokens: 100 - virtual_token_splits: [100] - truncate_field: context - answer_field: answer - - p_tuning: # P-tuning specific params - encoder_type: "mlp" # Either "mlp" or "lstm", mlp is default - num_layers: 2 # 2 recommended for MLP, 1 recommended for LSTM, must be at least 2 for mlp - dropout: 0.0 - - prompt_tuning: # Prompt tunin specific params - new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new tasks - new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random - - data: - train_ds: ["data/squad_train.jsonl"] - validation_ds: ["data/squad_val.jsonl"] - add_eos: true - add_bos: false - decoder_starts_with_pad: False - add_eos_to_decoder_output: True - add_sentinel_to_input: True - ul2_prompt_token: null # , , - shuffle: true - num_workers: 4 - pin_memory: true - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false \ No newline at end of file diff --git a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml b/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml deleted file mode 100644 index 0b5929a36a62..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml +++ /dev/null @@ -1,21 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 8 - micro_batch_size: 8 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # path to a pretrained T5 nemo file -virtual_prompt_model_file: ??? # path to a MegatronT5PromptLearningModel nemo file -pred_file_path: ??? # Path were all model predicitons will be written to a text file - - diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py deleted file mode 100644 index 3edca99e15a5..000000000000 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
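The `data` block above points `train_ds`/`validation_ds` at .jsonl files such as `data/squad_train.jsonl`. Assuming each record carries a `taskname` plus the fields named in the matching prompt template (the `squad` template above uses `{context}`, `{question}` and `{answer}`), a hypothetical line of `data/squad_val.jsonl` could be produced like this; the exact schema and the example text are illustrative only:

```python
import json

# Hypothetical record for the "squad" task template; field names mirror the
# placeholders in prompt_template, and the content is made up for illustration.
record = {
    "taskname": "squad",
    "context": "The Eiffel Tower is located in Paris.",
    "question": "Where is the Eiffel Tower located?",
    "answer": "Paris",
}
with open("data/squad_val.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```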
- -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import ( - MegatronT5PromptLearningModel, -) -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - - -""" -This is an example of how to ptune/prompt-tune a pretrained T5 model. -Be sure to use a .nemo T5 model with this code. If you've downloaded -a model from NGC or are otherwise using a MegatronLM model, please use -either megatron_ckpt_to_nemo.py or megatron_lm_ckpt_to_nemo.py found -within this examples directory to convert your model to .nemo format. -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning.yaml") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - plugins = [] - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) - if cfg.trainer.precision == 16 or cfg.trainer.precision == '16-mixed': - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - enabled=False - if cfg.model.pipeline_model_parallel_size > 1 - else True, # turn off the grad scale for pipeline parallel LM model - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugins.append(PipelineMixedPrecisionPlugin(precision='16-mixed', device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt T5 model - if cfg.model.get("restore_path", None): - model = MegatronT5PromptLearningModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - - else: - model = MegatronT5PromptLearningModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py deleted file mode 100644 index 67640138b3ff..000000000000 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import ( - MegatronT5PromptLearningModel, -) -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True -except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False - - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5PromptLearningModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load prompt tuned model, virtual_prompt_model_file and language_model_path must be provided in config - if cfg.get('virtual_prompt_model_file', None) is not None and cfg.get('language_model_path', None) is not None: - - # Update frozen T5 model path in case it has 
changed - prompt_learning_cfg = MegatronT5PromptLearningModel.restore_from( - cfg.virtual_prompt_model_file, trainer=trainer, return_config=True - ) - with open_dict(prompt_learning_cfg): - if cfg.get("language_model_path"): - # This is for backward compatibility with old checkpoints that used `pretrained_language_model_path` instead of `language_model_path`. - if hasattr(prompt_learning_cfg, 'pretrained_language_model_path'): - prompt_learning_cfg.pretrained_language_model_path = cfg.language_model_path - else: - prompt_learning_cfg.language_model_path = cfg.language_model_path - prompt_learning_cfg.micro_batch_size = cfg.data.get('micro_batch_size', 4) - prompt_learning_cfg.global_batch_size = cfg.data.get('global_batch_size', 4) - - # Now load prompt learning model with frozen T5 model base - model = MegatronT5PromptLearningModel.restore_from( - restore_path=cfg.virtual_prompt_model_file, trainer=trainer, override_config_path=prompt_learning_cfg - ) - - else: - raise ValueError("virtual_prompt_model_file and pretrained_language_model_file must be provided in config") - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if model.trainer.strategy.launcher is not None: - model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) - model.trainer.strategy.setup_environment() - - model.freeze() - - _, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - outputs = trainer.predict(model, test_dl) - with open(cfg.pred_file_path, "w", encoding="utf-8") as pred_file: - for batch in outputs: - preds = batch["preds_text"] - for pred in preds: - pred = pred.strip().replace("\n", " ") - pred_file.write(pred + "\n") - print('test finish---------------------------------') - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml deleted file mode 100644 index bf724ad4a060..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml +++ /dev/null @@ -1,34 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. 
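On the model-parallel settings: these inference configs set `tensor_model_parallel_size` and `pipeline_model_parallel_size` to -1 so they are read back from the restored model, and the eval script above then asserts that `devices * num_nodes` equals their product. A toy restatement of that arithmetic, useful when choosing `trainer.devices`:

```python
# Restates the world-size assertion from the eval script above: the number of
# launched processes must equal TP size times PP size (these eval configs run a
# single data-parallel replica).
def check_world_size(devices: int, num_nodes: int, tp: int, pp: int) -> None:
    world_size = devices * num_nodes
    assert world_size == tp * pp, (
        f"world size {world_size} != tensor_parallel({tp}) * pipeline_parallel({pp})"
    )

check_world_size(devices=2, num_nodes=1, tp=2, pp=1)  # OK: a TP=2 model on 2 GPUs
check_world_size(devices=1, num_nodes=1, tp=1, pp=1)  # OK: the single-GPU defaults above
```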
- compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -gpt_model_file: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -data_paths: ??? # prompts for GPT inference -server: False # whether launch the inference server -port: 5555 # the port number for the inference server -batch_size: 8 -num_workers: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml deleted file mode 100755 index a7829e212f53..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml +++ /dev/null @@ -1,154 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_adapter_dim${model.adapter_tuning.adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 0.2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: null - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.nemo_path set below, - filename: 'megatron_gpt_adapter_tuning--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - always_save_nemo: True - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - - restore_path: null # Path to an existing adapter .nemo model 
you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - existing_tasks: [] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ["rte"] # List of new tasknames to be prompt-tuned - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. - answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - answer_only_loss: False - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "target" - - - taskname: "taskname" - prompt_template: "{prompt} {completion}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: "prompt" - answer_only_loss: True - answer_field: "completion" - - adapter_tuning: - type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' - adapter_dim: 50 - adapter_dropout: 0.1 - norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] - - data: - train_ds: ??? # expects a list of paths to training data files - validation_ds: ??? 
# expects a paths to validation data files - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml deleted file mode 100644 index 0cb8467c66f0..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml +++ /dev/null @@ -1,32 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -gpt_model_file: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -data_paths: ??? 
# prompts for GPT inference -batch_size: 8 -num_workers: 8 \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml deleted file mode 100755 index b5e2afb73186..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml +++ /dev/null @@ -1,130 +0,0 @@ -name: ia3_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 0.2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: null - wandb_logger_kwargs: - project: null - name: null - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.nemo_path set below, - filename: 'megatron_gpt_ia3_tuning--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - - restore_path: null # Path to an existing adapter .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - existing_tasks: [] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ["rte"] # List of new tasknames to be prompt-tuned - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - answer_only_loss: True - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "target" - - data: - train_ds: ??? # expects a list of paths to training data files - validation_ds: ??? # expects a paths to validation data files - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml deleted file mode 100644 index fcd92a401970..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml +++ /dev/null @@ -1,37 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? 
# .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml deleted file mode 100644 index 5fc411f61b11..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml +++ /dev/null @@ -1,135 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_adapter_dim${model.adapter_tuning.adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 - log_every_n_steps: 10 - val_check_interval: 20 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below - filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["squad"] - - task_templates: - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "target" - - adapter_tuning: - type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' - adapter_dim: 50 - adapter_dropout: 0.1 - norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] - - data: - train_ds: ??? - validation_ds: ??? - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false - - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml deleted file mode 100644 index fcd92a401970..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml +++ /dev/null @@ -1,37 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? 
- num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml deleted file mode 100644 index 5c12993bd12e..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml +++ /dev/null @@ -1,112 +0,0 @@ -name: ia3_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 - log_every_n_steps: 10 - val_check_interval: 20 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True - filename: "megatron_t5_ia3_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 4 - micro_batch_size: 2 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? 
# Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["squad"] - - task_templates: - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "target" - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - data: - train_ds: ["data/squad_train.jsonl"] - validation_ds: ["data/squad_val.jsonl"] - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false - - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml deleted file mode 100644 index 008241d19389..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml +++ /dev/null @@ -1,36 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_t5_lora_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. 
This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml deleted file mode 100644 index 8f46f1f3720d..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml +++ /dev/null @@ -1,99 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_lora_dim${model.lora_tuning.kqv_adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - replace_sampler_ddp: False - max_epochs: 10 - max_steps: 1000 - log_every_n_steps: 1 - val_check_interval: 2 - accumulate_grad_batches: 1 - gradient_clip_val: 0.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: nemo-lora-mt0-tr - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_from_checkpoint: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below - filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 4 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["taskname"] - - task_templates: - - taskname: "taskname" # The task name - prompt_template: "{prompt} {completion}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "prompt" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_field: "completion" - - lora_tuning: - kqv_adapter_dim: 24 - kv_adapter_dim: 16 - q_adapter_dim: 8 - adapter_dropout: 0.1 - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - - data: - train_ds: ??? - validation_ds: ??? - shuffle: True - num_workers: 0 - pin_memory: True - add_eos: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py deleted file mode 100644 index 5fd07e85ce2d..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5AdapterLearningModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5AdapterLearningModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - adapter_tuning_cfg = MegatronT5AdapterLearningModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(adapter_tuning_cfg): - adapter_tuning_cfg.language_model_path = cfg.language_model_path - adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5AdapterLearningModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = 
OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py deleted file mode 100644 index 96a8cba64863..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5AdapterLearningModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5AdapterLearningModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5AdapterLearningModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py deleted file mode 100644 index cc9dfef059b8..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5InfusedAdapterModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_ia3_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5InfusedAdapterModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - ia3_tuning_cfg = MegatronT5InfusedAdapterModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(ia3_tuning_cfg): - 
ia3_tuning_cfg.language_model_path = cfg.language_model_path - ia3_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - ia3_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - ia3_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5InfusedAdapterModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=ia3_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py deleted file mode 100644 index 1edc87a416a4..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5InfusedAdapterModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_ia3_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.pretrained_language_model_path = cfg.model.language_model_path - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5InfusedAdapterModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5InfusedAdapterModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py deleted file mode 100644 index 38032d06a8c8..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5LoraModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - adapter_tuning_cfg = MegatronT5LoraModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(adapter_tuning_cfg): - 
adapter_tuning_cfg.language_model_path = cfg.language_model_path - adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5LoraModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py deleted file mode 100644 index 7178bf8145ba..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_lora_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5LoraModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5LoraModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main()
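
For reference, the three removed evaluation scripts (megatron_t5_adapter_eval.py, megatron_t5_ia3_eval.py, megatron_t5_lora_eval.py) are identical apart from the MegatronT5* class they restore. The sketch below condenses that shared inference pattern into one place; the checkpoint and data paths are placeholders, and the inference keys mirror the defaults in the removed megatron_t5_lora_inference.yaml.

    # Minimal sketch of the inference flow shared by the removed T5 adapter/IA3/LoRA eval scripts.
    # Paths are placeholders; swap MegatronT5LoraModel for the adapter or IA3 class as needed.
    from omegaconf import open_dict
    from pytorch_lightning.trainer.trainer import Trainer

    from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

    BASE_NEMO = "/path/to/t5_base.nemo"          # frozen base T5 checkpoint
    ADAPTER_NEMO = "/path/to/lora_adapter.nemo"  # produced by the (removed) megatron_t5_lora_tuning.py

    trainer = Trainer(
        strategy=NLPDDPStrategy(), devices=1, num_nodes=1, accelerator="gpu", precision=16, logger=False
    )

    # Re-read the adapter checkpoint's config and point it at the current base model path,
    # exactly as the removed scripts did before restoring the full model.
    peft_cfg = MegatronT5LoraModel.restore_from(ADAPTER_NEMO, trainer=trainer, return_config=True)
    with open_dict(peft_cfg):
        peft_cfg.language_model_path = BASE_NEMO
        peft_cfg.pretrained_language_model_path = BASE_NEMO

    model = MegatronT5LoraModel.restore_from(ADAPTER_NEMO, trainer=trainer, override_config_path=peft_cfg)
    model.freeze()

    # Inference settings mirroring the removed megatron_t5_lora_inference.yaml defaults.
    model.set_inference_config(
        {
            "greedy": True, "top_k": 0, "top_p": 0.9, "temperature": 1.0, "add_BOS": True,
            "tokens_to_generate": 30, "all_probs": False, "repetition_penalty": 1.2,
            "min_tokens_to_generate": 0, "compute_logprob": False,
        }
    )

    # Build the evaluation dataloader and run prediction, as the removed scripts did.
    test_ds, test_dl = model.build_virtual_prompt_dataset(
        dataset_paths=["/path/to/test.jsonl"], batch_size=4, for_train=False,
        drop_last=False, shuffle=False, num_workers=1, pin_memory=True,
    )
    predictions = trainer.predict(model, test_dl)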
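
Every file removed above carries the same deprecation notice: use MegatronT5SFTModel.add_adapter() and the consolidated megatron_t5_peft_tuning.py / megatron_t5_peft_eval.py scripts instead. As a rough orientation only, a replacement tuning run looks something like the sketch below; add_adapter() is the only call confirmed by the notices, while the LoraPEFTConfig class, its module path, the MegatronT5SFTModel import path, and the constructor argument are assumptions that may differ across NeMo versions.

    # Hedged sketch of the replacement PEFT flow named in the deprecation notices.
    # Only MegatronT5SFTModel.add_adapter() is confirmed by those notices; the import paths and
    # the LoraPEFTConfig class/argument below are assumptions and may need adjusting.
    from pytorch_lightning import Trainer

    from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel  # assumed path
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
    from nemo.collections.nlp.parts.peft_config import LoraPEFTConfig  # assumed class/module

    trainer = Trainer(strategy=NLPDDPStrategy(), devices=1, accelerator="gpu", precision=16)

    # Restore the base T5 .nemo checkpoint, then attach trainable LoRA adapters to it; the base
    # weights stay frozen and only the adapter weights are updated by fit(). The updated scripts
    # additionally merge the PEFT training config into the model config before restoring
    # (data paths, optimizer, etc.), which is omitted here.
    model = MegatronT5SFTModel.restore_from("/path/to/t5_base.nemo", trainer=trainer)
    model.add_adapter(LoraPEFTConfig(model.cfg))  # assumed constructor signature
    trainer.fit(model)

In day-to-day use, the updated megatron_t5_peft_tuning.py and megatron_t5_peft_eval.py scripts referenced in the notices wrap this flow behind Hydra configs, analogous to the scripts removed here.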