NVIDIA · Davood-M · Jun 8, 2023 · May 9, 2023 · May 9, 2023 · May 9, 2023
diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml
@@ -0,0 +1,36 @@
+inference:
+  greedy: True # If True, use greedy decoding; otherwise sample using top_k / top_p / temperature
+  top_k: 0  # The number of highest probability vocabulary tokens to keep for top-k-filtering.
+  top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+  temperature: 1.0 # sampling temperature
+  add_BOS: True # add the bos token at the beginning of the prompt
+  tokens_to_generate: 30 # The maximum number of tokens to generate.
+  all_probs: False  # whether return the log prob for all the tokens in vocab
+  repetition_penalty: 1.2  # The parameter for repetition penalty. 1.0 means no penalty.
+  min_tokens_to_generate: 0  # The minimum length of the sequence to be generated.
+  compute_logprob: False  # a flag used to compute logprob of all the input text, a very special case of running inference, default False
+
+
+trainer:
+  devices: 1
+  num_nodes: 1
+  accelerator: gpu
+  logger: False # logger provided by exp_manager
+  precision: 16 # 16, 32, or bf16
+
+data:
+  test_ds: ???
+  num_workers: 1
+  global_batch_size: 4
+  micro_batch_size: 4
+
+tensor_model_parallel_size: -1
+pipeline_model_parallel_size: -1
+pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others)
+language_model_path: ??? # T5 nemo file path # used when starting from a .nemo file
+adapter_model_file: ??? # .nemo file saved during training (using megatron_t5_lora_tuning.py)
+pred_file_path: null # save predictions to this file
+checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the T5 training
+checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading
+hparams_file: null # model configuration file, only used for PTL checkpoint loading
+batch_size: 8 
diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml
@@ -0,0 +1,99 @@
+name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_lora_dim${model.lora_tuning.kqv_adapter_dim}
+
+trainer:
+  devices: 1
+  accelerator: gpu
+  num_nodes: 1
+  precision: 16
+  logger: False 
+  enable_checkpointing: False
+  replace_sampler_ddp: False
+  max_epochs: 10
+  max_steps: 1000
+  log_every_n_steps: 1
+  val_check_interval: 2
+  accumulate_grad_batches: 1
+  gradient_clip_val: 0.0
+  resume_from_checkpoint: null 
+  benchmark: False
+
+exp_manager:
+  explicit_log_dir: null
+  exp_dir: nemo-lora-mt0-tr
+  name: ${name}
+  create_wandb_logger: False
+  wandb_logger_kwargs:
+    project: null
+    name: null
+  resume_if_exists: True
+  resume_ignore_no_checkpoint: True
+  create_checkpoint_callback: True
+  checkpoint_callback_params:
+    monitor: reduced_train_loss
+    save_top_k: 1
+    mode: min
+    save_nemo_on_train_end: True # NOTE(review): inherited prompt-learning configs say this should be False because the tuned model file is saved at model.nemo_path set below - confirm
+    filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}"
+    model_parallel_size: ${model.tensor_model_parallel_size}
+    save_best_model: True
+
+model:
+  seed: 1234
+  nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the LoRA adapter parameters will be saved
+  virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts
+  encoder_seq_length: 2048
+  gradient_as_bucket_view: false
+  tensor_model_parallel_size: 1
+  pipeline_model_parallel_size: 1 
+  global_batch_size: 4
+  micro_batch_size: 4
+  validation_global_batch_size: ${model.global_batch_size}
+  validation_micro_batch_size: ${model.micro_batch_size}
+  validation_drop_last: False
+  report_validation_metric: False
+  validation_metric: accuracy
+
+  restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with
+  language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required
+  existing_tasks: []
+  new_tasks: ["taskname"] 
+
+  task_templates: 
+  - taskname: "taskname" # The task name
+    prompt_template: "{prompt} {completion}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|>
+    total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time.
+    virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens
+    truncate_field: "prompt" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped.
+    answer_field: "completion"
+
+  lora_tuning:
+      kqv_adapter_dim: 24
+      kv_adapter_dim: 16
+      q_adapter_dim: 8
+      adapter_dropout: 0.1
+      column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal
+      row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal
+
+  data:
+    train_ds: ???
+    validation_ds: ???
+    shuffle: True
+    num_workers: 0
+    pin_memory: True
+    add_eos: True
+
+
+  optim:
+    name: fused_adam
+    lr: 1e-3
+    weight_decay: 0.01 
+    betas: 
+    - 0.9
+    - 0.98
+    sched:
+      name: CosineAnnealing
+      warmup_steps: 50
+      constant_steps: 0
+      min_lr: 0.0
+      monitor: val_loss
+      reduce_on_plateau: false
diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import torch.multiprocessing as mp
+from megatron.core import parallel_state
+from omegaconf import OmegaConf
+from omegaconf.omegaconf import open_dict
+from pytorch_lightning.trainer.trainer import Trainer
+
+from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel
+from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel
+from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
+from nemo.core.config import hydra_runner
+from nemo.utils.app_state import AppState
+
+mp.set_start_method("spawn", force=True)
+
+"""
+This is the script to run a LoRA Tuned T5 Model for text generation.
+
+Usage:
+    Assume the model has TP=1, PP=1 in the following use cases.
+    a. run greedy inference using a base T5 nemo file, and a LoRA adapter nemo file:
+        python megatron_t5_lora_eval.py \
+            language_model_path=PATH TO T5 MODEL NEMO FILE \
+            adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_t5_lora_tuning.py) \
+            data.test_ds=[PATH TO A JSONL FILE CONTAINING PROMPTS] \
+            pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS
+"""
+
+if not torch.cuda.is_available():
+    raise EnvironmentError("GPU is needed for the inference")
+
+
+@hydra_runner(config_path="conf", config_name="megatron_t5_lora_inference")
+def main(cfg) -> None:
+    """Restore a LoRA-tuned T5 model and run `trainer.predict` over `cfg.data.test_ds`, optionally saving predictions."""
+    # trainer required for restoring model parallel models
+    trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer)
+    # Any negative parallel setting means "read the parallel layout from the base model's saved config".
+    if (
+        cfg.tensor_model_parallel_size < 0
+        or cfg.pipeline_model_parallel_size < 0
+        or cfg.get('pipeline_model_parallel_split_rank', -1) < 0
+    ):
+        model_config = MegatronT5LoraModel.restore_from(
+            restore_path=cfg.language_model_path, trainer=trainer, return_config=True,
+        )
+
+        with open_dict(cfg):
+            cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1)
+            cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1)
+            cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0)
+
+    app_state = AppState()
+    if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1:
+        app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
+        (
+            app_state.tensor_model_parallel_rank,
+            app_state.pipeline_model_parallel_rank,
+            app_state.model_parallel_size,
+            app_state.data_parallel_size,
+            app_state.pipeline_model_parallel_split_rank,
+            app_state.virtual_pipeline_model_parallel_rank,
+        ) = fake_initialize_model_parallel(
+            world_size=app_state.model_parallel_size,
+            rank=trainer.global_rank,
+            tensor_model_parallel_size_=cfg.tensor_model_parallel_size,
+            pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size,
+            pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank,
+        )
+
+    # Load the LoRA adapter model; both paths must be provided in the config
+    if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None:
+        # Update the frozen T5 base model path in case it has changed since tuning
+        adapter_tuning_cfg = MegatronT5LoraModel.restore_from(
+            cfg.adapter_model_file, trainer=trainer, return_config=True
+        )
+        with open_dict(adapter_tuning_cfg):
+            adapter_tuning_cfg.language_model_path = cfg.language_model_path
+            adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path
+            adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size
+            adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size
+
+        # Now load the LoRA model using the overridden config
+        model = MegatronT5LoraModel.restore_from(
+            restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg
+        )
+
+    # Running without an adapter file and a base model path is not supported by this script
+    else:
+        raise NotImplementedError(
+            "This script is meant for inference from a LoRA Tuned T5 Model, config should contain an adapter_model_file and a language_model_path"
+        )
+
+    # check whether the DDP is initialized
+    if parallel_state.is_unitialized():
+
+        def dummy():
+            return
+
+        if trainer.strategy.launcher is not None:
+            trainer.strategy.launcher.launch(dummy, trainer=trainer)
+        trainer.strategy.setup_environment()
+
+    model.freeze()
+
+    # Have to turn off activations_checkpoint_method for inference
+    try:
+        model.model.language_model.encoder.activations_checkpoint_method = None
+    except AttributeError:
+        pass
+
+    try:
+        model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None
+    except AttributeError:
+        pass
+
+    test_ds, test_dl = model.build_virtual_prompt_dataset(
+        dataset_paths=cfg.data.test_ds,
+        batch_size=cfg.data.global_batch_size,
+        for_train=False,
+        drop_last=False,
+        shuffle=False,
+        num_workers=cfg.data.num_workers,
+        pin_memory=True,
+    )
+
+    config = OmegaConf.to_container(cfg.inference)
+    model.set_inference_config(config)
+    response = trainer.predict(model, test_dl)
+    print("***************************")
+    if cfg.pred_file_path is not None:
+        with open(cfg.pred_file_path, "w", encoding="utf-8") as f:
+            for batch in response:
+                for inp, pred in zip(batch['input_text'], batch['preds_text']):
+                    inp = ' '.join(inp.split('\n'))  # flatten newlines so each example stays on one output line
+                    pred = ' '.join(pred.split('\n'))
+                    f.write(f'{inp} {pred}\n')
+        print(f"predictions saved to {cfg.pred_file_path}")
+    else:
+        print(response)
+    print("***************************")
+
+
+if __name__ == '__main__':
+    main()  # noqa pylint: disable=no-value-for-parameter