diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml deleted file mode 100644 index a1c5f774cd11..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_config.yaml +++ /dev/null @@ -1,173 +0,0 @@ -name: megatron_virtual_prompt_gpt - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: 3 # min 25 recommended - max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 # frequency with which training steps are logged - val_check_interval: 1.0 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch - gradient_clip_val: 1.0 - benchmark: False - - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 2 - mode: min - save_nemo_on_train_end: True - filename: 'megatron_gpt_prompt_tune--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training. - - -model: - seed: 1234 - nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'p-tuning' # one of 'prompt-tuning', 'p-tuning', or 'inference' - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: 'accuracy' - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. - existing_tasks: ['boolq', 'intent_and_slot'] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ['rte'] # List of new tasknames to be prompt-tuned - - - - ## Sequence Parallelism - # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. 
- sequence_parallel: False - - ## Activation Checkpoint - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - # 'uniform' divides the total number of transformer layers and checkpoints the input activation - # of each chunk at the specified granularity - # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity - activations_checkpoint_num_layers: null # not used with 'selective' - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "<|VIRTUAL_PROMPT_0|> Passage: {passage} <|VIRTUAL_PROMPT_1|> \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 30 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [20, 10] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. - answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "<|VIRTUAL_PROMPT_0|> intent options: {intent_options} <|VIRTUAL_PROMPT_1|> slot options: {slot_options} <|VIRTUAL_PROMPT_2|> {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 30 - answer_only_loss: False - virtual_token_splits: [15, 10, 5] - truncate_field: null - - - taskname: "rte" - prompt_template: "<|VIRTUAL_PROMPT_0|>{premise}\n{hypothesis}\nAnswer: {answer}" - total_virtual_tokens: 9 - virtual_token_splits: [9] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "<|VIRTUAL_PROMPT_0|> context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 10 - virtual_token_splits: [10] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "taskname" - prompt_template: "<|VIRTUAL_PROMPT_0|> {prompt} {completion}" - total_virtual_tokens: 100 - virtual_token_splits: [100] - truncate_field: null - answer_only_loss: True - answer_field: "completion" - - prompt_tuning: # Prompt tunin specific params - new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new tasks - new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random - - p_tuning: # P-tuning specific params - encoder_type: "tpmlp" # ['tpmlp', 'lstm', 'biglstm', 'mlp'] - dropout: 0.0 - num_layers: 2 # number of layers for MLP or LSTM layers. Note, it has no effect for tpmlp currently as it always assumes it is two layers. 
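The `task_templates` comments above state two invariants: the entries of `virtual_token_splits` must sum to `total_virtual_tokens`, and each split corresponds to one `<|VIRTUAL_PROMPT_#|>` position in `prompt_template`. A minimal standalone sketch of that check, applied to the `boolq` entry — the `check_task_template` helper is illustrative only, not a NeMo API:

```python
import re

def check_task_template(template: dict) -> None:
    # virtual_token_splits must sum to total_virtual_tokens, and the template
    # should contain one <|VIRTUAL_PROMPT_#|> marker per split.
    splits = template["virtual_token_splits"]
    total = template["total_virtual_tokens"]
    markers = re.findall(r"<\|VIRTUAL_PROMPT_\d+\|>", template["prompt_template"])
    assert sum(splits) == total, f"splits {splits} must sum to {total}"
    assert len(markers) == len(splits), "expected one marker per virtual token split"

# The 'boolq' entry above: 20 + 10 virtual tokens at two marker positions.
check_task_template(
    {
        "prompt_template": (
            "<|VIRTUAL_PROMPT_0|> Passage: {passage} <|VIRTUAL_PROMPT_1|> "
            "\nQuestion: {question} \nAnswer: {answer}"
        ),
        "total_virtual_tokens": 30,
        "virtual_token_splits": [20, 10],
    }
)
```

The same invariant holds for the other entries, e.g. the `rte` template uses a single split of 9 tokens at its one marker.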
- encoder_hidden: 2048 # encoder hidden for biglstm and tpmlp - init_std: 0.023 # init std for tpmlp layers - - data: - train_ds: [data/rte_train.jsonl,] - validation_ds: [data/rte_val.jsonl,] - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - train_cache_data_path: null # the path to the train cache data - validation_cache_data_path: null # the path to the validation cache data - test_cache_data_path: null # the path to the test cache data - load_cache: False # whether to load from the cache data - max_seq_length: 1024 # filter out training and validation examples longer than 1024 tokens. Set to None will default to model's encoder length. - min_seq_length: 1 # filter out training and validation examples less than 1 token long. - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning when pipeline parallel > 1 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - monitor: val_loss - reduce_on_plateau: false - - # required for reporting validation metrics - inference: - greedy: False - top_k: 0 - top_p: 0.9 - temperature: 1.0 - tokens_to_generate: 30 - repetition_penalty: 1.2 - min_tokens_to_generate: 0 diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml deleted file mode 100644 index 33ca3f06ddfe..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml +++ /dev/null @@ -1,30 +0,0 @@ -inference: - greedy: False # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - batch_size: 1 - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -gpt_model_file: null # GPT nemo file path -virtual_prompt_model_file: ??? # path to a MegatronGPTPromptLearningModel model if you want to use soft prompts -pred_file_path: ??? # Path will model predictions will be written -max_seq_length: 8192 # this will filter out inputs whose length is longer than the set value form the generation process. 
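On the sampling settings repeated in these inference configs: `top_k: 0` disables top-k filtering, and `top_p: 0.9` applies nucleus filtering as described in the comment ("only the most probable tokens with probabilities that add up to top_p or higher are kept"). A plain-Python, NeMo-independent illustration of that rule:

```python
# Keep only the most probable tokens whose cumulative probability reaches top_p;
# sampling is then restricted to this reduced set.
def top_p_filter(probs: dict, top_p: float = 0.9) -> dict:
    kept, cumulative = {}, 0.0
    for token, p in sorted(probs.items(), key=lambda kv: kv[1], reverse=True):
        kept[token] = p
        cumulative += p
        if cumulative >= top_p:
            break
    return kept

print(top_p_filter({"a": 0.5, "b": 0.3, "c": 0.15, "d": 0.05}, top_p=0.9))
# -> {'a': 0.5, 'b': 0.3, 'c': 0.15}
```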
-data_paths: # paths to .jsonl files you want to perform inference on -num_workers: 8 - \ No newline at end of file diff --git a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml b/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml deleted file mode 100644 index b966ad0eb631..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml +++ /dev/null @@ -1,108 +0,0 @@ -name: p_tuning_squad_t5 - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: 10 - max_steps: -1 - log_every_n_steps: 10 - val_check_interval: 1.0 - gradient_clip_val: 1.0 - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: PromptLearning-T5 - name: ${name} - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 2 - mode: min - save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below - filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - -model: - seed: 1234 - nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: "p-tuning" # one of 'prompt-tuning', 'p-tuning', or 'inference' - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 8 - micro_batch_size: 8 # micro batch size should equal global batch size when pipeline parallel = 1 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. 
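A quick worked check of the batch-size bookkeeping referenced in the comments (`consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches`), assuming this single-GPU T5 example where tensor and pipeline parallel size are 1 and no gradient accumulation is used:

```python
# With one GPU and TP = PP = 1, data_parallel_size = devices * num_nodes = 1.
micro_batch_size = 8
data_parallel_size = 1
accumulate_grad_batches = 1

global_batch_size = micro_batch_size * data_parallel_size * accumulate_grad_batches
assert global_batch_size == 8  # matches model.global_batch_size above

global_step = 1000
consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches
print(consumed_samples)  # 8000: global_batch_size samples per optimizer step
```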
- existing_tasks: [] - new_tasks: ["squad"] - - - task_templates: - - taskname: "squad" - prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}" - total_virtual_tokens: 100 - virtual_token_splits: [100] - truncate_field: context - answer_field: answer - - p_tuning: # P-tuning specific params - encoder_type: "mlp" # Either "mlp" or "lstm", mlp is default - num_layers: 2 # 2 recommended for MLP, 1 recommended for LSTM, must be at least 2 for mlp - dropout: 0.0 - - prompt_tuning: # Prompt tunin specific params - new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new tasks - new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random - - data: - train_ds: ["data/squad_train.jsonl"] - validation_ds: ["data/squad_val.jsonl"] - add_eos: true - add_bos: false - decoder_starts_with_pad: False - add_eos_to_decoder_output: True - add_sentinel_to_input: True - ul2_prompt_token: null # , , - shuffle: true - num_workers: 4 - pin_memory: true - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false \ No newline at end of file diff --git a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml b/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml deleted file mode 100644 index 0b5929a36a62..000000000000 --- a/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning_inference.yaml +++ /dev/null @@ -1,21 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 8 - micro_batch_size: 8 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # path to a pretrained T5 nemo file -virtual_prompt_model_file: ??? # path to a MegatronT5PromptLearningModel nemo file -pred_file_path: ??? # Path were all model predicitons will be written to a text file - - diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py deleted file mode 100644 index 3edca99e15a5..000000000000 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
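The `data` block above points `train_ds`/`validation_ds` at .jsonl files such as `data/squad_train.jsonl`. Assuming each record carries a `taskname` plus the fields named in the matching prompt template (the `squad` template above uses `{context}`, `{question}` and `{answer}`), a hypothetical line of `data/squad_val.jsonl` could be produced like this; the exact schema and the example text are illustrative only:

```python
import json

# Hypothetical record for the "squad" task template; field names mirror the
# placeholders in prompt_template, and the content is made up for illustration.
record = {
    "taskname": "squad",
    "context": "The Eiffel Tower is located in Paris.",
    "question": "Where is the Eiffel Tower located?",
    "answer": "Paris",
}
with open("data/squad_val.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```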
- -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import ( - MegatronT5PromptLearningModel, -) -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - - -""" -This is an example of how to ptune/prompt-tune a pretrained T5 model. -Be sure to use a .nemo T5 model with this code. If you've downloaded -a model from NGC or are otherwise using a MegatronLM model, please use -either megatron_ckpt_to_nemo.py or megatron_lm_ckpt_to_nemo.py found -within this examples directory to convert your model to .nemo format. -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning.yaml") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - plugins = [] - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) - if cfg.trainer.precision == 16 or cfg.trainer.precision == '16-mixed': - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - enabled=False - if cfg.model.pipeline_model_parallel_size > 1 - else True, # turn off the grad scale for pipeline parallel LM model - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugins.append(PipelineMixedPrecisionPlugin(precision='16-mixed', device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt T5 model - if cfg.model.get("restore_path", None): - model = MegatronT5PromptLearningModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - - else: - model = MegatronT5PromptLearningModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py deleted file mode 100644 index 67640138b3ff..000000000000 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import ( - MegatronT5PromptLearningModel, -) -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True -except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False - - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5PromptLearningModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load prompt tuned model, virtual_prompt_model_file and language_model_path must be provided in config - if cfg.get('virtual_prompt_model_file', None) is not None and cfg.get('language_model_path', None) is not None: - - # Update frozen T5 model path in case it has 
changed - prompt_learning_cfg = MegatronT5PromptLearningModel.restore_from( - cfg.virtual_prompt_model_file, trainer=trainer, return_config=True - ) - with open_dict(prompt_learning_cfg): - if cfg.get("language_model_path"): - # This is for backward compatibility with old checkpoints that used `pretrained_language_model_path` instead of `language_model_path`. - if hasattr(prompt_learning_cfg, 'pretrained_language_model_path'): - prompt_learning_cfg.pretrained_language_model_path = cfg.language_model_path - else: - prompt_learning_cfg.language_model_path = cfg.language_model_path - prompt_learning_cfg.micro_batch_size = cfg.data.get('micro_batch_size', 4) - prompt_learning_cfg.global_batch_size = cfg.data.get('global_batch_size', 4) - - # Now load prompt learning model with frozen T5 model base - model = MegatronT5PromptLearningModel.restore_from( - restore_path=cfg.virtual_prompt_model_file, trainer=trainer, override_config_path=prompt_learning_cfg - ) - - else: - raise ValueError("virtual_prompt_model_file and pretrained_language_model_file must be provided in config") - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if model.trainer.strategy.launcher is not None: - model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) - model.trainer.strategy.setup_environment() - - model.freeze() - - _, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - outputs = trainer.predict(model, test_dl) - with open(cfg.pred_file_path, "w", encoding="utf-8") as pred_file: - for batch in outputs: - preds = batch["preds_text"] - for pred in preds: - pred = pred.strip().replace("\n", " ") - pred_file.write(pred + "\n") - print('test finish---------------------------------') - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml deleted file mode 100644 index bf724ad4a060..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_inference.yaml +++ /dev/null @@ -1,34 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. 
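On the model-parallel settings: these inference configs set `tensor_model_parallel_size` and `pipeline_model_parallel_size` to -1 so they are read back from the restored model, and the eval script above then asserts that `devices * num_nodes` equals their product. A toy restatement of that arithmetic, useful when choosing `trainer.devices`:

```python
# Restates the world-size assertion from the eval script above: the number of
# launched processes must equal TP size times PP size (these eval configs run a
# single data-parallel replica).
def check_world_size(devices: int, num_nodes: int, tp: int, pp: int) -> None:
    world_size = devices * num_nodes
    assert world_size == tp * pp, (
        f"world size {world_size} != tensor_parallel({tp}) * pipeline_parallel({pp})"
    )

check_world_size(devices=2, num_nodes=1, tp=2, pp=1)  # OK: a TP=2 model on 2 GPUs
check_world_size(devices=1, num_nodes=1, tp=1, pp=1)  # OK: the single-GPU defaults above
```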
- compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -gpt_model_file: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -data_paths: ??? # prompts for GPT inference -server: False # whether launch the inference server -port: 5555 # the port number for the inference server -batch_size: 8 -num_workers: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml deleted file mode 100755 index a7829e212f53..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_adapter_tuning_config.yaml +++ /dev/null @@ -1,154 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_adapter_dim${model.adapter_tuning.adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 0.2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: null - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.nemo_path set below, - filename: 'megatron_gpt_adapter_tuning--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - always_save_nemo: True - save_best_model: True - create_early_stopping_callback: True - early_stopping_callback_params: - monitor: "val_loss" - mode: "min" - min_delta: 0.001 - patience: 10 - verbose: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - - restore_path: null # Path to an existing adapter .nemo model 
you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - existing_tasks: [] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ["rte"] # List of new tasknames to be prompt-tuned - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. - answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - answer_only_loss: False - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "target" - - - taskname: "taskname" - prompt_template: "{prompt} {completion}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: "prompt" - answer_only_loss: True - answer_field: "completion" - - adapter_tuning: - type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' - adapter_dim: 50 - adapter_dropout: 0.1 - norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] - - data: - train_ds: ??? # expects a list of paths to training data files - validation_ds: ??? 
# expects a paths to validation data files - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml deleted file mode 100644 index 0cb8467c66f0..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_inference.yaml +++ /dev/null @@ -1,32 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -gpt_model_file: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -data_paths: ??? 
# prompts for GPT inference -batch_size: 8 -num_workers: 8 \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml deleted file mode 100755 index b5e2afb73186..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_ia3_tuning_config.yaml +++ /dev/null @@ -1,130 +0,0 @@ -name: ia3_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 0.2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: null - wandb_logger_kwargs: - project: null - name: null - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.nemo_path set below, - filename: 'megatron_gpt_ia3_tuning--{val_loss:.3f}-{step}' - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - - restore_path: null # Path to an existing adapter .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the GPT language model .nemo file, always required - existing_tasks: [] # List of tasks the model has already been p-tuned/prompt-tuned for, needed when a restore path is given - new_tasks: ["rte"] # List of new tasknames to be prompt-tuned - - task_templates: # Add more/replace tasks as needed, these are just examples - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_only_loss: True - answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - answer_only_loss: True - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_only_loss: True - answer_field: "target" - - data: - train_ds: ??? # expects a list of paths to training data files - validation_ds: ??? # expects a paths to validation data files - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 # Constant steps should also be 0 when min_lr=0 - min_lr: 0.0 # min_lr must be 0.0 for prompt learning - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml deleted file mode 100644 index fcd92a401970..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_inference.yaml +++ /dev/null @@ -1,37 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? 
# .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml deleted file mode 100644 index 5fc411f61b11..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_adapter_tuning_config.yaml +++ /dev/null @@ -1,135 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_adapter_dim${model.adapter_tuning.adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 - log_every_n_steps: 10 - val_check_interval: 20 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below - filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 8 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["squad"] - - task_templates: - - taskname: "boolq" # The task name - prompt_template: "Passage: {passage} \nQuestion: {question} \nAnswer: {answer}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "passage" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_field: "answer" - - - taskname: "intent_and_slot" - prompt_template: "intent options: {intent_options} slot options: {slot_options} {utterance} \nintent: {intent} \nslot: {slot}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "target" - - adapter_tuning: - type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' - adapter_dim: 50 - adapter_dropout: 0.1 - norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] - - data: - train_ds: ??? - validation_ds: ??? - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false - - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml deleted file mode 100644 index fcd92a401970..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_inference.yaml +++ /dev/null @@ -1,37 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? 
- num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_gpt_adapter_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml deleted file mode 100644 index 5c12993bd12e..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_ia3_tuning_config.yaml +++ /dev/null @@ -1,112 +0,0 @@ -name: ia3_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 100 - log_every_n_steps: 10 - val_check_interval: 20 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True - filename: "megatron_t5_ia3_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 4 - micro_batch_size: 2 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? 
# Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["squad"] - - task_templates: - - taskname: "squad" - prompt_template: "context: {context} question: {question} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "arc-challenge" - prompt_template: "question: {question} choices: {choices} answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - - taskname: "xsum" - prompt_template: "{source} Summary: {target}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "target" - - - taskname: "rte" - prompt_template: "sentence1: {premise} sentence2: {hypothesis} Answer: {answer}" - total_virtual_tokens: 0 - virtual_token_splits: [] - truncate_field: null - answer_field: "answer" - - data: - train_ds: ["data/squad_train.jsonl"] - validation_ds: ["data/squad_val.jsonl"] - add_eos: True - shuffle: True - num_workers: 8 - pin_memory: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false - - diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml deleted file mode 100644 index 008241d19389..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml +++ /dev/null @@ -1,36 +0,0 @@ -inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 1.0 # sampling temperature - add_BOS: True # add the bos token at the begining of the prompt - tokens_to_generate: 30 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -data: - test_ds: ??? - num_workers: 1 - global_batch_size: 4 - micro_batch_size: 4 - -tensor_model_parallel_size: -1 -pipeline_model_parallel_size: -1 -pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) -language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file -adapter_model_file: ??? # .nemo file saved during training (using megatron_t5_lora_tuning.py) -pred_file_path: null # save predictions to this file -checkpoint_dir: null # checkpoint file dir. 
This is used to load the PTL checkpoint generated during the GPT training -checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null # model configuration file, only used for PTL checkpoint loading -batch_size: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml deleted file mode 100644 index 8f46f1f3720d..000000000000 --- a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml +++ /dev/null @@ -1,99 +0,0 @@ -name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_lora_dim${model.lora_tuning.kqv_adapter_dim} - -trainer: - devices: 1 - accelerator: gpu - num_nodes: 1 - precision: 16 - logger: False - enable_checkpointing: False - replace_sampler_ddp: False - max_epochs: 10 - max_steps: 1000 - log_every_n_steps: 1 - val_check_interval: 2 - accumulate_grad_batches: 1 - gradient_clip_val: 0.0 - benchmark: False - -exp_manager: - explicit_log_dir: null - exp_dir: nemo-lora-mt0-tr - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_from_checkpoint: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 1 - mode: min - save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below - filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" - model_parallel_size: ${model.tensor_model_parallel_size} - save_best_model: True - -model: - seed: 1234 - nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved - virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts - encoder_seq_length: 2048 - gradient_as_bucket_view: false - tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - global_batch_size: 4 - micro_batch_size: 4 - validation_global_batch_size: ${model.global_batch_size} - validation_micro_batch_size: ${model.micro_batch_size} - validation_drop_last: False - report_validation_metric: False - validation_metric: accuracy - - restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with - language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required - existing_tasks: [] - new_tasks: ["taskname"] - - task_templates: - - taskname: "taskname" # The task name - prompt_template: "{prompt} {completion}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> - total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. - virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens - truncate_field: "prompt" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. 
- answer_field: "completion" - - lora_tuning: - kqv_adapter_dim: 24 - kv_adapter_dim: 16 - q_adapter_dim: 8 - adapter_dropout: 0.1 - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - - data: - train_ds: ??? - validation_ds: ??? - shuffle: True - num_workers: 0 - pin_memory: True - add_eos: True - - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.01 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 50 - constant_steps: 0 - min_lr: 0.0 - monitor: val_loss - reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py deleted file mode 100644 index 5fd07e85ce2d..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5AdapterLearningModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5AdapterLearningModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - adapter_tuning_cfg = MegatronT5AdapterLearningModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(adapter_tuning_cfg): - adapter_tuning_cfg.language_model_path = cfg.language_model_path - adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5AdapterLearningModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = 
OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py deleted file mode 100644 index 96a8cba64863..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5AdapterLearningModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5AdapterLearningModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5AdapterLearningModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py deleted file mode 100644 index cc9dfef059b8..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5InfusedAdapterModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_ia3_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5InfusedAdapterModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - ia3_tuning_cfg = MegatronT5InfusedAdapterModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(ia3_tuning_cfg): - 
ia3_tuning_cfg.language_model_path = cfg.language_model_path - ia3_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - ia3_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - ia3_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5InfusedAdapterModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=ia3_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py deleted file mode 100644 index 1edc87a416a4..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5InfusedAdapterModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_ia3_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.pretrained_language_model_path = cfg.model.language_model_path - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5InfusedAdapterModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5InfusedAdapterModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py deleted file mode 100644 index 38032d06a8c8..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch -import torch.multiprocessing as mp -from megatron.core import parallel_state -from omegaconf import OmegaConf -from omegaconf.omegaconf import open_dict -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.decorators import deprecated - -mp.set_start_method("spawn", force=True) - -""" -This is the script to run an Adapter Tuned GPT Model for text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference using a base gpt nemo file, and an adapter nemo file: - python megatron_gpt_ia3_eval.py \ - gpt_model_file=PATH TO GPT MODEL NEMO FILE \ - adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ - data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ - pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." -) -@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") -def main(cfg) -> None: - - # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronT5LoraModel.restore_from( - restore_path=cfg.language_model_path, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - - # Load an adapter model, must be provided in config - if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: - # Update frozen GPT model path in case it has changed - adapter_tuning_cfg = MegatronT5LoraModel.restore_from( - cfg.adapter_model_file, trainer=trainer, return_config=True - ) - with open_dict(adapter_tuning_cfg): - 
adapter_tuning_cfg.language_model_path = cfg.language_model_path - adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path - adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size - adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size - - # Now load prompt learning model with frozen gpt model base - model = MegatronT5LoraModel.restore_from( - restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg - ) - - # Or load regular GPT model - else: - raise NotImplementedError( - "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" - ) - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - try: - model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - test_ds, test_dl = model.build_virtual_prompt_dataset( - dataset_paths=cfg.data.test_ds, - batch_size=cfg.data.global_batch_size, - for_train=False, - drop_last=False, - shuffle=False, - num_workers=cfg.data.num_workers, - pin_memory=True, - ) - - config = OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, test_dl) - print("***************************") - if cfg.pred_file_path is not None: - with open(cfg.pred_file_path, "w", encoding="utf-8") as f: - for batch in response: - for inp, pred in zip(batch['input_text'], batch['preds_text']): - inp = ' '.join(inp.split('\n')) - pred = ' '.join(pred.split('\n')) - f.write(f'{inp} {pred}\n') - print("predictions saved to {}".format(cfg.pred_file_path)) - else: - print(response) - print("***************************") - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py deleted file mode 100644 index 7178bf8145ba..000000000000 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.decorators import deprecated -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - -""" -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 -""" - - -@deprecated( - explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." - "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." 
-) -@hydra_runner(config_path="conf", config_name="megatron_t5_lora_tuning_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) - exp_manager(trainer, cfg.exp_manager) - - # load existing or init new soft prompt GPT model - if cfg.model.get("restore_path", None): - model = MegatronT5LoraModel.restore_from( - cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() - ) - else: - model = MegatronT5LoraModel(cfg.model, trainer=trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main()
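
For reference, the three removed evaluation scripts (megatron_t5_adapter_eval.py, megatron_t5_ia3_eval.py, megatron_t5_lora_eval.py) are identical apart from the MegatronT5* class they restore. The sketch below condenses that shared inference pattern into one place; the checkpoint and data paths are placeholders, and the inference keys mirror the defaults in the removed megatron_t5_lora_inference.yaml.

    # Minimal sketch of the inference flow shared by the removed T5 adapter/IA3/LoRA eval scripts.
    # Paths are placeholders; swap MegatronT5LoraModel for the adapter or IA3 class as needed.
    from omegaconf import open_dict
    from pytorch_lightning.trainer.trainer import Trainer

    from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

    BASE_NEMO = "/path/to/t5_base.nemo"          # frozen base T5 checkpoint
    ADAPTER_NEMO = "/path/to/lora_adapter.nemo"  # produced by the (removed) megatron_t5_lora_tuning.py

    trainer = Trainer(
        strategy=NLPDDPStrategy(), devices=1, num_nodes=1, accelerator="gpu", precision=16, logger=False
    )

    # Re-read the adapter checkpoint's config and point it at the current base model path,
    # exactly as the removed scripts did before restoring the full model.
    peft_cfg = MegatronT5LoraModel.restore_from(ADAPTER_NEMO, trainer=trainer, return_config=True)
    with open_dict(peft_cfg):
        peft_cfg.language_model_path = BASE_NEMO
        peft_cfg.pretrained_language_model_path = BASE_NEMO

    model = MegatronT5LoraModel.restore_from(ADAPTER_NEMO, trainer=trainer, override_config_path=peft_cfg)
    model.freeze()

    # Inference settings mirroring the removed megatron_t5_lora_inference.yaml defaults.
    model.set_inference_config(
        {
            "greedy": True, "top_k": 0, "top_p": 0.9, "temperature": 1.0, "add_BOS": True,
            "tokens_to_generate": 30, "all_probs": False, "repetition_penalty": 1.2,
            "min_tokens_to_generate": 0, "compute_logprob": False,
        }
    )

    # Build the evaluation dataloader and run prediction, as the removed scripts did.
    test_ds, test_dl = model.build_virtual_prompt_dataset(
        dataset_paths=["/path/to/test.jsonl"], batch_size=4, for_train=False,
        drop_last=False, shuffle=False, num_workers=1, pin_memory=True,
    )
    predictions = trainer.predict(model, test_dl)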
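
Every file removed above carries the same deprecation notice: use MegatronT5SFTModel.add_adapter() and the consolidated megatron_t5_peft_tuning.py / megatron_t5_peft_eval.py scripts instead. As a rough orientation only, a replacement tuning run looks something like the sketch below; add_adapter() is the only call confirmed by the notices, while the LoraPEFTConfig class, its module path, the MegatronT5SFTModel import path, and the constructor argument are assumptions that may differ across NeMo versions.

    # Hedged sketch of the replacement PEFT flow named in the deprecation notices.
    # Only MegatronT5SFTModel.add_adapter() is confirmed by those notices; the import paths and
    # the LoraPEFTConfig class/argument below are assumptions and may need adjusting.
    from pytorch_lightning import Trainer

    from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel  # assumed path
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
    from nemo.collections.nlp.parts.peft_config import LoraPEFTConfig  # assumed class/module

    trainer = Trainer(strategy=NLPDDPStrategy(), devices=1, accelerator="gpu", precision=16)

    # Restore the base T5 .nemo checkpoint, then attach trainable LoRA adapters to it; the base
    # weights stay frozen and only the adapter weights are updated by fit(). The updated scripts
    # additionally merge the PEFT training config into the model config before restoring
    # (data paths, optimizer, etc.), which is omitted here.
    model = MegatronT5SFTModel.restore_from("/path/to/t5_base.nemo", trainer=trainer)
    model.add_adapter(LoraPEFTConfig(model.cfg))  # assumed constructor signature
    trainer.fit(model)

In day-to-day use, the updated megatron_t5_peft_tuning.py and megatron_t5_peft_eval.py scripts referenced in the notices wrap this flow behind Hydra configs, analogous to the scripts removed here.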