From 08481fd6df280d002c19dcc4fa69ecc65e187b58 Mon Sep 17 00:00:00 2001
From: dimapihtar
Date: Fri, 18 Oct 2024 06:25:07 -0700
Subject: [PATCH] remove bart tests

Signed-off-by: dimapihtar
---
 .github/workflows/cicd-main.yml | 241 --------------------------------
 1 file changed, 241 deletions(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 75eb71db726b..ad0649687b85 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -1721,104 +1721,6 @@ jobs:
         model.decoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model
       AFTER_SCRIPT: |
         rm -rf examples/nlp/machine_translation/megatron_nmt_results
-
-  L2_Megatron_BART_Perceiver_MIM_Training_TP2:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Perceiver_MIM_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=10 \
-        trainer.limit_val_batches=2 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=10 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/megatron_mim_results \
-        model.tensor_model_parallel_size=2 \
-        model.seq_length=128 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.arch=perceiver \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation="swiglu" \
-        model.encoder.masked_softmax_fusion=False \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method="block" \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=2 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation="swiglu" \
-        model.decoder.masked_softmax_fusion=False \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method="block" \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.micro_batch_size=2 \
-        model.global_batch_size=4 \
-        model.data.data_impl=text_mmap \
-        model.data.data_prefix=[1.0,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src] \
-        model.data.splits_string="\"800,100,100\"" \
-        model.data.whole_word_masking=False \
-        model.tokenizer.library=sentencepiece \
-        model.tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
-        ++model.hiddens.enc_output_name=z \
-        ++model.hiddens.transform.q_z_given_x.cls_name=cond_gaussian \
-        ++model.hiddens.transform.q_z_given_x.hidden_size=64 \
-        ++model.hiddens.loss.mim.cls_name=a_mim \
-        ++model.hiddens.loss.mim.loss_weight=0.5
-        # Change val_check_interval to 1 for resume as the len(dataloder) is 1 due to max_steps being the same as that of training and Lightning 2.0 raises an error
-        # if val_check_interval > len(dataloder: https://github.com/Lightning-AI/lightning/blob/2.0.6/src/lightning/pytorch/loops/fit_loop.py#L259 at the beginning of fit_loop.run()
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=1 \
-        trainer.limit_val_batches=2 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=10 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/megatron_mim_results \
-        model.tensor_model_parallel_size=2 \
-        model.seq_length=128 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.arch=perceiver \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation="swiglu" \
-        model.encoder.masked_softmax_fusion=False \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method="block" \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=2 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation="swiglu" \
-        model.decoder.masked_softmax_fusion=False \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method="block" \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.micro_batch_size=2 \
-        model.global_batch_size=4 \
-        model.data.data_impl=text_mmap \
-        model.data.data_prefix=[1.0,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src] \
-        model.data.splits_string="\"800,100,100\"" \
-        model.data.whole_word_masking=False \
-        model.tokenizer.library=sentencepiece \
-        model.tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
-        ++model.hiddens.enc_output_name=z \
-        ++model.hiddens.transform.q_z_given_x.cls_name=cond_gaussian \
-        ++model.hiddens.transform.q_z_given_x.hidden_size=64 \
-        ++model.hiddens.loss.mim.cls_name=a_mim \
-        ++model.hiddens.loss.mim.loss_weight=0.5
-      AFTER_SCRIPT: |
-        rm -rf examples/nlp/language_modeling/megatron_mim_results
   L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism:
     needs: [cicd-test-container-setup]
     uses: ./.github/workflows/_test_template.yml
@@ -3615,146 +3517,6 @@ jobs:
             --prompt "How do I fix my GPU memory issue? I am seeing out of memory." \
             --tensor_model_parallel_size 1
 
-  L2_Megatron_BART_Pretraining_and_Resume_Training_TP2:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=2 \
-        trainer.limit_val_batches=2 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=3 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
-        model.tensor_model_parallel_size=2 \
-        model.seq_length=128 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation="reglu" \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method="block" \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=4 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation="reglu" \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method="block" \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.data.data_prefix="{train:[1.0,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document],test:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document], validation:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]}"
-
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=2 \
-        trainer.limit_val_batches=5 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=6 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
-        exp_manager.resume_if_exists=True \
-        model.tensor_model_parallel_size=2 \
-        model.seq_length=128 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation="reglu" \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method="block" \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=4 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation="reglu" \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method="block" \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.data.data_prefix="{train:[1.0,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document],test:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document], validation:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]}"
-      AFTER_SCRIPT: |
-        rm -rf examples/nlp/language_modeling/bart_pretrain_results
-
-  L2_Megatron_BART_Pretraining_and_Resume_Training_PP2:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Pretraining_and_Resume_Training_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=10 \
-        trainer.limit_val_batches=2 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=10 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
-        model.pipeline_model_parallel_size=2 \
-        model.pipeline_model_parallel_split_rank=1 \
-        model.seq_length=256 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation=geglu \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method=block \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=4 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation=geglu \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method=block \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.data.respect_document_boundaries=False \
-        model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]
-
-        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
-        trainer.devices=2 \
-        trainer.accelerator=gpu \
-        trainer.log_every_n_steps=1 \
-        trainer.val_check_interval=1 \
-        trainer.limit_val_batches=2 \
-        trainer.accumulate_grad_batches=1 \
-        trainer.max_steps=10 \
-        trainer.precision=16 \
-        trainer.gradient_clip_val=1.0 \
-        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
-        exp_manager.resume_if_exists=True \
-        model.pipeline_model_parallel_size=2 \
-        model.pipeline_model_parallel_split_rank=1 \
-        model.seq_length=256 \
-        model.encoder.num_layers=4 \
-        model.encoder.hidden_size=64 \
-        model.encoder.num_attention_heads=8 \
-        model.encoder.activation=geglu \
-        model.encoder.bias_activation_fusion=False \
-        model.encoder.activations_checkpoint_method=block \
-        model.encoder.activations_checkpoint_num_layers=1 \
-        model.decoder.num_layers=4 \
-        model.decoder.hidden_size=64 \
-        model.decoder.num_attention_heads=8 \
-        model.decoder.activation=geglu \
-        model.decoder.bias_activation_fusion=False \
-        model.decoder.activations_checkpoint_method=block \
-        model.decoder.activations_checkpoint_num_layers=1 \
-        model.data.respect_document_boundaries=False \
-        model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]
-      AFTER_SCRIPT: |
-        rm -rf examples/nlp/language_modeling/bart_pretrain_results
-
   L2_Megatron_Core_T5_PEFT_Lora_TP2:
     needs: [cicd-test-container-setup]
     uses: ./.github/workflows/_test_template.yml
@@ -4577,7 +4339,6 @@ jobs:
       - L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
       - L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
       - L2_Megatron_NMT_Training_TP2
-      - L2_Megatron_BART_Perceiver_MIM_Training_TP2
       - L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism
      - L2_Megatron_Core_Bert_Pretraining_and_Resume_Training
       - L2_RAG_Pipeline_Indexing
@@ -4609,8 +4370,6 @@ jobs:
       - L2_Megatron_T5_w_Mixture_of_Expert_Pretraining
       - L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2
       - L2_Megatron_Core_T5_Eval
-      - L2_Megatron_BART_Pretraining_and_Resume_Training_TP2
-      - L2_Megatron_BART_Pretraining_and_Resume_Training_PP2
       - L2_Megatron_Core_T5_PEFT_Lora_TP2
       - L2_Megatron_Mock_Data_Generation_MockGPTDataset
       - L2_Megatron_Mock_Data_Generation_MockT5Dataset