Commit
remove bart tests
Signed-off-by: dimapihtar <[email protected]>
dimapihtar committed Oct 18, 2024
1 parent 435ad24 commit 08481fd
Showing 1 changed file with 0 additions and 241 deletions.
.github/workflows/cicd-main.yml: 241 changes (0 additions, 241 deletions)
@@ -1721,104 +1721,6 @@ jobs:
        model.decoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model
      AFTER_SCRIPT: |
        rm -rf examples/nlp/machine_translation/megatron_nmt_results
  L2_Megatron_BART_Perceiver_MIM_Training_TP2:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Perceiver_MIM_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure
      SCRIPT: |
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=10 \
        trainer.limit_val_batches=2 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=10 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/megatron_mim_results \
        model.tensor_model_parallel_size=2 \
        model.seq_length=128 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.arch=perceiver \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation="swiglu" \
        model.encoder.masked_softmax_fusion=False \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method="block" \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=2 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation="swiglu" \
        model.decoder.masked_softmax_fusion=False \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method="block" \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.micro_batch_size=2 \
        model.global_batch_size=4 \
        model.data.data_impl=text_mmap \
        model.data.data_prefix=[1.0,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src] \
        model.data.splits_string="\"800,100,100\"" \
        model.data.whole_word_masking=False \
        model.tokenizer.library=sentencepiece \
        model.tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
        ++model.hiddens.enc_output_name=z \
        ++model.hiddens.transform.q_z_given_x.cls_name=cond_gaussian \
        ++model.hiddens.transform.q_z_given_x.hidden_size=64 \
        ++model.hiddens.loss.mim.cls_name=a_mim \
        ++model.hiddens.loss.mim.loss_weight=0.5
        # Change val_check_interval to 1 for the resume run, since len(dataloader) is 1 (max_steps is the same as in the first run) and Lightning 2.0 raises an error
        # if val_check_interval > len(dataloader): https://github.com/Lightning-AI/lightning/blob/2.0.6/src/lightning/pytorch/loops/fit_loop.py#L259 at the beginning of fit_loop.run()
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=1 \
        trainer.limit_val_batches=2 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=10 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/megatron_mim_results \
        model.tensor_model_parallel_size=2 \
        model.seq_length=128 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.arch=perceiver \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation="swiglu" \
        model.encoder.masked_softmax_fusion=False \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method="block" \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=2 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation="swiglu" \
        model.decoder.masked_softmax_fusion=False \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method="block" \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.micro_batch_size=2 \
        model.global_batch_size=4 \
        model.data.data_impl=text_mmap \
        model.data.data_prefix=[1.0,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src] \
        model.data.splits_string="\"800,100,100\"" \
        model.data.whole_word_masking=False \
        model.tokenizer.library=sentencepiece \
        model.tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
        ++model.hiddens.enc_output_name=z \
        ++model.hiddens.transform.q_z_given_x.cls_name=cond_gaussian \
        ++model.hiddens.transform.q_z_given_x.hidden_size=64 \
        ++model.hiddens.loss.mim.cls_name=a_mim \
        ++model.hiddens.loss.mim.loss_weight=0.5
      AFTER_SCRIPT: |
        rm -rf examples/nlp/language_modeling/megatron_mim_results
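The comment in the removed script above explains why the resumed run drops trainer.val_check_interval from 10 to 1. As a minimal, illustrative sketch of that Lightning 2.x behaviour (the toy LightningModule and single-batch dataloader below are assumptions for illustration, not part of the NeMo test), fit() rejects an integer val_check_interval that exceeds the number of training batches:

# Illustrative only: a hypothetical toy module and single-batch dataloader, not the NeMo CI job.
import torch
from torch.utils.data import DataLoader, TensorDataset
import lightning.pytorch as pl

class TinyModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        (x,) = batch
        return self.layer(x).mean()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

# One batch of data, mimicking the len(dataloader) == 1 situation described in the comment.
loader = DataLoader(TensorDataset(torch.randn(2, 4)), batch_size=2)

# val_check_interval=10 exceeds the single training batch, so Lightning 2.x raises a
# ValueError at the start of fit(); using val_check_interval=1, as the resume command
# above does, avoids the error.
trainer = pl.Trainer(max_steps=10, val_check_interval=10, logger=False, enable_checkpointing=False)
try:
    trainer.fit(TinyModel(), train_dataloaders=loader)
except ValueError as err:
    print(f"Lightning rejected the run: {err}")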
  L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism:
    needs: [cicd-test-container-setup]
@@ -3615,146 +3517,6 @@ jobs:
        --prompt "How do I fix my GPU memory issue? I am seeing <mask> out of memory." \
        --tensor_model_parallel_size 1
  L2_Megatron_BART_Pretraining_and_Resume_Training_TP2:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure
      SCRIPT: |
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=2 \
        trainer.limit_val_batches=2 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=3 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
        model.tensor_model_parallel_size=2 \
        model.seq_length=128 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation="reglu" \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method="block" \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=4 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation="reglu" \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method="block" \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.data.data_prefix="{train:[1.0,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document],test:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document], validation:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]}"
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=2 \
        trainer.limit_val_batches=5 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=6 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
        exp_manager.resume_if_exists=True \
        model.tensor_model_parallel_size=2 \
        model.seq_length=128 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation="reglu" \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method="block" \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=4 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation="reglu" \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method="block" \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.data.data_prefix="{train:[1.0,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document],test:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document], validation:[/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]}"
      AFTER_SCRIPT: |
        rm -rf examples/nlp/language_modeling/bart_pretrain_results
  L2_Megatron_BART_Pretraining_and_Resume_Training_PP2:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_BART_Pretraining_and_Resume_Training_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure
      SCRIPT: |
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=10 \
        trainer.limit_val_batches=2 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=10 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
        model.pipeline_model_parallel_size=2 \
        model.pipeline_model_parallel_split_rank=1 \
        model.seq_length=256 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation=geglu \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method=block \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=4 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation=geglu \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method=block \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.data.respect_document_boundaries=False \
        model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]
        python examples/nlp/language_modeling/megatron_bart_pretraining.py \
        trainer.devices=2 \
        trainer.accelerator=gpu \
        trainer.log_every_n_steps=1 \
        trainer.val_check_interval=1 \
        trainer.limit_val_batches=2 \
        trainer.accumulate_grad_batches=1 \
        trainer.max_steps=10 \
        trainer.precision=16 \
        trainer.gradient_clip_val=1.0 \
        exp_manager.exp_dir=examples/nlp/language_modeling/bart_pretrain_results \
        exp_manager.resume_if_exists=True \
        model.pipeline_model_parallel_size=2 \
        model.pipeline_model_parallel_split_rank=1 \
        model.seq_length=256 \
        model.encoder.num_layers=4 \
        model.encoder.hidden_size=64 \
        model.encoder.num_attention_heads=8 \
        model.encoder.activation=geglu \
        model.encoder.bias_activation_fusion=False \
        model.encoder.activations_checkpoint_method=block \
        model.encoder.activations_checkpoint_num_layers=1 \
        model.decoder.num_layers=4 \
        model.decoder.hidden_size=64 \
        model.decoder.num_attention_heads=8 \
        model.decoder.activation=geglu \
        model.decoder.bias_activation_fusion=False \
        model.decoder.activations_checkpoint_method=block \
        model.decoder.activations_checkpoint_num_layers=1 \
        model.data.respect_document_boundaries=False \
        model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document]
      AFTER_SCRIPT: |
        rm -rf examples/nlp/language_modeling/bart_pretrain_results
  L2_Megatron_Core_T5_PEFT_Lora_TP2:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
@@ -4577,7 +4339,6 @@ jobs:
      - L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
      - L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
      - L2_Megatron_NMT_Training_TP2
      - L2_Megatron_BART_Perceiver_MIM_Training_TP2
      - L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism
      - L2_Megatron_Core_Bert_Pretraining_and_Resume_Training
      - L2_RAG_Pipeline_Indexing
@@ -4609,8 +4370,6 @@
      - L2_Megatron_T5_w_Mixture_of_Expert_Pretraining
      - L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2
      - L2_Megatron_Core_T5_Eval
      - L2_Megatron_BART_Pretraining_and_Resume_Training_TP2
      - L2_Megatron_BART_Pretraining_and_Resume_Training_PP2
      - L2_Megatron_Core_T5_PEFT_Lora_TP2
      - L2_Megatron_Mock_Data_Generation_MockGPTDataset
      - L2_Megatron_Mock_Data_Generation_MockT5Dataset
