From 6865c39d85c4f3b120a02fe74024ad10d5fab286 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 9 Feb 2024 07:37:16 -0800
Subject: [PATCH] bug fix in fast-conformer-aed.yaml and adding jenkins test for speech_to_text_aed model (#8368) (#8383)

Signed-off-by: Krishna Puvvada
Co-authored-by: Krishna Puvvada <93558329+krishnacpuvvada@users.noreply.github.com>
Co-authored-by: Krishna Puvvada
Co-authored-by: Somshubra Majumdar
---
Review note: the cleanup step of the new 'L2: Speech to Text AED' stage
removes examples/asr/speech_to_text_aed_results, i.e. the same directory
that is passed as exp_manager.exp_dir, so the run's output does not linger
on the CI agent.

 Jenkinsfile                                   | 42 +++++++++++++++++++
 .../speech_multitask/fast-conformer_aed.yaml  |  2 +-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index f0f21170c9cb..474db51efdc8 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -605,6 +605,48 @@ pipeline {

     }

+    stage('L2: Speech to Text AED') {
+      when {
+        anyOf {
+          branch 'r1.23.0'
+          changeRequest target: 'r1.23.0'
+        }
+      }
+      steps {
+        sh 'python examples/asr/speech_multitask/speech_to_text_aed.py \
+        model.prompt_format=canary \
+        model.model_defaults.asr_enc_hidden=256 \
+        model.model_defaults.lm_dec_hidden=256 \
+        model.encoder.n_layers=12 \
+        model.transf_encoder.num_layers=0 \
+        model.transf_decoder.config_dict.num_layers=12 \
+        model.train_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_train.json \
+        ++model.train_ds.is_tarred=false \
+        model.train_ds.batch_duration=60 \
+        +model.train_ds.text_field="answer" \
+        +model.train_ds.lang_field="target_lang" \
+        model.validation_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_val.json \
+        +model.validation_ds.text_field="answer" \
+        +model.validation_ds.lang_field="target_lang" \
+        model.test_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_val.json \
+        +model.test_ds.text_field="answer" \
+        +model.test_ds.lang_field="target_lang" \
+        model.tokenizer.langs.spl_tokens.dir=/home/TestData/asr_tokenizers/canary/canary_spl_tokenizer_v32 \
+        model.tokenizer.langs.spl_tokens.type="bpe" \
+        model.tokenizer.langs.en.dir=/home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4 \
+        model.tokenizer.langs.en.type=bpe \
+        ++model.tokenizer.langs.es.dir=/home/TestData/asr_tokenizers/canary/es/tokenizer_spe_bpe_v1024_max_4 \
+        ++model.tokenizer.langs.es.type=bpe \
+        trainer.devices=[0] \
+        trainer.accelerator="gpu" \
+        +trainer.use_distributed_sampler=false \
+        +trainer.fast_dev_run=True \
+        exp_manager.exp_dir=examples/asr/speech_to_text_aed_results'
+        sh 'rm -rf examples/asr/speech_to_text_aed_results'
+      }
+
+    }
+
     stage('L2: Speaker dev run') {
       when {
         anyOf {
diff --git a/examples/asr/conf/speech_multitask/fast-conformer_aed.yaml b/examples/asr/conf/speech_multitask/fast-conformer_aed.yaml
index 295821871a70..f6adc68e8ab4 100644
--- a/examples/asr/conf/speech_multitask/fast-conformer_aed.yaml
+++ b/examples/asr/conf/speech_multitask/fast-conformer_aed.yaml
@@ -38,7 +38,7 @@ model:
     # https://github.com/NVIDIA/NeMo/blob/main/docs/source/asr/datasets.rst#lhotse-dataloading
     # You can also check the following configuration dataclass:
     # https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/common/data/lhotse/dataloader.py#L36
-    batch_size: None
+    batch_size: null
     batch_duration: 360
     quadratic_duration: 15
     use_bucketing: True