Skip to content

Commit

Permalink
Merge final bugfix r1.3.0 (#2749)
Browse files Browse the repository at this point in the history
* update jenkins branch

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

* Replaced unfold() with split_view() (#2671)

* Replaced unfold() with split_view()

Signed-off-by: Boris Fomitchev <[email protected]>

* fixed typo

Signed-off-by: Boris Fomitchev <[email protected]>

Co-authored-by: Somshubra Majumdar <[email protected]>

* Fix issues with ASR notebooks (#2698)

Signed-off-by: smajumdar <[email protected]>

* Allow non divisible split_size (#2699)

* bugfix

Signed-off-by: Jason <[email protected]>

* bugfix

Signed-off-by: Jason <[email protected]>

* Fix the feat_out param. (#2714)

* broken link fix (#2720)

Signed-off-by: nithinraok <[email protected]>

* rename (#2721)

Signed-off-by: fayejf <[email protected]>

* apply fix (#2726)

Signed-off-by: Jason <[email protected]>

* [DOCS] Updating adobe and copyright for docs (#2740)

* update

Signed-off-by: ericharper <[email protected]>

* update

Signed-off-by: ericharper <[email protected]>

* update

Signed-off-by: ericharper <[email protected]>

* update

Signed-off-by: ericharper <[email protected]>

* update

Signed-off-by: ericharper <[email protected]>

* update

Signed-off-by: ericharper <[email protected]>

* update notebook branch

Signed-off-by: ericharper <[email protected]>

* update jenkins branch

Signed-off-by: ericharper <[email protected]>

* update jenkins test to use less memory

Signed-off-by: ericharper <[email protected]>

* update jenkins test to use less memory

Signed-off-by: ericharper <[email protected]>

Co-authored-by: Boris Fomitchev <[email protected]>
Co-authored-by: Somshubra Majumdar <[email protected]>
Co-authored-by: Jason <[email protected]>
Co-authored-by: Vahid Noroozi <[email protected]>
Co-authored-by: Nithin Rao <[email protected]>
Co-authored-by: fayejf <[email protected]>
  • Loading branch information
7 people authored Aug 31, 2021
1 parent 122ee9a commit 234e496
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 68 deletions.
14 changes: 12 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -1246,15 +1246,20 @@ pipeline {
model.shared_tokenizer=False \
model.encoder_tokenizer.library=huggingface \
model.encoder.library=huggingface \
model.encoder.model_name=bert-base-cased \
model.encoder.model_name=distilbert-base-cased \
model.encoder.pretrained=true \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tokens_in_batch=128 \
model.validation_ds.tokens_in_batch=128 \
model.test_ds.tokens_in_batch=128 \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder.hidden_size=128 \
model.decoder.inner_size=256 \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
exp_manager=null \
Expand All @@ -1275,14 +1280,19 @@ pipeline {
model.encoder.model_name=null \
model.encoder.pretrained=false \
+model.encoder._target_=transformers.BertConfig \
+model.encoder.hidden_size=1536 \
+model.encoder.hidden_size=48 \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tokens_in_batch=128 \
model.validation_ds.tokens_in_batch=128 \
model.test_ds.tokens_in_batch=128 \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder.hidden_size=128 \
model.decoder.inner_size=256 \
trainer.gpus=[1] \
+trainer.fast_dev_run=true \
exp_manager=null \
Expand Down
13 changes: 13 additions & 0 deletions docs/source/_templates/layouts.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% extends "!layout.html" %}

{% block extrahead %}

<script src="//assets.adobedtm.com/5d4962a43b79/c1061d2c5e7b/launch-191c2462b890.min.js"></script>

{% endblock %}

{% block footer %}

<script type="text/javascript">_satellite.pageBottom();</script>

{% endblock %}
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@

# General information about the project.
project = "NVIDIA NeMo"
copyright = "2021-, NVIDIA CORPORATION"
copyright = "© 2020-2021 NVIDIA Corporation & Affiliates. All rights reserved."
author = "NVIDIA CORPORATION"

# The version info for the project you're documenting, acts as replacement for
Expand Down
6 changes: 3 additions & 3 deletions nemo/collections/asr/modules/conformer_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def __init__(
)
self._feat_out = d_model
else:
self._feat_out = d_model
self.pre_encode = nn.Linear(feat_in, d_model)
self._feat_out = d_model

if not untie_biases and self_attention_model == "rel_pos":
d_head = d_model // n_heads
Expand Down Expand Up @@ -199,8 +199,8 @@ def __init__(
)
self.layers.append(layer)

if feat_out > 0 and feat_out != self.output_dim:
self.out_proj = nn.Linear(self.feat_out, feat_out)
if feat_out > 0 and feat_out != self._feat_out:
self.out_proj = nn.Linear(self._feat_out, feat_out)
self._feat_out = feat_out
else:
self.out_proj = None
Expand Down
36 changes: 20 additions & 16 deletions nemo/collections/tts/models/talknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,19 +370,23 @@ def list_available_models(cls) -> 'List[PretrainedModelInfo]':
Returns:
List of available pre-trained models.
"""
list_of_models = []
model = PretrainedModelInfo(
pretrained_model_name="tts_en_talknet",
location=(
"https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_talknet/versions/1.0.0rc1/files"
"/talknet_spect.nemo"
),
description=(
"This model is trained on LJSpeech sampled at 22050Hz, and can be used to generate female "
"English voices with an American accent."
),
class_=cls, # noqa
aliases=["TalkNet-22050Hz"],
)
list_of_models.append(model)
return list_of_models
# list_of_models = []
# model = PretrainedModelInfo(
# pretrained_model_name="tts_en_talknet",
# location=(
# "https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_talknet/versions/1.0.0rc1/files"
# "/talknet_spect.nemo"
# ),
# description=(
# "This model is trained on LJSpeech sampled at 22050Hz, and can be used to generate female "
# "English voices with an American accent."
# ),
# class_=cls, # noqa
# aliases=["TalkNet-22050Hz"],
# )
# list_of_models.append(model)
# return list_of_models

# NOTE: TalkNet loading is currently broken in main and newer. Please revert to r1.2.0 if interested in
# TalkNet.
pass
2 changes: 1 addition & 1 deletion tutorials/asr/Intro_to_Transducers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@
"\n",
"The Joint model config has several essential components which we discuss below :\n",
"\n",
"1) `log_softmax`: Due to the cost of computing softmax on such large tensors, the Numba CUDA implementation of RNNT loss will implicitly compute the log softmax when called (so its inputs should be logits). The CPU version of the loss doesn't face such memory issues so it requires log-probabilities instead. Since the behaviour is different for CPU-GPU, the `null` value will automatically switch behaviour dependent on whether the input tensor is on a CPU or GPU device.\n",
"1) `log_softmax`: Due to the cost of computing softmax on such large tensors, the Numba CUDA implementation of RNNT loss will implicitly compute the log softmax when called (so its inputs should be logits). The CPU version of the loss doesn't face such memory issues so it requires log-probabilities instead. Since the behaviour is different for CPU-GPU, the `None` value will automatically switch behaviour dependent on whether the input tensor is on a CPU or GPU device.\n",
"\n",
"2) `preserve_memory`: This flag will call `torch.cuda.empty_cache()` at certain critical sections when computing the Joint tensor. While this operation might allow us to preserve some memory, the empty_cache() operation is tremendously slow and will slow down training by an order of magnitude or more. It is available to use but not recommended.\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions tutorials/asr/Online_ASR_Microphone_Demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"This notebook demonstrates automatic speech recognition (ASR) from a microphone's stream in NeMo.\n",
"\n",
"It is **not a recommended** way to do inference in production workflows. If you are interested in \n",
"production-level inference using NeMo ASR models, please sign-up to Jarvis early access program: https://developer.nvidia.com/nvidia-jarvis"
"production-level inference using NeMo ASR models, please refer to NVIDIA RIVA: https://developer.nvidia.com/riva"
]
},
{
Expand Down Expand Up @@ -537,4 +537,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
2 changes: 1 addition & 1 deletion tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -877,4 +877,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@
"source": [
"To generate VAD-predicted time steps, we perform VAD inference to obtain frame-level predictions &#8594; (optional: use decision smoothing) &#8594; given `threshold`, write speech segments to an RTTM-like time stamps manifest.\n",
"\n",
"we use vad decision smoothing (87.5% overlap median) as described [here](https://github.com/NVIDIA/NeMo/blob/stable/nemo/collections/asr/parts/vad_utils.py)\n",
"We use VAD decision smoothing (87.5% overlap median) as described [here](https://github.com/NVIDIA/NeMo/blob/stable/nemo/collections/asr/parts/utils/vad_utils.py).\n",
"\n",
"You can also tune the threshold on your dev set using the provided [script](https://github.com/NVIDIA/NeMo/blob/stable/scripts/voice_activity_detection/vad_tune_threshold.py)."
]
Expand Down
6 changes: 4 additions & 2 deletions tutorials/tts/1_Inference_ModelSelect.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"- [Tacotron 2](https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_tacotron2)\n",
"- [Glow-TTS](https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_glowtts)\n",
"- [TalkNet](https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_talknet)\n",
" - <span style=\"color:red\"> NOTE: TalkNet loading is not working in main. Please use r1.2.0 for TalkNet inference </span>\n",
"- [FastPitch](https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_fastpitch)\n",
"- [FastSpeech2](https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_fastspeech_2)\n",
"\n",
Expand All @@ -93,7 +94,8 @@
"from IPython.display import display\n",
"\n",
"supported_e2e = [\"fastpitch_hifigan\", \"fastspeech2_hifigan\", None]\n",
"supported_spec_gen = [\"tacotron2\", \"glow_tts\", \"talknet\", \"fastpitch\", \"fastspeech2\", None]\n",
"# supported_spec_gen = [\"tacotron2\", \"glow_tts\", \"talknet\", \"fastpitch\", \"fastspeech2\", None]\n",
"supported_spec_gen = [\"tacotron2\", \"glow_tts\", \"fastpitch\", \"fastspeech2\", None]\n",
"supported_audio_gen = [\"waveglow\", \"squeezewave\", \"uniglow\", \"melgan\", \"hifigan\", \"griffin-lim\", None]\n",
"\n",
"print(\"Select the model(s) that you want to use. Please choose either 1 end-to-end model or 1 spectrogram generator and 1 vocoder.\")\n",
Expand Down Expand Up @@ -388,7 +390,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.10"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 234e496

Please sign in to comment.