Fix various issues with broken links and bugs (#8064)

* Fix ASR CTC notebook to use HuggingFace Datasets

  Signed-off-by: smajumdar <[email protected]>

* Various fixes

  Signed-off-by: smajumdar <[email protected]>

---------

Signed-off-by: smajumdar <[email protected]>
NVIDIA · Dec 20, 2023 · 8785bb3 · 8785bb3
1 parent 39d883f
commit 8785bb3
Show file tree

Hide file tree

Showing 9 changed files with 124 additions and 61 deletions.
diff --git a/README.rst b/README.rst
@@ -124,7 +124,7 @@ Key Features
     * `GLUE benchmark <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/glue_benchmark.html>`_
     * `Information retrieval <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/information_retrieval.html>`_
     * `Entity Linking <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/entity_linking.html>`_
-    * `Dialogue State Tracking <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/sgd_qa.html>`_
+    * `Dialogue State Tracking <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/nlp/dialogue.html>`_
     * `Prompt Learning <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html>`_
     * `NGC collection of pre-trained NLP models. <https://ngc.nvidia.com/catalog/collections/nvidia:nemo_nlp>`_
     * `Synthetic Tabular Data Generation <https://developer.nvidia.com/blog/generating-synthetic-data-with-transformers-a-solution-for-enterprise-data-challenges/>`_

diff --git a/docs/source/asr/asr_language_modeling.rst b/docs/source/asr/asr_language_modeling.rst
@@ -440,7 +440,7 @@ works in lexicon decoding mode, it does not work in lexicon-free mode. Word boos
 such that you can manually increase or decrease the probability of emitting certain words. This can be very helpful if you have certain
 uncommon or industry-specific words which you want to ensure transcribe correctly.
 
-For more information on word boosting, see `here <https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tutorials/asr-python-advanced-wordboosting.html>`__
+For more information on word boosting, see `here <https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-customizing.html#word-boosting>`__
 and `here <https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-customizing.html#word-boosting>`__
 
 In order to use word boosting in Nemo, you need to create a simple tab-separated text file which contains each word to be boosted, followed by

diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb
diff --git a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb
@@ -212,7 +212,7 @@
         "if not os.path.exists(get_librispeech_script):\n",
         "    !wget -P $scripts_dir https://raw.githubusercontent.com/{GIT_USER}/NeMo/{GIT_BRANCH}/scripts/dataset_processing/get_librispeech_data.py\n",
         "\n",
-        "# Dowload the data\n",
+        "# Download the data\n",
         "if not speech_dir.is_dir():\n",
         "    speech_dir.mkdir(exist_ok=True)\n",
         "    !python {get_librispeech_script} --data_root={speech_dir} --data_set={speech_data_set}\n",
@@ -1037,7 +1037,7 @@
       },
       "outputs": [],
       "source": [
-        "# Asign equal weights to speech and noise loss\n",
+        "# Assign equal weights to speech and noise loss\n",
         "#   total_loss = 0.5 * speech_loss + 0.5 * noise_loss\n",
         "config_dual_output.model.loss.weight = [0.5, 0.5]"
       ]

diff --git a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb
@@ -296,7 +296,7 @@
         "    with open(\"wordlist.txt\", \"r\", encoding=\"utf-8\") as f:\n",
         "        for line in f:\n",
         "            word = line.strip().casefold()\n",
-        "            # skip words contaning digits\n",
+        "            # skip words containing digits\n",
         "            if re.match(r\".*\\d.*\", word):\n",
         "                continue\n",
         "            if re.match(r\".*[\\[\\]\\(\\)\\+\\,\\.].*\", word):\n",
@@ -335,10 +335,10 @@
         "                sent = re.sub(r\"\\(.+\\)\", r\"\", sent)\n",
         "                # remove quotes from text\n",
         "                sent = sent.replace(\"\\\"\", \"\")\n",
-        "                # skip sentences contaning digits because normalization is out of scope of this tutorial\n",
+        "                # skip sentences containing digits because normalization is out of scope of this tutorial\n",
         "                if re.match(r\".*\\d.*\", sent):\n",
         "                    continue\n",
-        "                # skip sentences contaning abbreviations with period inside the sentence (for the same reason)\n",
+        "                # skip sentences containing abbreviations with period inside the sentence (for the same reason)\n",
         "                if \". \" in sent:\n",
         "                    continue\n",
         "                # skip long sentences as they may cause OOM issues\n",

diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb
@@ -61,7 +61,7 @@
     "# !pip install wget text-unidecode matplotlib>=3.3.2\n",
     "\n",
     "## Install NeMo\n",
-    "BRANCH = 'r1.21.0'\n",
+    "BRANCH = 'r1.22.0'\n",
     "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n",
     "\n",
     "# !pip install opencc-python-reimplemented\n",

diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
@@ -61,7 +61,7 @@
     "# !pip install wget text-unidecode matplotlib>=3.3.2\n",
     "\n",
     "## Install NeMo\n",
-    "BRANCH = 'r1.21.0'\n",
+    "BRANCH = 'r1.22.0'\n",
     "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n",
     "\n",
     "\"\"\"\n",

diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb
@@ -1993,7 +1993,7 @@
     "*   Running pretrained models: [Inference_ModelSelect](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n",
     "*   FastPitch [training](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) and [fine-tuning](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/tts/FastPitch_Finetuning.ipynb)\n",
     "\n",
-    "To learn how to deploy and serve your TTS models, visit [Riva](https://docs.nvidia.com/deeplearning/riva/index.html)."
+    "To learn how to deploy and serve your TTS models, visit [Riva](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/index.html)."
    ]
   },
   {

diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb
@@ -63,7 +63,7 @@
     "# !pip install wget text-unidecode matplotlib>=3.3.2\n",
     "\n",
     "## Install NeMo\n",
-    "BRANCH = 'r1.21.0'\n",
+    "BRANCH = 'r1.22.0'\n",
     "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n",
     "\n",
     "\"\"\"\n",