Fix typos (#6494) (#6495)
Signed-off-by: smajumdar <[email protected]>
Co-authored-by: Somshubra Majumdar <[email protected]>
2 people authored and yaoyu-33 committed May 26, 2023
1 parent c34f647 commit 26eae0a
Showing 12 changed files with 27 additions and 27 deletions.
2 changes: 1 addition & 1 deletion tutorials/VoiceSwapSample.ipynb
@@ -39,7 +39,7 @@
},
"outputs": [],
"source": [
"BRANCH = 'r1.17.0'\n",
"BRANCH = 'r1.18.0'\n",
"!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n"
]
},
16 changes: 8 additions & 8 deletions tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb
@@ -321,7 +321,7 @@
" \"\"\"\n",
" Longest Common Subsequence merge algorithm for aligning two consecutive buffers.\n",
"\n",
" Base alignment construction algorithm is Longest Common Subsequence (reffered to as LCS hear after)\n",
" Base alignment construction algorithm is Longest Common Subsequence (referred to as LCS hear after)\n",
"\n",
" LCS Merge algorithm looks at two chunks i-1 and i, determines the aligned overlap at the\n",
" end of i-1 and beginning of ith chunk, and then clips the subsegment of the ith chunk.\n",
@@ -467,7 +467,7 @@
" j_temp = j_partial + 1 # diagonal next j\n",
"\n",
" j_exp = 0 # number of tokens to expand along the diagonal\n",
" j_skip = 0 # how many diagonals didnt have the token. Incremented by 1 for every row i\n",
" j_skip = 0 # how many diagonals didn't have the token. Incremented by 1 for every row i\n",
"\n",
" for i_idx in range(i_temp, m + 1): # walk from i_partial + 1 => m + 1\n",
" j_any_skip = 0 # If the diagonal element at this location is not found, set to 1\n",
@@ -496,13 +496,13 @@
" j_partial += j_exp\n",
"\n",
" # (3) Given new leftmost j_partial with expansions, backtrack the partial alignments\n",
" # counting how many diagonal skips occured to compute slice length\n",
" # counting how many diagonal skips occurred to compute slice length\n",
" # as well as starting point of slice.\n",
"\n",
" # Partial backward trace to find start of slice\n",
" while i_partial > 0 and j_partial > 0:\n",
" if LCSuff[i_partial][j_partial] == 0:\n",
" # diagonal skip occured, move j to left 1 extra time\n",
" # diagonal skip occurred, move j to left 1 extra time\n",
" j_partial -= 1\n",
" j_skip += 1\n",
"\n",
@@ -531,7 +531,7 @@
" \"slice_idx\": result_idx,\n",
" }\n",
" write_lcs_alignment_to_pickle(LCSuff, filepath=filepath, extras=extras)\n",
" print(\"Wrote alignemnt to :\", filepath)\n",
" print(\"Wrote alignment to :\", filepath)\n",
"\n",
" return result_idx, LCSuff\n",
"\n",
@@ -664,7 +664,7 @@
" ):\n",
" if self.lcs_delay < 0:\n",
" raise ValueError(\n",
" \"Please set LCS Delay valus as `(buffer_duration - chunk_duration) / model_stride_in_secs`\"\n",
" \"Please set LCS Delay values as `(buffer_duration - chunk_duration) / model_stride_in_secs`\"\n",
" )\n",
"\n",
" self.infer_logits()\n",
@@ -1216,7 +1216,7 @@
"worse_idx = find_first_sample_with_alignment(lcs_alignments_path, lcs_worse, start_idx=0)\n",
"worse_sample = lcs_worse[worse_idx]\n",
"\n",
"print(\"A sample where LCS did worse than Middle Token merge algoritm :\")\n",
"print(\"A sample where LCS did worse than Middle Token merge algorithm :\")\n",
"print(\"The texts are structured as (Ground Truth, Middle Token, LCS Merge)\")\n",
"worse_sample"
],
@@ -1256,7 +1256,7 @@
"better_idx = find_first_sample_with_alignment(lcs_alignments_path, lcs_better, start_idx=0)\n",
"better_sample = lcs_better[better_idx]\n",
"\n",
"print(\"A sample where LCS did better than Middle Token merge algoritm :\")\n",
"print(\"A sample where LCS did better than Middle Token merge algorithm :\")\n",
"print(\"The texts are structured as (Ground Truth, Middle Token, LCS Merge)\")\n",
"better_sample"
],
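
The docstring at the top of this file's diff describes aligning the end of chunk i-1 with the start of chunk i via a longest-common-subsequence table (`LCSuff`). As a rough sketch of the table that walk operates on, here is the standard longest-common-substring DP over toy token IDs; the notebook's real merge adds diagonal expansion and skip counting on top of this, so treat it as illustration only:

```python
# Sketch only: the common-suffix DP table the merge walks. The notebook's
# actual algorithm extends this with diagonal expansion and skip handling;
# the token IDs below are made up.
def build_lcs_table(prev_tokens, next_tokens):
    """LCSuff[i][j] = length of the common suffix of prev_tokens[:i] and
    next_tokens[:j]; zeros mark positions with no aligned run."""
    m, n = len(prev_tokens), len(next_tokens)
    LCSuff = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if prev_tokens[i - 1] == next_tokens[j - 1]:
                LCSuff[i][j] = LCSuff[i - 1][j - 1] + 1
    return LCSuff

# Tail of chunk i-1 vs. head of chunk i: the aligned overlap is [9, 4, 2].
table = build_lcs_table([7, 3, 9, 4, 2], [9, 4, 2, 8, 5])
print(max(max(row) for row in table))  # -> 3
```
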
2 changes: 1 addition & 1 deletion tutorials/nlp/Entity_Linking_Medical.ipynb
@@ -321,7 +321,7 @@
" topk_idxs = np.argpartition(query_scores, -k)[-k:]\n",
" topk_cids = [test_kb_cids[idx] for idx in topk_idxs]\n",
" \n",
" # If the correct query ID is amoung the top k closest kb IDs\n",
" # If the correct query ID is among the top k closest kb IDs\n",
" # the model correctly linked the entity\n",
" match = int(query_cid in topk_cids)\n",
" accs[k] += match\n",
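
The hunk above comes from the tutorial's top-k accuracy loop. A self-contained sketch of the same check, with made-up scores and concept IDs (everything here is illustrative; only the `argpartition` idiom matches the notebook):

```python
import numpy as np

# Hypothetical similarity scores of one query against a 6-entry KB,
# plus the concept ID of each KB entry and the query's true concept.
query_scores = np.array([0.12, 0.80, 0.33, 0.95, 0.51, 0.20])
test_kb_cids = ["C01", "C07", "C03", "C07", "C02", "C05"]
query_cid = "C07"
k = 2

# argpartition places the k largest scores in the last k slots without
# fully sorting, which is cheaper than argsort for large KBs.
topk_idxs = np.argpartition(query_scores, -k)[-k:]
topk_cids = [test_kb_cids[idx] for idx in topk_idxs]
match = int(query_cid in topk_cids)  # 1 if the entity was linked in top-k
print(topk_cids, match)              # ['C07', 'C07'] 1
```
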
2 changes: 1 addition & 1 deletion tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb
@@ -21,7 +21,7 @@
"import os\n",
"\n",
"# install NeMo\n",
"BRANCH = 'r1.17.0'\n",
"BRANCH = 'r1.18.0'\n",
"\n",
"GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n",
"\n",
@@ -311,7 +311,7 @@
" full_msgs.append(delimiter.join(str_items)+eod_str)\n",
" else:\n",
" full_msgs.append(delimiter.join(str_items))\n",
" # use end of line to seperate rows\n",
" # use end of line to separate rows\n",
" text = '\\n'.join(full_msgs)\n",
" text_doc = {'text': text}\n",
" doc = json.dumps(text_doc)+'\\n'\n",
@@ -739,7 +739,7 @@
" '\\n')[history_rows:]]) for s in sentences]\n",
" return extra_text\n",
"\n",
"# generate the inital transactions \n",
"# generate the initial transactions \n",
"data = {\n",
" \"sentences\": [\"\"] * batch_size,\n",
" \"tokens_to_generate\": num_of_rows * token_per_rows,\n",
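
The cells above serialize table rows into newline-separated text documents for language-model training. A toy version of that serialization step (the rows, delimiter, and end-of-document string are all placeholders; the notebook builds them from its own transaction data):

```python
import json

rows = [["user1", "2021-01-01", "42.50"],
        ["user1", "2021-01-02", "7.99"]]
delimiter = ","               # placeholder column separator
eod_str = "<|endoftext|>"     # assumed GPT-style end-of-document marker

full_msgs = [delimiter.join(r) for r in rows]
full_msgs[-1] += eod_str      # close the document after the last row
text = "\n".join(full_msgs)   # end of line separates rows
doc = json.dumps({"text": text}) + "\n"  # one jsonl record
print(doc)
```
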
2 changes: 1 addition & 1 deletion tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
@@ -311,7 +311,7 @@
"### Prompt Formatting\n",
"Now that we have our dataset, lets define what we want the prompt to look like. \n",
"\n",
"The squad dataset json files contain fields named \"context\", \"question\" and \"answer\". The prompt formatting template allows us to arrange these fields and decide where to insert virtual prompts. We can add the `<|VIRTUAL_PROMPT_0|>` token anywere between the fields (although we recommend simply adding it in the leftmost position will be sufficient).\n",
"The squad dataset json files contain fields named \"context\", \"question\" and \"answer\". The prompt formatting template allows us to arrange these fields and decide where to insert virtual prompts. We can add the `<|VIRTUAL_PROMPT_0|>` token anywhere between the fields (although we recommend simply adding it in the leftmost position will be sufficient).\n",
"\n",
"For example, given a data jsonl file with examples like this: \n",
"\n",
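
To make the template idea above concrete, here is a purely illustrative rendering with the virtual-prompt token in the leftmost position; the template string is an assumption, not NeMo's exact prompt-learning config syntax:

```python
# Illustrative only: field names follow the tutorial text, but the
# template format here is assumed, not NeMo's actual config syntax.
prompt_template = ("<|VIRTUAL_PROMPT_0|> Context: {context} "
                   "Question: {question} Answer: {answer}")

example = {
    "context": "The Eiffel Tower is in Paris.",
    "question": "Where is the Eiffel Tower?",
    "answer": "Paris",
}
print(prompt_template.format(**example))
```
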
4 changes: 2 additions & 2 deletions tutorials/nlp/Token_Classification-BioMegatron.ipynb
@@ -327,7 +327,7 @@
"metadata": {},
"outputs": [],
"source": [
"# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n",
"# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n",
"config.model.dataset.data_dir = os.path.join(DATA_DIR, 'NER')\n",
"\n",
"# if you want to decrease the size of your datasets, uncomment the lines below:\n",
@@ -385,7 +385,7 @@
"metadata": {},
"outputs": [],
"source": [
"# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n",
"# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n",
"config.model.dataset.data_dir = os.path.join(DATA_DIR, 'NER')\n",
"\n",
"# if you want to decrease the size of your datasets, uncomment the lines below:\n",
@@ -474,7 +474,7 @@
"colab": {}
},
"source": [
"# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n",
"# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n",
"config.model.dataset.data_dir = DATA_DIR\n",
"\n",
"# if you want to use the full dataset, set NUM_SAMPLES to -1\n",
8 changes: 4 additions & 4 deletions tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb
@@ -49,7 +49,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This tutorial covers speaker diarization inference. We will cover how to setup configurations and launch NeMo speaker diarization system with a few different settings. NeMo speaker diarization pipline includes the following steps as described in the above figure: VAD, Segmentation, Speaker Embedding Extraction, Clustering and Neural Diarizer. We will explain what each module does and we will run NeMo speaker diarization system on a small toy example. "
"This tutorial covers speaker diarization inference. We will cover how to setup configurations and launch NeMo speaker diarization system with a few different settings. NeMo speaker diarization pipeline includes the following steps as described in the above figure: VAD, Segmentation, Speaker Embedding Extraction, Clustering and Neural Diarizer. We will explain what each module does and we will run NeMo speaker diarization system on a small toy example. "
]
},
{
@@ -112,7 +112,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The information from each scale is combined by calculating the weighted sum of affininty matrix. An affinity matrix is calculated by cosine similarity value between all the segments (and corresponding embedding vectors) in that scale. Once affinity matrix for each sacle is calculated, we calculate a weighted sum on all the affinity matrices calculated as in the below figure."
"The information from each scale is combined by calculating the weighted sum of affininty matrix. An affinity matrix is calculated by cosine similarity value between all the segments (and corresponding embedding vectors) in that scale. Once affinity matrix for each scale is calculated, we calculate a weighted sum on all the affinity matrices calculated as in the below figure."
]
},
{
@@ -171,7 +171,7 @@
"metadata": {},
"source": [
"#### Neural Diarizer\n",
"In NeMo speaker diarization pipeline, the term **neural diarizer** referes to trainable neural modules that estimate speaker labels from the given feature or audio input. Neural diarizer contrasts with **clustering diarizer** in a way that clustering diarizer is not a trainable module. Neural diarizer is needed to enable overlap-aware diarization, more improved accucy and joint training with speaker embedding models using multispeaker datasets (diarization training datasets).\n",
"In NeMo speaker diarization pipeline, the term **neural diarizer** refers to trainable neural modules that estimate speaker labels from the given feature or audio input. Neural diarizer contrasts with **clustering diarizer** in a way that clustering diarizer is not a trainable module. Neural diarizer is needed to enable overlap-aware diarization, more improved accucy and joint training with speaker embedding models using multispeaker datasets (diarization training datasets).\n",
"\n",
"#### Multi-scale Diarization Decoder (MSDD)\n",
"Currently, you can use Multi-scale Diarization Decoder (MSDD) model as a neural diarizer. MSDD models use clustering diarizer for obtaining the estimated speaker profile of each speaker and the estimated number of speakers. The below figure shows training and inference of MSDD model."
@@ -375,7 +375,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Using **oracle VAD** for speaker diarization can be regarded as performing a diarization inference based on ground-truth speech/non-speech labels. The motivation behind using oracle-VAD is to factor out the influence of VAD performane when we evaluate a speaker diarization system. Speaker diarization with oracle-VAD can also be used to run speaker diarization with rttms generated from any external VAD, not just VAD model from NeMo.\n",
"Using **oracle VAD** for speaker diarization can be regarded as performing a diarization inference based on ground-truth speech/non-speech labels. The motivation behind using oracle-VAD is to factor out the influence of VAD performance when we evaluate a speaker diarization system. Speaker diarization with oracle-VAD can also be used to run speaker diarization with rttms generated from any external VAD, not just VAD model from NeMo.\n",
"\n",
"The first step is to start converting reference audio RTTM file (containing VAD output) timestamps to oracle manifest file. This manifest file would be sent to our speaker diarizer to extract embeddings.\n",
"\n",
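
Earlier in this file the tutorial describes fusing the per-scale affinity matrices by a weighted sum of cosine similarities. A minimal numpy sketch of that fusion, assuming every scale's segments have already been mapped onto a common base scale (the real pipeline performs that mapping and chooses the weights itself):

```python
import numpy as np

def cosine_affinity(embs):
    """embs: (num_segments, dim) -> (num_segments, num_segments) matrix
    of pairwise cosine similarities."""
    normed = embs / np.linalg.norm(embs, axis=1, keepdims=True)
    return normed @ normed.T

rng = np.random.default_rng(0)
scale_embs = [rng.normal(size=(5, 16)) for _ in range(3)]  # 3 scales, 5 segments
weights = np.array([0.5, 0.3, 0.2])                        # placeholder scale weights

affinities = [cosine_affinity(e) for e in scale_embs]
combined = sum(w * A for w, A in zip(weights, affinities))
print(combined.shape)  # (5, 5) multi-scale affinity matrix
```
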
4 changes: 2 additions & 2 deletions tutorials/tts/FastPitch_ChineseTTS_Training.ipynb
@@ -294,7 +294,7 @@
"1. `audio_filepath`: location of the wav file;\n",
"2. `duration`: duration of the wav file;\n",
"3. `text`: original text;\n",
"4. `normalized_text`: normalized text through our text normalization pipline.\n",
"4. `normalized_text`: normalized text through our text normalization pipeline.\n",
" \n",
"Please refer to [sfspeech-chinese-english-bilingual-speech](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#sfspeech-chinese-english-bilingual-speech) for more details about the SFSpeech dataset. \n",
"\n",
@@ -440,7 +440,7 @@
"id": "35f2f667",
"metadata": {},
"source": [
"Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to overide `pitch_mean` and `pitch_std` configs below."
"Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to override `pitch_mean` and `pitch_std` configs below."
]
},
{
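
The pitch-statistics hand-off described above can be pictured with a small OmegaConf sketch; the numeric values and the `model.*` keys are placeholders for whatever the earlier step printed and whatever the tutorial's YAML actually exposes:

```python
from omegaconf import OmegaConf

# Placeholder values standing in for the output of the pitch-statistics step.
PITCH_MEAN, PITCH_STD = 121.9, 23.1

# Assumed config layout; the real FastPitch YAML has many more fields.
config = OmegaConf.create({"model": {"pitch_mean": None, "pitch_std": None}})
config.model.pitch_mean = PITCH_MEAN
config.model.pitch_std = PITCH_STD
print(OmegaConf.to_yaml(config))
```
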
6 changes: 3 additions & 3 deletions tutorials/tts/FastPitch_GermanTTS_Training.ipynb
@@ -170,11 +170,11 @@
"1. `audio_filepath`: location of the wav file;\n",
"2. `duration`: duration of the wav file;\n",
"3. `text`: original text;\n",
"4. `normalized_text`: normalized text through our text normalization pipline.\n",
"4. `normalized_text`: normalized text through our text normalization pipeline.\n",
" \n",
"This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n",
"\n",
"You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to dowload and normalize the entire dataset."
"You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to download and normalize the entire dataset."
]
},
{
@@ -316,7 +316,7 @@
"id": "d4364261",
"metadata": {},
"source": [
"Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to overide `pitch_mean` and `pitch_std` configs below."
"Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to override `pitch_mean` and `pitch_std` configs below."
]
},
{
2 changes: 1 addition & 1 deletion tutorials/tts/Inference_DurationPitchControl.ipynb
@@ -290,7 +290,7 @@
" spec_shift, audio_shift, durs_shift_pred, _ = str_to_audio(input_string, pitch=pitch_shift)\n",
" # NOTE: We do not plot the pitch returned from str_to_audio.\n",
" # When we override the pitch, we want to plot the pitch that override the model with.\n",
" # In thise case, it is `pitch_shift`\n",
" # In these case, it is `pitch_shift`\n",
"\n",
"# Let's see both results\n",
"print(\"The first unshifted sample\")\n",
