NVIDIA · XuesongYang · Apr 17, 2023 · Apr 11, 2023 · Apr 11, 2023
diff --git a/scripts/dataset_processing/tts/ljspeech/get_data.py b/scripts/dataset_processing/tts/ljspeech/get_data.py
@@ -27,7 +27,11 @@
 def get_args():
     parser = argparse.ArgumentParser(description='Download LJSpeech and create manifests with predefined split')
     parser.add_argument("--data-root", required=True, type=Path)
-    parser.add_argument('--whitelist-path', type=str, default="lj_speech.tsv")
+    parser.add_argument(
+        '--whitelist-path',
+        type=str,
+        default="lj_speech.tsv extracted from the readme file in the dataset. You can also download the file from https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv",
+    )
 
     args = parser.parse_args()
     return args
@@ -56,7 +60,7 @@ def __extract_file(filepath, data_dir):
 def __process_data(data_root, whitelist_path):
     if whitelist_path is None:
         wget.download(
-            "https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/en/whitelist_lj_speech.tsv",
+            "https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv",
             out=str(data_root),
         )
         whitelist_path = data_root / "lj_speech.tsv"