[TTS] fixed wrong pronunciations for r1.11. (#4677)

* [TTS] fixed wrong pronunciations.
  Signed-off-by: Xuesong Yang <[email protected]>
* incremented the version number to 22.08 as @blisc suggested.
  Signed-off-by: Xuesong Yang <[email protected]>
* correct cmudict versions in world-wide places.
  Signed-off-by: Xuesong Yang <[email protected]>
NVIDIA · Sep 10, 2022 · fec1fb5
1 parent 669fd89
commit fec1fb5
Show file tree

Hide file tree

Showing 10 changed files with 72,785 additions and 72,409 deletions.
diff --git a/examples/tts/conf/fastpitch_align_44100.yaml b/examples/tts/conf/fastpitch_align_44100.yaml
@@ -67,14 +67,14 @@ model:
     punct_post_process: true
 
   text_tokenizer:
-    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
+    _target_: nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
     punct: true
     stresses: true
     chars: true
     apostrophe: true
     pad_with_space: true
     g2p:
-      _target_: nemo_text_processing.g2p.modules.EnglishG2p
+      _target_: nemo.collections.tts.torch.g2ps.EnglishG2p
       phoneme_dict: ${phoneme_dict_path}
       heteronyms: ${heteronyms_path}
       phoneme_probability: 0.5

diff --git a/examples/tts/conf/fastpitch_align_v1.05.yaml b/examples/tts/conf/fastpitch_align_v1.05.yaml
@@ -67,14 +67,14 @@ model:
     punct_post_process: true
 
   text_tokenizer:
-    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
+    _target_: nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
     punct: true
     stresses: true
     chars: true
     apostrophe: true
     pad_with_space: true
     g2p:
-      _target_: nemo_text_processing.g2p.modules.EnglishG2p
+      _target_: nemo.collections.tts.torch.g2ps.EnglishG2p
       phoneme_dict: ${phoneme_dict_path}
       heteronyms: ${heteronyms_path}
       phoneme_probability: 0.5

diff --git a/examples/tts/conf/mixer-tts.yaml b/examples/tts/conf/mixer-tts.yaml
@@ -68,14 +68,14 @@ model:
     punct_post_process: true
 
   text_tokenizer:
-    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
+    _target_: nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
     punct: true
     stresses: true
     chars: true
     apostrophe: true
     pad_with_space: true
     g2p:
-      _target_: nemo_text_processing.g2p.modules.EnglishG2p
+      _target_: nemo.collections.tts.torch.g2ps.EnglishG2p
       phoneme_dict: ${phoneme_dict_path}
       heteronyms: ${heteronyms_path}
 

diff --git a/examples/tts/conf/tacotron2.yaml b/examples/tts/conf/tacotron2.yaml
@@ -42,14 +42,14 @@ model:
     punct_post_process: true
 
   text_tokenizer:
-    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
+    _target_: nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
     punct: true
     stresses: true
     chars: true
     apostrophe: true
     pad_with_space: true
     g2p:
-      _target_: nemo_text_processing.g2p.modules.EnglishG2p
+      _target_: nemo.collections.tts.torch.g2ps.EnglishG2p
       phoneme_dict: ${phoneme_dict_path}
       heteronyms: ${heteronyms_path}
 

diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py
@@ -46,15 +46,15 @@
 
 @dataclass
 class G2PConfig:
-    _target_: str = "nemo_text_processing.g2p.modules.EnglishG2p"
+    _target_: str = "nemo.collections.tts.torch.g2ps.EnglishG2p"
     phoneme_dict: str = "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
     heteronyms: str = "scripts/tts_dataset_files/heteronyms-052722"
     phoneme_probability: float = 0.5
 
 
 @dataclass
 class TextTokenizer:
-    _target_: str = "nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer"
+    _target_: str = "nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer"
     punct: bool = True
     stresses: bool = True
     chars: bool = True
@@ -181,16 +181,6 @@ def _setup_normalizer(self, cfg):
 
     def _setup_tokenizer(self, cfg):
         text_tokenizer_kwargs = {}
-
-        if "phoneme_dict" in cfg.text_tokenizer:
-            text_tokenizer_kwargs["phoneme_dict"] = self.register_artifact(
-                "text_tokenizer.phoneme_dict", cfg.text_tokenizer.phoneme_dict,
-            )
-        if "heteronyms" in cfg.text_tokenizer:
-            text_tokenizer_kwargs["heteronyms"] = self.register_artifact(
-                "text_tokenizer.heteronyms", cfg.text_tokenizer.heteronyms,
-            )
-
         if "g2p" in cfg.text_tokenizer:
             g2p_kwargs = {}