diff --git a/examples/tts/conf/fastpitch_align_44100.yaml b/examples/tts/conf/fastpitch_align_44100.yaml
index 95442a0eb44f..f8719325c088 100644
--- a/examples/tts/conf/fastpitch_align_44100.yaml
+++ b/examples/tts/conf/fastpitch_align_44100.yaml
@@ -28,7 +28,7 @@ highfreq: null
 window: hann
 
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 
 model:
diff --git a/examples/tts/conf/fastpitch_align_v1.05.yaml b/examples/tts/conf/fastpitch_align_v1.05.yaml
index 692b2500cb29..caf057e0f89b 100644
--- a/examples/tts/conf/fastpitch_align_v1.05.yaml
+++ b/examples/tts/conf/fastpitch_align_v1.05.yaml
@@ -28,7 +28,7 @@ highfreq: 8000
 window: hann
 
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 
 model:
diff --git a/examples/tts/conf/mixer-tts.yaml b/examples/tts/conf/mixer-tts.yaml
index c66aac76d446..48e146eb11c3 100644
--- a/examples/tts/conf/mixer-tts.yaml
+++ b/examples/tts/conf/mixer-tts.yaml
@@ -28,7 +28,7 @@ highfreq: 8000
 window: hann
 
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 
 model:
diff --git a/examples/tts/conf/tacotron2.yaml b/examples/tts/conf/tacotron2.yaml
index e227a82d49af..2470b2d46a88 100644
--- a/examples/tts/conf/tacotron2.yaml
+++ b/examples/tts/conf/tacotron2.yaml
@@ -10,7 +10,7 @@ sup_data_path: null
 sup_data_types: null
 
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 
 
diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py
index 9a02822371e4..cdb48b5d4c19 100644
--- a/nemo/collections/tts/models/fastpitch.py
+++ b/nemo/collections/tts/models/fastpitch.py
@@ -46,9 +46,9 @@
 
 @dataclass
 class G2PConfig:
-    _target_: str = "nemo_text_processing.g2p.modules.EnglishG2p"
+    _target_: str = "nemo.collections.tts.torch.g2ps.EnglishG2p"
     phoneme_dict: str = "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-    heteronyms: str = "scripts/tts_dataset_files/heteronyms-052722"
+    heteronyms: str = "scripts/tts_dataset_files/heteronyms-030921"
     phoneme_probability: float = 0.5
 
 
diff --git a/nemo/collections/tts/torch/g2ps.py b/nemo/collections/tts/torch/g2ps.py
index 554f214975ba..ef47705feddf 100644
--- a/nemo/collections/tts/torch/g2ps.py
+++ b/nemo/collections/tts/torch/g2ps.py
@@ -12,6 +12,449 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO (xueyang): deprecate this file since no other places import modules from here anymore. However,
-#  all checkpoints uploaded in ngc used this path. So it requires to update all ngc checkpoints g2p path as well.
-from nemo_text_processing.g2p.modules import IPAG2P, BaseG2p, EnglishG2p
+import abc
+import pathlib
+import random
+import re
+import time
+from collections import defaultdict
+from typing import Optional
+
+import nltk
+import torch
+
+from nemo.collections.tts.torch.en_utils import english_word_tokenize
+from nemo.utils import logging
+from nemo.utils.decorators import experimental
+from nemo.utils.get_rank import is_global_rank_zero
+
+
+class BaseG2p(abc.ABC):
+    def __init__(
+        self, phoneme_dict=None, word_tokenize_func=lambda x: x, apply_to_oov_word=None,
+    ):
+        """Abstract class for creating an arbitrary module to convert grapheme words to phoneme sequences (or leave unchanged or use apply_to_oov_word).
+        Args:
+            phoneme_dict: Arbitrary representation of dictionary (phoneme -> grapheme) for known words.
+            word_tokenize_func: Function for tokenizing text to words.
+            apply_to_oov_word: Function that will be applied to out of phoneme_dict word.
+        """
+        self.phoneme_dict = phoneme_dict
+        self.word_tokenize_func = word_tokenize_func
+        self.apply_to_oov_word = apply_to_oov_word
+
+    @abc.abstractmethod
+    def __call__(self, text: str) -> str:
+        pass
+
+
+class EnglishG2p(BaseG2p):
+    def __init__(
+        self,
+        phoneme_dict=None,
+        word_tokenize_func=english_word_tokenize,
+        apply_to_oov_word=None,
+        ignore_ambiguous_words=True,
+        heteronyms=None,
+        encoding='latin-1',
+        phoneme_probability: Optional[float] = None,
+    ):
+        """English G2P module. This module converts words from grapheme to phoneme representation using phoneme_dict in CMU dict format.
+        Optionally, it can ignore words which are heteronyms, ambiguous or marked as unchangeable by word_tokenize_func (see code for details).
+        Ignored words are left unchanged or passed through apply_to_oov_word for handling.
+        Args:
+            phoneme_dict (str, Path, Dict): Path to file in CMUdict format or dictionary of CMUdict-like entries.
+            word_tokenize_func: Function for tokenizing text to words.
+                It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation and flag whether to leave unchanged or not.
+                It is expected that unchangeable word representation will be represented as List[str], other cases are represented as str.
+                It is useful to mark word as unchangeable which is already in phoneme representation.
+            apply_to_oov_word: Function that will be applied to out of phoneme_dict word.
+            ignore_ambiguous_words: Whether to not handle word via phoneme_dict with ambiguous phoneme sequences. Defaults to True.
+            heteronyms (str, Path, List): Path to file with heteronyms (every line is new word) or list of words.
+            encoding: Encoding type.
+            phoneme_probability (Optional[float]): The probability (0.<var<1.) that each word is phonemized. Defaults to None which is the same as 1.
+                Note that this code path is only run if the word can be phonemized. For example: If the word does not have a entry in the g2p dict, it will be returned
+                as characters. If the word has multiple entries and ignore_ambiguous_words is True, it will be returned as characters.
+        """
+        phoneme_dict = (
+            self._parse_as_cmu_dict(phoneme_dict, encoding)
+            if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path) or phoneme_dict is None
+            else phoneme_dict
+        )
+
+        if apply_to_oov_word is None:
+            logging.warning(
+                "apply_to_oov_word=None, This means that some of words will remain unchanged "
+                "if they are not handled by any of the rules in self.parse_one_word(). "
+                "This may be intended if phonemes and chars are both valid inputs, otherwise, "
+                "you may see unexpected deletions in your input."
+            )
+
+        super().__init__(
+            phoneme_dict=phoneme_dict, word_tokenize_func=word_tokenize_func, apply_to_oov_word=apply_to_oov_word,
+        )
+
+        self.ignore_ambiguous_words = ignore_ambiguous_words
+        self.heteronyms = (
+            set(self._parse_file_by_lines(heteronyms, encoding))
+            if isinstance(heteronyms, str) or isinstance(heteronyms, pathlib.Path)
+            else heteronyms
+        )
+        self.phoneme_probability = phoneme_probability
+        self._rng = random.Random()
+
+    @staticmethod
+    def _parse_as_cmu_dict(phoneme_dict_path=None, encoding='latin-1'):
+        if phoneme_dict_path is None:
+            # this part of code downloads file, but it is not rank zero guarded
+            # Try to check if torch distributed is available, if not get global rank zero to download corpora and make
+            # all other ranks sleep for a minute
+            if torch.distributed.is_available() and torch.distributed.is_initialized():
+                group = torch.distributed.group.WORLD
+                if is_global_rank_zero():
+                    try:
+                        nltk.data.find('corpora/cmudict.zip')
+                    except LookupError:
+                        nltk.download('cmudict', quiet=True)
+                torch.distributed.barrier(group=group)
+            elif is_global_rank_zero():
+                logging.error(
+                    f"Torch distributed needs to be initialized before you initialized EnglishG2p. This class is prone to "
+                    "data access race conditions. Now downloading corpora from global rank 0. If other ranks pass this "
+                    "before rank 0, errors might result."
+                )
+                try:
+                    nltk.data.find('corpora/cmudict.zip')
+                except LookupError:
+                    nltk.download('cmudict', quiet=True)
+            else:
+                logging.error(
+                    f"Torch distributed needs to be initialized before you initialized EnglishG2p. This class is prone to "
+                    "data access race conditions. This process is not rank 0, and now going to sleep for 1 min. If this "
+                    "rank wakes from sleep prior to rank 0 finishing downloading, errors might result."
+                )
+                time.sleep(60)
+
+            logging.warning(
+                f"English g2p_dict will be used from nltk.corpus.cmudict.dict(), because phoneme_dict_path=None. "
+                "Note that nltk.corpus.cmudict.dict() has old version (0.6) of CMUDict. "
+                "You can use the latest official version of CMUDict (0.7b) with additional changes from NVIDIA directly from NeMo "
+                "using the path scripts/tts_dataset_files/cmudict-0.7b_nv22.08."
+            )
+
+            return nltk.corpus.cmudict.dict()
+
+        _alt_re = re.compile(r'\([0-9]+\)')
+        g2p_dict = {}
+        with open(phoneme_dict_path, encoding=encoding) as file:
+            for line in file:
+                if len(line) and ('A' <= line[0] <= 'Z' or line[0] == "'"):
+                    parts = line.split('  ')
+                    word = re.sub(_alt_re, '', parts[0])
+                    word = word.lower()
+
+                    pronunciation = parts[1].strip().split(" ")
+                    if word in g2p_dict:
+                        g2p_dict[word].append(pronunciation)
+                    else:
+                        g2p_dict[word] = [pronunciation]
+        return g2p_dict
+
+    @staticmethod
+    def _parse_file_by_lines(p, encoding):
+        with open(p, encoding=encoding) as f:
+            return [l.rstrip() for l in f.readlines()]
+
+    def is_unique_in_phoneme_dict(self, word):
+        return len(self.phoneme_dict[word]) == 1
+
+    def parse_one_word(self, word: str):
+        """
+        Returns parsed `word` and `status` as bool.
+        `status` will be `False` if word wasn't handled, `True` otherwise.
+        """
+
+        if self.phoneme_probability is not None and self._rng.random() > self.phoneme_probability:
+            return word, True
+
+        # punctuation
+        if re.search("[a-zA-Z]", word) is None:
+            return list(word), True
+
+        # heteronym
+        if self.heteronyms is not None and word in self.heteronyms:
+            return word, True
+
+        # `'s` suffix
+        if (
+            len(word) > 2
+            and word.endswith("'s")
+            and (word not in self.phoneme_dict)
+            and (word[:-2] in self.phoneme_dict)
+            and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word[:-2]))
+        ):
+            return self.phoneme_dict[word[:-2]][0] + ["Z"], True
+
+        # `s` suffix
+        if (
+            len(word) > 1
+            and word.endswith("s")
+            and (word not in self.phoneme_dict)
+            and (word[:-1] in self.phoneme_dict)
+            and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word[:-1]))
+        ):
+            return self.phoneme_dict[word[:-1]][0] + ["Z"], True
+
+        # phoneme dict
+        if word in self.phoneme_dict and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word)):
+            return self.phoneme_dict[word][0], True
+
+        if self.apply_to_oov_word is not None:
+            return self.apply_to_oov_word(word), True
+        else:
+            return word, False
+
+    def __call__(self, text):
+        words = self.word_tokenize_func(text)
+
+        prons = []
+        for word, without_changes in words:
+            if without_changes:
+                prons.extend(word)
+                continue
+
+            word_by_hyphen = word.split("-")
+
+            pron, is_handled = self.parse_one_word(word)
+
+            if not is_handled and len(word_by_hyphen) > 1:
+                pron = []
+                for sub_word in word_by_hyphen:
+                    p, _ = self.parse_one_word(sub_word)
+                    pron.extend(p)
+                    pron.extend(["-"])
+                pron.pop()
+
+            prons.extend(pron)
+
+        return prons
+
+
+@experimental
+class IPAG2P(BaseG2p):
+    # fmt: off
+    STRESS_SYMBOLS = ["ˈ", "ˌ"]
+
+    def __init__(
+        self,
+        phoneme_dict,
+        word_tokenize_func=english_word_tokenize,
+        apply_to_oov_word=None,
+        ignore_ambiguous_words=True,
+        heteronyms=None,
+        phoneme_probability: Optional[float]=None,
+        use_stresses: Optional[bool]=True,
+        set_graphemes_upper: Optional[bool]=True
+    ):
+        """Generic IPA G2P module. This module converts words from grapheme to International Phonetic Alphabet representations.
+        Optionally, it can ignore heteronyms, ambiguous words, or words marked as unchangeable by word_tokenize_func (see code for details).
+        Ignored words are left unchanged or passed through apply_to_oov_word for handling.
+        
+        Args:
+            phoneme_dict (str, Path, Dict): Path to file in CMUdict format or dictionary of CMUdict-like entries.
+                Must be given for IPA G2P. (Consider using scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.08.txt.)
+            word_tokenize_func: Function for tokenizing text to words.
+                It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word
+                representation and flag whether to leave unchanged or not.
+                It is expected that unchangeable word representation will be represented as List[str], other
+                cases are represented as str.
+                It is useful to mark word as unchangeable which is already in phoneme representation.
+                Defaults to the English word tokenizer.
+            apply_to_oov_word: Function that will be applied to out of phoneme_dict word.
+            ignore_ambiguous_words: Whether to not handle word via phoneme_dict with ambiguous phoneme sequences.
+                Defaults to True.
+            heteronyms (str, Path, List): Path to file with heteronyms (every line is new word) or list of words.
+            phoneme_probability (Optional[float]): The probability (0.<var<1.) that each word is phonemized. Defaults to None which is the same as 1.
+                Note that this code path is only run if the word can be phonemized. For example: If the word does not have a entry in the g2p dict, it will be returned
+                as characters. If the word has multiple entries and ignore_ambiguous_words is True, it will be returned as characters.
+            use_stresses (Optional[bool]): Whether or not to include the stress symbols (ˈ and ˌ).
+            set_graphemes_upper (Optional[bool]): Whether or not to convert all graphemes to uppercase (if not converted to phonemes).
+                You may want to set this if there is an overlap between grapheme/IPA symbols.
+                Defaults to True.
+        """
+        self.use_stresses = use_stresses
+        self.set_graphemes_upper = set_graphemes_upper
+
+        if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path):
+            # Load phoneme_dict from file path
+            self.phoneme_dict, self.symbols = self._parse_as_cmu_dict(
+                phoneme_dict,
+                use_stresses,
+                self.STRESS_SYMBOLS,
+                upper=set_graphemes_upper,
+            )
+        else:
+            # Load phoneme_dict as dictionary object
+            logging.info("Loading phoneme_dict as a Dict object. Extracting valid symbols from values.")
+            self.phoneme_dict = phoneme_dict
+
+            self.symbols = set()
+            for entries in phoneme_dict.values():
+                for phonemes in entries:
+                    self.symbols.update(phonemes)
+
+            # Update dict to remove stresses if use_stresses=False
+            if not use_stresses:
+                for stress_symbol in self.STRESS_SYMBOLS:
+                    self.symbols.remove(stress_symbol)
+
+                updated_phoneme_dict = {}
+                for k,vs in self.phoneme_dict.items():
+                    new_vs = []
+                    for pron in vs:
+                        new_vs.append([symbol for symbol in pron if symbol not in self.STRESS_SYMBOLS])
+                    updated_phoneme_dict[k] = new_vs
+                self.phoneme_dict = updated_phoneme_dict
+
+            # Update dict keys if graphemes should be set to uppercase
+            if set_graphemes_upper:
+                updated_phoneme_dict = {}
+                for k,vs, in self.phoneme_dict.items():
+                    updated_phoneme_dict[k.upper()] = vs
+                self.phoneme_dict = updated_phoneme_dict
+
+        if apply_to_oov_word is None:
+            logging.warning(
+                "apply_to_oov_word=None, This means that some of words will remain unchanged "
+                "if they are not handled by any of the rules in self.parse_one_word(). "
+                "This may be intended if phonemes and chars are both valid inputs, otherwise, "
+                "you may see unexpected deletions in your input."
+            )
+
+        super().__init__(
+            phoneme_dict=self.phoneme_dict, word_tokenize_func=word_tokenize_func, apply_to_oov_word=apply_to_oov_word,
+        )
+
+        self.ignore_ambiguous_words = ignore_ambiguous_words
+        self.heteronyms = (
+            set(self._parse_file_by_lines(heteronyms))
+            if isinstance(heteronyms, str) or isinstance(heteronyms, pathlib.Path)
+            else heteronyms
+        )
+        self.phoneme_probability = phoneme_probability
+        self._rng = random.Random()
+
+    @staticmethod
+    def _parse_as_cmu_dict(phoneme_dict_path, use_stresses=False, stress_symbols=[], upper=True):
+        """Loads IPA CMUdict, and generates a set of all valid symbols."""
+        g2p_dict = defaultdict(list)
+        symbols = set()
+
+        _alt_re = re.compile(r'\([0-9]+\)')
+        with open(phoneme_dict_path, 'r') as file:
+            for line in file:
+                if len(line) and ('A' <= line[0] <= 'Z' or line[0] == "'"):
+                    parts = line.split('  ')
+                    word = re.sub(_alt_re, '', parts[0])
+                    if upper:
+                        word = word.upper()
+                    else:
+                        word = word.lower()
+
+                    pronunciation = parts[1].strip()
+                    if not use_stresses:
+                        for stress_symbol in stress_symbols:
+                            pronunciation = pronunciation.replace(stress_symbol, '')
+                    symbols.update(pronunciation)  # This will insert each char individually
+                    g2p_dict[word].append(list(pronunciation))
+
+        return g2p_dict, symbols
+
+    @staticmethod
+    def _parse_file_by_lines(p):
+        with open(p, 'r') as f:
+            return [l.rstrip() for l in f.readlines()]
+
+    def is_unique_in_phoneme_dict(self, word):
+        return len(self.phoneme_dict[word]) == 1
+
+    def parse_one_word(self, word: str):
+        """Returns parsed `word` and `status` (bool: False if word wasn't handled, True otherwise).
+        """
+        if self.set_graphemes_upper:
+            word = word.upper()
+
+        if self.phoneme_probability is not None and self._rng.random() > self.phoneme_probability:
+            return word, True
+
+        # Punctuation (assumes other chars have been stripped)
+        if re.search("[a-zA-Z]", word) is None:
+            return list(word), True
+
+        # Heteronym
+        if self.heteronyms and word in self.heteronyms:
+            return word, True
+
+        # `'s` suffix (with apostrophe) - not in phoneme dict
+        if (
+            len(word) > 2
+            and word.endswith("'s")
+            and (word not in self.phoneme_dict)
+            and (word[:-2] in self.phoneme_dict)
+            and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word[:-2]))
+        ):
+            if word[-3] == 'T':
+                # Case like "airport's"
+                return self.phoneme_dict[word[:-2]][0] + ["s"], True
+            elif word[-3] == 'S':
+                # Case like "jones's"
+                return self.phoneme_dict[word[:-2]][0] + ["ɪ", "z"], True
+            else:
+                return self.phoneme_dict[word[:-2]][0] + ["z"], True
+
+        # `s` suffix (without apostrophe) - not in phoneme dict
+        if (
+            len(word) > 1
+            and word.endswith("s")
+            and (word not in self.phoneme_dict)
+            and (word[:-1] in self.phoneme_dict)
+            and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word[:-1]))
+        ):
+            if word[-2] == 'T':
+                # Case like "airports"
+                return self.phoneme_dict[word[:-1]][0] + ["s"], True
+            else:
+                return self.phoneme_dict[word[:-1]][0] + ["z"], True
+
+        # Phoneme dict lookup for unique words (or default pron if ignore_ambiguous_words=False)
+        if word in self.phoneme_dict and (not self.ignore_ambiguous_words or self.is_unique_in_phoneme_dict(word)):
+            return self.phoneme_dict[word][0], True
+
+        if self.apply_to_oov_word is not None:
+            return self.apply_to_oov_word(word), True
+        else:
+            return word, False
+
+    def __call__(self, text):
+        words = self.word_tokenize_func(text)
+
+        prons = []
+        for word, without_changes in words:
+            if without_changes:
+                prons.extend(word)
+                continue
+
+            pron, is_handled = self.parse_one_word(word)
+
+            word_by_hyphen = word.split("-")
+            if not is_handled and len(word_by_hyphen) > 1:
+                pron = []
+                for sub_word in word_by_hyphen:
+                    p, _ = self.parse_one_word(sub_word)
+                    pron.extend(p)
+                    pron.extend(["-"])
+                pron.pop()
+
+            prons.extend(pron)
+
+        return prons
diff --git a/nemo/collections/tts/torch/tts_dataset.yaml b/nemo/collections/tts/torch/tts_dataset.yaml
index 510f8f8ce20b..10cad3d249c2 100644
--- a/nemo/collections/tts/torch/tts_dataset.yaml
+++ b/nemo/collections/tts/torch/tts_dataset.yaml
@@ -41,6 +41,6 @@ tts_dataset:
     add_blank_at: null
     pad_with_space: True
     g2p:
-      _target_: nemo_text_processing.g2p.modules.EnglishG2p
+      _target_: nemo.collections.tts.torch.g2ps.EnglishG2p
       phoneme_dict: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-      heteronyms: "scripts/tts_dataset_files/heteronyms-052722"
+      heteronyms: "scripts/tts_dataset_files/heteronyms-030921"
diff --git a/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_fastpitch_align.yaml b/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_fastpitch_align.yaml
index bed6b2ee49af..80471f589dcc 100644
--- a/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_fastpitch_align.yaml
+++ b/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_fastpitch_align.yaml
@@ -5,7 +5,7 @@ sup_data_path: "sup_data"
 sup_data_types: [ "align_prior_matrix", "pitch" ]
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 
 dataset:
   _target_: nemo.collections.tts.torch.data.TTSDataset
diff --git a/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_mixer_tts.yaml b/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_mixer_tts.yaml
index d4151e888ae0..ae21fc674e4c 100644
--- a/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_mixer_tts.yaml
+++ b/scripts/dataset_processing/tts/ljspeech/ds_conf/ds_for_mixer_tts.yaml
@@ -5,7 +5,7 @@ sup_data_path: "sup_data"
 sup_data_types: [ "align_prior_matrix", "pitch" ]
 whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
 phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.08"
-heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
+heteronyms_path: "scripts/tts_dataset_files/heteronyms-030921"
 
 dataset:
   _target_: nemo.collections.tts.torch.data.TTSDataset
diff --git a/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.08.txt b/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.08.txt
index 1bedf98caf1f..e19979d0ad7c 100644
--- a/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.08.txt
+++ b/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.08.txt
@@ -57525,7 +57525,7 @@ HURRAY  həˈɹeɪ
 HURRELL  ˈhɔɹəl
 HURRI  ˈhɝi
 HURRICANE  ˈhɝəˌkeɪn
-HURRICANE(1)  ˈhʌɹəˌkeɪn
+HURRICANE  ˈhəɹəˌkeɪn
 HURRICANE'S  ˈhɝəˌkeɪnz
 HURRICANES  ˈhɝəˌkeɪnz
 HURRIED  ˈhɝid