Skip to content

Commit

Permalink
1) Add Text to Phonemes function
Browse files Browse the repository at this point in the history
2) Update requirements.txt
  • Loading branch information
rishikksh20 committed Jul 6, 2020
1 parent ca59db1 commit 5bc2b40
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 4 deletions.
75 changes: 75 additions & 0 deletions dataset/texts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from dataset.texts.symbols import symbols, _eos, phonemes_symbols, PAD, EOS, _PHONEME_SEP
import hparams as hp
from dataset.texts.dict_ import symbols_
import nltk
from g2p_en import G2p

# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
Expand Down Expand Up @@ -104,3 +106,76 @@ def sequence_to_phonemes(sequence, use_eos=False):
if use_eos:
string = string.replace(EOS, '')
return string

def text_to_phonemes(text, custom_words=None):
    """
    Convert text into a list of ARPAbet phonemes.

    The text is lower-cased and split with NLTK's ``WordPunctTokenizer``.
    Each token is then handled as follows:

    * a token made up only of punctuation characters becomes ``'pau'``;
    * a token found in CMUDict (or in ``custom_words``) uses the first
      pronunciation listed for it;
    * any other token is passed to ``g2p_en.G2p``.

    Phonemes coming from the dictionary or from ``G2p`` (including those
    supplied through ``custom_words``) are passed through a remapping table
    that collapses several stress variants onto a single symbol.

    :param text: str, input text.
    :param custom_words:
        dict {str: list of str}, optional
        Pronunciations (a list of ARPAbet phonemes) you'd like to override.
        Keys are lower-cased before being stored.
        Example: {'word': ['W', 'ER1', 'D']}
    :return: list of str, phonemes
    """
    # Stress variants that are folded onto another symbol; anything not
    # listed here is kept unchanged.
    REMAPPING = {
        'AA0': 'AA1',
        'AA2': 'AA1',
        'AE2': 'AE1',
        'AH2': 'AH1',
        'AO0': 'AO1',
        'AO2': 'AO1',
        'AW2': 'AW1',
        'AY2': 'AY1',
        'EH2': 'EH1',
        'ER0': 'EH1',
        'ER1': 'EH1',
        'ER2': 'EH1',
        'EY2': 'EY1',
        'IH2': 'IH1',
        'IY2': 'IY1',
        'OW2': 'OW1',
        'OY2': 'OY1',
        'UH2': 'UH1',
        'UW2': 'UW1',
    }
    PUNCTUATION = '!?.,-:;"\'()'

    # Fetch the CMUDict corpus on first use if it is not installed locally.
    try:
        known_words = nltk.corpus.cmudict.dict()
    except LookupError:
        nltk.download('cmudict')
        known_words = nltk.corpus.cmudict.dict()

    # ``None`` default instead of ``{}`` so no dict object is shared between
    # calls. Each override is wrapped in a list because dictionary entries
    # are lists of alternative pronunciations.
    if custom_words:
        for word, override in custom_words.items():
            known_words[word.lower()] = [override]

    words = nltk.tokenize.WordPunctTokenizer().tokenize(text.lower())

    phonemes = []
    g2p = None  # created only if an out-of-dictionary word is encountered
    for word in words:
        if all(c in PUNCTUATION for c in word):
            phonemes.append('pau')
            continue

        if word in known_words:
            pronunciation = known_words[word][0]
        else:
            if g2p is None:
                g2p = G2p()
            pronunciation = g2p(word)

        phonemes.extend(REMAPPING.get(p, p) for p in pronunciation)

    return phonemes



3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ inflect
nltk
tqdm
pyworld==0.2.10
configargparse
tensorboardX

10 changes: 6 additions & 4 deletions synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from utils.util import set_deterministic_pytorch
from fastspeech import FeedForwardTransformer
import hparams as hp
from dataset.texts import phonemes_to_sequence
from dataset.texts import phonemes_to_sequence, text_to_phonemes
import time
from dataset.audio_processing import reconstruct_waveform, griffin_lim
from dataset.audio_processing import save_wav
Expand Down Expand Up @@ -200,6 +200,9 @@ def synthesis_tts(args, text, path):
# read training config
idim = hp.symbol_len
odim = hp.num_mels
print("Text :", text)
input = np.asarray(phonemes_to_sequence(text.split()))
print("Input :", input)
model = FeedForwardTransformer(idim, odim)

if os.path.exists(path):
Expand All @@ -214,9 +217,7 @@ def synthesis_tts(args, text, path):
# set torch device
device = torch.device("cuda" if args.ngpu > 0 else "cpu")
model = model.to(device)
print("Text :",text)
input = np.asarray(phonemes_to_sequence(text.split()))
print("Input :",input)

text = torch.LongTensor(input)
text = text.cuda()
#[num_char]
Expand Down Expand Up @@ -344,6 +345,7 @@ def main(args):
logging.info('python path = ' + os.environ.get('PYTHONPATH', '(None)'))

print("Text : ", args.text)
print("Checkpoint : ", args.path)
audio = synthesis_tts(args, args.text, args.path)
m = audio.T

Expand Down

0 comments on commit 5bc2b40

Please sign in to comment.