Skip to content

Commit

Permalink
use phoneme tokenizer for edit speech
Browse files (browse the repository at this point in the history)
Signed-off-by: Paarth Neekhara <[email protected]>
  • Loading branch information
paarthneekhara committed Dec 1, 2023
1 parent 28ebdc8 commit 981c34b
Showing 1 changed file with 5 additions and 0 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -647,6 +647,11 @@ def _get_tokens(self, doc, field, field_data):
instruction_tokens = self._get_text_tokens("Phoneme TTS")
field_tokens = self._get_phoneme_tokens(_text.replace("Phoneme TTS ", ""))
field_tokens = instruction_tokens + field_tokens
elif _text.startswith("Edit Speech"):
# Always use phoneme tokenizer for edit speech
instruction_tokens = self._get_text_tokens("Edit Speech")
field_tokens = self._get_phoneme_tokens(_text.replace("Edit Speech ", ""))
field_tokens = instruction_tokens + field_tokens
else:
field_tokens = self._get_text_tokens(field_data.strip(" ")) # list of ids
elif doc[f"{field}_type"] == 'SPEECH':
Expand Down

0 comments on commit 981c34b

Please sign in to comment.