Skip to content

Commit bd6d4de

Browse files
committed
Bug fix on zh-cn inference
1 parent 7bfd20e commit bd6d4de

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

Diff for: TTS/tts/layers/xtts/tokenizer.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@
115115
# Arabic does not have as many common abbreviations as English.
116116
]
117117
],
118-
"zh": [
118+
"zh-cn": [
119119
(re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
120120
for x in [
121121
# Chinese doesn't typically use abbreviations in the same way as Latin-based scripts.
@@ -280,7 +280,7 @@ def expand_abbreviations_multilingual(text, lang="en"):
280280
("°", " درجة "),
281281
]
282282
],
283-
"zh": [
283+
"zh-cn": [
284284
# Chinese
285285
(re.compile(r"%s" % re.escape(x[0]), re.IGNORECASE), x[1])
286286
for x in [
@@ -571,9 +571,9 @@ def check_input_length(self, txt, lang):
571571
)
572572

573573
def preprocess_text(self, txt, lang):
574-
if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "zh-cn"}:
574+
if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh-cn", "zh-cn"}:
575575
txt = multilingual_cleaners(txt, lang)
576-
if lang in {"zh", "zh-cn"}:
576+
if lang in {"zh-cn", "zh-cn"}:
577577
txt = chinese_transliterate(txt)
578578
elif lang == "ja":
579579
txt = japanese_cleaners(txt, self.katsu)
@@ -682,8 +682,8 @@ def test_expand_numbers_multilingual():
682682
("Dat wordt dan $20 meneer.", "Dat wordt dan twintig dollar meneer.", "nl"),
683683
("Dat wordt dan 20€ meneer.", "Dat wordt dan twintig euro meneer.", "nl"),
684684
# Chinese (Simplified)
685-
("在12.5秒内", "在十二点五秒内", "zh"),
686-
("有50名士兵", "有五十名士兵", "zh"),
685+
("在12.5秒内", "在十二点五秒内", "zh-cn"),
686+
("有50名士兵", "有五十名士兵", "zh-cn"),
687687
# ("那将是$20先生", '那将是二十美元先生', 'zh-cn'),  # disabled: currency expansion doesn't work for Chinese
688688
# ("那将是20€先生", '那将是二十欧元先生', 'zh-cn'),
689689
# Turkish
@@ -764,7 +764,7 @@ def test_symbols_multilingual():
764764
("Ik heb 14% batterij", "Ik heb 14 procent batterij", "nl"),
765765
("Ik zie je @ het feest", "Ik zie je bij het feest", "nl"),
766766
("لدي 14% في البطارية", "لدي 14 في المئة في البطارية", "ar"),
767-
("我的电量为 14%", "我的电量为 14 百分之", "zh"),
767+
("我的电量为 14%", "我的电量为 14 百分之", "zh-cn"),
768768
("Pilim %14 dolu.", "Pilim yüzde 14 dolu.", "tr"),
769769
("Az akkumulátorom töltöttsége 14%", "Az akkumulátorom töltöttsége 14 százalék", "hu"),
770770
("배터리 잔량이 14%입니다.", "배터리 잔량이 14 퍼센트입니다.", "ko"),

0 commit comments

Comments
 (0)