|
115 | 115 | # There are not many common abbreviations in Arabic as in English.
|
116 | 116 | ]
|
117 | 117 | ],
|
118 |
| - "zh": [ |
| 118 | + "zh-cn": [ |
119 | 119 | (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
|
120 | 120 | for x in [
|
121 | 121 | # Chinese doesn't typically use abbreviations in the same way as Latin-based scripts.
|
@@ -280,7 +280,7 @@ def expand_abbreviations_multilingual(text, lang="en"):
|
280 | 280 | ("°", " درجة "),
|
281 | 281 | ]
|
282 | 282 | ],
|
283 |
| - "zh": [ |
| 283 | + "zh-cn": [ |
284 | 284 | # Chinese
|
285 | 285 | (re.compile(r"%s" % re.escape(x[0]), re.IGNORECASE), x[1])
|
286 | 286 | for x in [
|
@@ -571,9 +571,9 @@ def check_input_length(self, txt, lang):
|
571 | 571 | )
|
572 | 572 |
|
573 | 573 | def preprocess_text(self, txt, lang):
|
574 |
| - if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "zh-cn"}: |
| 574 | + if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh-cn", "zh-cn"}: |
575 | 575 | txt = multilingual_cleaners(txt, lang)
|
576 |
| - if lang in {"zh", "zh-cn"}: |
| 576 | + if lang in {"zh-cn", "zh-cn"}: |
577 | 577 | txt = chinese_transliterate(txt)
|
578 | 578 | elif lang == "ja":
|
579 | 579 | txt = japanese_cleaners(txt, self.katsu)
|
@@ -682,8 +682,8 @@ def test_expand_numbers_multilingual():
|
682 | 682 | ("Dat wordt dan $20 meneer.", "Dat wordt dan twintig dollar meneer.", "nl"),
|
683 | 683 | ("Dat wordt dan 20€ meneer.", "Dat wordt dan twintig euro meneer.", "nl"),
|
684 | 684 | # Chinese (Simplified)
|
685 |
| - ("在12.5秒内", "在十二点五秒内", "zh"), |
686 |
| - ("有50名士兵", "有五十名士兵", "zh"), |
| 685 | + ("在12.5秒内", "在十二点五秒内", "zh-cn"), |
| 686 | + ("有50名士兵", "有五十名士兵", "zh-cn"), |
687 | 687 | # ("那将是$20先生", '那将是二十美元先生', 'zh'), currency doesn't work
|
688 | 688 | # ("那将是20€先生", '那将是二十欧元先生', 'zh'),
|
689 | 689 | # Turkish
|
@@ -764,7 +764,7 @@ def test_symbols_multilingual():
|
764 | 764 | ("Ik heb 14% batterij", "Ik heb 14 procent batterij", "nl"),
|
765 | 765 | ("Ik zie je @ het feest", "Ik zie je bij het feest", "nl"),
|
766 | 766 | ("لدي 14% في البطارية", "لدي 14 في المئة في البطارية", "ar"),
|
767 |
| - ("我的电量为 14%", "我的电量为 14 百分之", "zh"), |
| 767 | + ("我的电量为 14%", "我的电量为 14 百分之", "zh-cn"), |
768 | 768 | ("Pilim %14 dolu.", "Pilim yüzde 14 dolu.", "tr"),
|
769 | 769 | ("Az akkumulátorom töltöttsége 14%", "Az akkumulátorom töltöttsége 14 százalék", "hu"),
|
770 | 770 | ("배터리 잔량이 14%입니다.", "배터리 잔량이 14 퍼센트입니다.", "ko"),
|
|
0 commit comments