def _attach_ord_postfix(prefix, postfix, post_group):
    """
    Joins a prefix with one entry of an ordinal postfix table
    :param prefix: text - the ordinal stem (the word without its ending)
    :param postfix: text, list of entries, or mapping {post_group: text}
    :param post_group: key used to pick the text from a mapping entry
    :returns: text, or a list with the same shape as a list entry
    """
    if isinstance(postfix, str):
        return prefix + postfix
    if isinstance(postfix, list):
        # [animate, inanimate] pair: resolve every item the same way
        return [_attach_ord_postfix(prefix, item, post_group)
                for item in postfix]
    # Remaining entries are indexed by the postfix group
    return prefix + postfix[post_group]


def make_ord_cases(prefix, post_group, postfixes=None):
    """
    Returns one value of mldir for specific number
    :param prefix: text - the ordinal stem (the word without its ending)
    :param post_group: 0 or 1 - affects the choice of postfix in some cases
    :param postfixes: postfix table laid out as [case][gender/plural];
        when None the module-level ORD_CASE_POSTFIXES is used, which is
        resolved at call time so the table may be defined after this
        function
    :returns: array with one item per row of the postfix table
    """
    if postfixes is None:
        postfixes = ORD_CASE_POSTFIXES
    return [
        [_attach_ord_postfix(prefix, postfix, post_group)
         for postfix in case]
        for case in postfixes
    ]


def make_ord_mldir(prefixes, post_groups, postfixes=None):
    """
    Returns dir in mldir format for given prefixes and post_groups
    :param prefixes: dict with format {num: prefix}
    :param post_groups: int (one group shared by every num) or dict with
        format {num: group_num}
    :param postfixes: optional postfix table passed on to make_ord_cases
    :returns: dict with format {num: value built by make_ord_cases}
    :raises KeyError: when post_groups is a dict lacking a num of prefixes
    """
    if isinstance(post_groups, int):
        # A single group number applies to every prefix
        post_groups = {n: post_groups for n in prefixes}
    return {
        num: make_ord_cases(prefix, post_groups[num], postfixes)
        for num, prefix in prefixes.items()
    }
gender_3 plural_4] -# gender: text or [animate, inanimate] +# Start filling ONES (mldir format) ONES = { 0: ['ноль', 'ноля', 'нолю', 'ноль', 'нолём', 'ноле'], 1: [['один', 'одна', 'одно', 'одни'], @@ -99,39 +117,19 @@ def case_classifier_element(classifier, case=D_CASE, plural=D_PLURAL, 8: ['восемь', 'восьми', 'восьми', 'восемь', 'восемью', 'восьми'], 9: ['девять', 'девяти', 'девяти', 'девять', 'девятью', 'девяти'] } - +# End filling ONES + +# Start filling ONES_ORD (mldir format) +ORD_CASE_POSTFIXES = [[{0: 'ой', 1: 'ый'}, 'ая', 'ое', 'ые'], + ['ого', 'ой', 'ого', 'ых'], + ['ому', 'ой', 'ому', 'ым'], + [['ого', {0: 'ой', 1: 'ый'}], 'ую', 'ое', ['ых', 'ые']], + ['ым', 'ой', 'ым', 'ыми'], + ['ом', 'ой', 'ом', 'ых']] ONES_ORD_PREFIXES = {0: 'нулев', 1: 'перв', 2: 'втор', 4: 'четвёрт', 5: 'пят', 6: 'шест', 7: 'седьм', 8: 'восьм', 9: 'девят'} ONES_ORD_POSTFIXES_GROUPS = {0: 0, 1: 1, 2: 0, 4: 1, 5: 1, 6: 0, 7: 0, 8: 0, 9: 1} -CASE_POSTFIXES = [[{0: 'ой', 1: 'ый'}, 'ая', 'ое', 'ые'], - ['ого', 'ой', 'ого', 'ых'], - ['ому', 'ой', 'ому', 'ым'], - [['ого', {0: 'ой', 1: 'ый'}], 'ую', 'ое', ['ых', 'ые']], - ['ым', 'ой', 'ым', 'ыми'], - ['ом', 'ой', 'ом', 'ых']] - - -def get_cases(prefix, post_group): - return [[ - prefix + postfix if isinstance(postfix, str) else - [prefix + animate if isinstance(animate, str) else - prefix + animate[post_group] - for animate in postfix] if isinstance(postfix, list) else - prefix + postfix[post_group] - for postfix in case] - for case in CASE_POSTFIXES] - - -def get_ord_classifier(prefixes, post_groups): - if isinstance(post_groups, int): - post_groups = {n: post_groups for n, i in prefixes.items()} - return { - num: get_cases(prefix, post_groups[num]) - for num, prefix in prefixes.items() - } - - ONES_ORD = { 3: [['третий', 'третья', 'третье', 'третьи'], ['третьего', 'третьей', 'третьего', 'третьих'], @@ -141,9 +139,11 @@ def get_ord_classifier(prefixes, post_groups): ['третьем', 'третьей', 'третьем', 'третьих']], } ONES_ORD.update( - 
get_ord_classifier(ONES_ORD_PREFIXES, ONES_ORD_POSTFIXES_GROUPS) + make_ord_mldir(ONES_ORD_PREFIXES, ONES_ORD_POSTFIXES_GROUPS) ) +# End filling ONES_ORD +# Start filling TENS (mldir format) TENS_PREFIXES = {1: 'один', 2: 'две', 3: 'три', 4: 'четыр', 5: 'пят', 6: 'шест', 7: 'сем', 8: 'восем', 9: 'девят'} TENS_POSTFIXES = ['надцать', 'надцати', 'надцати', 'надцать', 'надцатью', @@ -153,13 +153,17 @@ def get_ord_classifier(prefixes, post_groups): num: [prefix + postfix for postfix in TENS_POSTFIXES] for num, prefix in TENS_PREFIXES.items() }) +# End filling TENS +# Start filling TENS_ORD (mldir format) TENS_ORD_PREFIXES = {0: "десят"} TENS_ORD_PREFIXES.update({ num: prefix + 'надцат' for num, prefix in TENS_PREFIXES.items() }) -TENS_ORD = get_ord_classifier(TENS_ORD_PREFIXES, 1) +TENS_ORD = make_ord_mldir(TENS_ORD_PREFIXES, 1) +# End filling TENS_ORD +# Start filling TWENTIES (mldir format) TWENTIES = { 2: ['двадцать', 'двадцати', 'двадцати', 'двадцать', 'двадцатью', 'двадцати'], @@ -177,15 +181,19 @@ def get_ord_classifier(prefixes, post_groups): 9: ['девяносто', 'девяноста', 'девяноста', 'девяносто', 'девяноста', 'девяноста'], } +# End filling TWENTIES +# Start filling TWENTIES_ORD (mldir format) TWENTIES_ORD_PREFIXES = {2: 'двадцат', 3: 'тридцат', 4: 'сороков', 5: 'пятидесят', 6: 'шестидесят', 7: 'семидесят', 8: 'восьмидесят', 9: 'девяност'} TWENTIES_ORD_POSTFIXES_GROUPS = {2: 1, 3: 1, 4: 0, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1} -TWENTIES_ORD = get_ord_classifier(TWENTIES_ORD_PREFIXES, - TWENTIES_ORD_POSTFIXES_GROUPS) +TWENTIES_ORD = make_ord_mldir(TWENTIES_ORD_PREFIXES, + TWENTIES_ORD_POSTFIXES_GROUPS) +# End filling TWENTIES_ORD +# Start filling HUNDREDS (mldir format) HUNDREDS = { 1: ['сто', 'ста', 'ста', 'сто', 'ста', 'ста'], 2: ['двести', 'двухсот', 'двумстам', 'двести', 'двумястами', 'двухстах'], @@ -201,13 +209,18 @@ def get_ord_classifier(prefixes, post_groups): 9: ['девятьсот', 'девятисот', 'девятистам', 'девятьсот', 'девятьюстами', 'девятистах'], } +# End 
def get_mldir_value(cases_dict, num, case=D_CASE, plural=D_PLURAL,
                    gender=D_GENDER, animate=D_ANIMATE):
    """
    Looks up a single word form in an mldir dictionary
    :param cases_dict: one of the mldir dictionaries (ONES, TENS, ...)
    :param num: key of cases_dict
    :param case: key of CASE_INDEXES
    :param plural: truthy selects the plural slot; gender is then ignored
    :param gender: key of GENDER_PLURAL_INDEXES, read only when plural
        is falsy
    :param animate: truthy takes the first item of an
        [animate, inanimate] pair, falsy takes the second
    :return: text
    """
    by_case = cases_dict[num][CASE_INDEXES[case]]
    if isinstance(by_case, str):
        # Same form for every gender and number
        return by_case

    slot = 'plural' if plural else gender
    by_gender = by_case[GENDER_PLURAL_INDEXES[slot]]
    if isinstance(by_gender, str):
        # Same form for animate and inanimate
        return by_gender

    return by_gender[0 if animate else 1]


def get_thousands_elements(num, case):
    """
    Returns the forms of a THOUSANDS entry for the pluralize function
    :param num: key of THOUSANDS
        (NOTE(review): callers appear to pass the index of the
        three-digit group here, not a quantity - confirm)
    :param case: key of CASE_INDEXES
    :return: the THOUSANDS[num] item stored for that case
    """
    forms_by_case = THOUSANDS[num]
    return forms_by_case[CASE_INDEXES[case]]
in n: left, right = n.split('.') @@ -303,6 +356,13 @@ def __decimal_bitness(self, n): plural=True) def pluralize(self, n, forms): + """ + Returns appropriate form of item that should be after its quantity + :param n: number of items + :param forms: list of 3 forms - for 1, 2 and 5 counts + ('яблоко', 'яблока', 'яблок') + :return: one of the forms element + """ if n % 100 in (11, 12, 13, 14): return forms[2] if n % 10 == 1: @@ -312,7 +372,11 @@ def pluralize(self, n, forms): return forms[2] def to_ordinal(self, number, case=D_CASE, plural=D_PLURAL, gender=D_GENDER, - animate=D_ANIMATE): + animate=D_ANIMATE, feminine=False): + # Backward compatibility + if feminine: + gender = 'f' + self.verify_ordinal(number) n = str(number).replace(',', '.') return self._int2word(int(n), cardinal=False, case=case, plural=plural, @@ -328,31 +392,19 @@ def _cents_verbose(self, number, currency): return self._int2word(number, gender='f') return self._int2word(number, gender='m') - def _int2word(self, n, feminine=False, cardinal=True, case=D_CASE, + def _int2word(self, n, cardinal=True, case=D_CASE, plural=D_PLURAL, gender=D_GENDER, animate=D_ANIMATE): """ - n: number - feminine: not used - for backward compatibility - cardinal:True - cardinal - False - ordinal - case: 'n' - nominative - 'g' - genitive - 'd' - dative - 'a' - accusative - 'i' - instrumental - 'p' - prepositional - plural: True - plural - False - singular - gender: 'f' - masculine - 'm' - feminine - 'n' - neuter - animate: True - animate - False - inanimate + Main function + :param n: number of items + :param cardinal: True(cardinal), False(ordinal) + :param case: 'n'(nominative), 'g'(genitive), 'd'(dative), + 'a'(accusative),'i'(instrumental), 'p'(prepositional) + :param plural: True(plural), False(singular) + :param gender: 'm'(masculine), 'f'(feminine), 'n'(neuter) + :param animate: True(animate), False(inanimate) + :return text """ - # For backward compatibility - if feminine: - gender = 'f' - kwargs = {'case': case, 
def __chunk_cardianl(self, hundreds, tens, ones, chunk_num, **kwargs):
    """
    Builds the cardinal words of one three-digit group
    :param hundreds: hundreds digit of the group
    :param tens: tens digit of the group
    :param ones: ones digit of the group
    :param chunk_num: index of the group; 1 forces the feminine form
        of the ones word
    :param kwargs: case/plural/gender/animate passed to get_mldir_value
    :return: list of words
    """
    parts = []
    if hundreds > 0:
        parts.append(get_mldir_value(HUNDREDS, hundreds, **kwargs))

    if tens > 1:
        parts.append(get_mldir_value(TWENTIES, tens, **kwargs))

    if tens == 1:
        # 10-19 are single words keyed by the ones digit
        parts.append(get_mldir_value(TENS, ones, **kwargs))
    elif ones > 0:
        ones_kwargs = kwargs
        if chunk_num == 1:
            # Thousands are feminine
            ones_kwargs = dict(kwargs, gender='f')
        parts.append(get_mldir_value(ONES, ones, **ones_kwargs))

    return parts


def __chunk_ordinal(self, hundreds, tens, ones, chunk_num, **kwargs):
    """
    Builds the words of one three-digit group of an ordinal that is
    written as separate words. Only the last non-zero position is taken
    from an ordinal table with kwargs; the words before it are looked up
    with the default arguments of get_mldir_value.
    :param hundreds: hundreds digit of the group
    :param tens: tens digit of the group
    :param ones: ones digit of the group
    :param chunk_num: index of the group
    :param kwargs: case/plural/gender/animate passed to get_mldir_value
    :return: list of words
    """
    parts = []
    if hundreds > 0:
        if tens == 0 and ones == 0:
            parts.append(get_mldir_value(HUNDREDS_ORD, hundreds, **kwargs))
        else:
            parts.append(get_mldir_value(HUNDREDS, hundreds))

    if tens > 1:
        if ones == 0:
            parts.append(get_mldir_value(TWENTIES_ORD, tens, **kwargs))
        else:
            parts.append(get_mldir_value(TWENTIES, tens))

    if tens == 1:
        parts.append(get_mldir_value(TENS_ORD, ones, **kwargs))
    elif ones > 0:
        if chunk_num == 0:
            last = get_mldir_value(ONES_ORD, ones, **kwargs)
        elif chunk_num > 0 and ones == 1 and hundreds == 0 and tens == 0:
            # A lone 1 in a higher group adds no word of its own
            # ('тысячный', 'миллионный', etc.)
            last = None
        elif chunk_num == 1:
            # Thousands are feminine
            last = get_mldir_value(ONES, ones, gender='f')
        else:
            last = get_mldir_value(ONES, ones)

        if last:
            parts.append(last)

    return parts


def __chunk_ordinal_joined(self, hundreds, tens, ones, **kwargs):
    """
    Builds the glued prefix of a joined ordinal such as 'двух' in
    'двухтысячный'. kwargs are accepted but not read here.
    :param hundreds: hundreds digit of the group
    :param tens: tens digit of the group
    :param ones: ones digit of the group
    :return: text - the pieces concatenated without separators
    """
    pieces = []
    if hundreds > 1:
        pieces.append(get_mldir_value(HUNDREDS, hundreds, case='g'))
    elif hundreds == 1:
        # 'сто' keeps its default form here, not the genitive 'ста'
        pieces.append(get_mldir_value(HUNDREDS, hundreds))

    if tens == 9:
        # 'девяносто' keeps its default form here, not 'девяноста'
        pieces.append(get_mldir_value(TWENTIES, tens))
    elif tens > 1:
        pieces.append(get_mldir_value(TWENTIES, tens, case='g'))

    if tens == 1:
        pieces.append(get_mldir_value(TENS, ones, case='g'))
    elif ones > 0:
        # tens == 1 was handled above, so only ones decides here
        if ones != 1:
            last = get_mldir_value(ONES, ones, case='g')
        elif tens == 0 and hundreds == 0:
            # A lone 1 adds nothing: 'тысячный', 'миллионный', etc.
            last = None
        else:
            # Neuter 'одно' as in 'двадцатиодномиллионный'
            last = get_mldir_value(ONES, 1, gender='n')

        if last:
            pieces.append(last)

    return ''.join(pieces)
lang='ru', gender='f'), 'одна') + self.assertEqual(num2words(1, lang='ru', gender='ж'), 'одна') self.assertEqual(num2words(2, lang='ru', gender='f'), 'две') self.assertEqual(num2words(3, lang='ru', gender='f'), 'три') self.assertEqual(num2words(100, lang='ru', gender='f'), "сто") @@ -187,10 +188,18 @@ def test_to_ordinal(self): num2words(1000, lang='ru', to='ordinal'), 'тысячный' ) + self.assertEqual( + num2words(1000, lang='ru', to='ordinal', plural=True), + 'тысячные' + ) self.assertEqual( num2words(1001, lang='ru', to='ordinal'), 'тысяча первый' ) + self.assertEqual( + num2words(1001, lang='ru', to='ordinal', plural=True), + 'тысяча первые' + ) self.assertEqual( num2words(1060, lang='ru', to='ordinal'), 'тысяча шестидесятый' @@ -223,6 +232,10 @@ def test_to_ordinal(self): num2words(135100, lang='ru', to='ordinal'), 'сто тридцать пять тысяч сотый' ) + self.assertEqual( + num2words(135100, lang='ru', to='ordinal', plural=True), + 'сто тридцать пять тысяч сотые' + ) self.assertEqual( num2words(135120, lang='ru', to='ordinal'), 'сто тридцать пять тысяч сто двадцатый' @@ -281,6 +294,11 @@ def test_to_ordinal_feminine(self): num2words(2000000, lang='ru', to='ordinal', gender='f'), 'двухмиллионная' ) + self.assertEqual( + num2words(2000000, lang='ru', to='ordinal', gender='f', + plural=True), + 'двухмиллионные' + ) def test_to_ordinal_neuter(self): self.assertEqual( @@ -303,6 +321,11 @@ def test_to_ordinal_neuter(self): num2words(2000000, lang='ru', to='ordinal', gender='n'), 'двухмиллионное' ) + self.assertEqual( + num2words(2000000, lang='ru', to='ordinal', gender='n', + plural=True), + 'двухмиллионные' + ) def test_cardinal_cases(self): self.assertEqual( @@ -491,3 +514,18 @@ def test_to_currency(self): separator=' и'), 'сто один сум и двадцать два тийина' ) + self.assertEqual( + num2words(1234.56, lang='ru', to='currency', currency='PLN'), + 'одна тысяча двести тридцать четыре польских злотых, ' + 'пятьдесят шесть грошей' + ) + + def 
test_backward_compatibility(self): + self.assertEqual( + num2words(1, lang='ru', feminine=True), + 'одна' + ) + self.assertEqual( + num2words(1, lang='ru', to='ordinal', feminine=True), + 'первая' + )