diff --git a/characters.py b/characters.py
index cc83e81..e0b31a6 100644
--- a/characters.py
+++ b/characters.py
@@ -31,8 +31,12 @@ def add_breathing(ch, breathing):
     Add the given breathing to the given (possibly accented) character.
     """
     decomposed = unicodedata.normalize("NFD", ch)
-    return unicodedata.normalize(
-        "NFC", decomposed[0] + breathing + decomposed[1:])
+    if len(decomposed) > 1 and decomposed[1] == LONG:
+        return unicodedata.normalize(
+            "NFC", decomposed[0:2] + breathing + decomposed[2:])
+    else:
+        return unicodedata.normalize(
+            "NFC", decomposed[0] + breathing + decomposed[1:])
 
 
 def remove_diacritic(*diacritics):
diff --git a/docs.rst b/docs.rst
index 1570da8..f99b067 100644
--- a/docs.rst
+++ b/docs.rst
@@ -218,6 +218,44 @@ A "word" without vowels is just treated as having an onset:
 >>> coda('βββ')
 
 
+You can split out the initial breathing:
+
+>>> split_initial_breathing('οἰκία') == (SMOOTH, 'οικία')
+True
+
+>>> split_initial_breathing('λόγος') == (None, 'λόγος')
+True
+
+>>> split_initial_breathing('ὅ') == (ROUGH, 'ό')
+True
+
+
+More commonly this is used through the `debreath` function, which drops
+smooth breathing and replaces rough breathing with an `h`:
+
+>>> debreath('οἰκία')
+'οικία'
+
+>>> debreath('ὅ')
+'hό'
+
+>>> debreath('λόγος')
+'λόγος'
+
+>>> debreath('κἀγω')
+'κἀγω'
+
+
+The `rebreath` function will convert `h` back to rough breathing and add
+smooth breathing if necessary, effectively reversing `debreath`:
+
+>>> rebreath('οικία')
+'οἰκία'
+
+>>> rebreath('hό')
+'ὅ'
+
+
 You can find out the length of a syllable:
 
 >>> syllable_length('κός') == SHORT
diff --git a/extra.rst b/extra.rst
index 6bc1721..7dd9a1c 100644
--- a/extra.rst
+++ b/extra.rst
@@ -1,4 +1,5 @@
->>> from characters import add_diacritic, accent, breathing, length
+>>> from characters import add_diacritic, add_breathing
+>>> from characters import accent, breathing, length
 >>> from characters import LONG, ACUTE, ROUGH, SMOOTH
 
 >>> a = add_diacritic(add_diacritic(add_diacritic('ι', LONG), ACUTE), ROUGH)
@@ -24,7 +25,10 @@
 >>> length(a) == LONG, length(b) == LONG
 (True, True)
 
->>> from syllabify import syllabify, add_necessary_breathing
+>>> add_breathing("ῑ", ROUGH)
+'ῑ̔'
+
+>>> from syllabify import syllabify, add_necessary_breathing, rebreath
 >>> from syllabify import onset, nucleus, coda, onset_nucleus_coda
 
 >>> syllabify('ῑ́̔στην')
@@ -76,6 +80,13 @@ True
 >>> add_necessary_breathing('ῑ̔́στην')
 'ῑ̔́στην'
 
+>>> add_necessary_breathing('ῑ́')
+'ῑ̓́'
+
+>>> rebreath('')
+''
+
+
 >>> from accentuation import recessive, add_accent, syllable_add_accent, PAROXYTONE
 
 >>> syllabify('ῑ̔στην')
diff --git a/syllabify.py b/syllabify.py
index 83c882b..3cd02c7 100644
--- a/syllabify.py
+++ b/syllabify.py
@@ -1,4 +1,5 @@
 from characters import accent, base, diaeresis, iota_subscript, length
+from characters import remove_redundant_macron
 from characters import breathing, strip_breathing, add_breathing, SMOOTH, ROUGH
 from characters import ACUTE, CIRCUMFLEX, SHORT, LONG
 
@@ -255,14 +256,78 @@ def contonation(w):
     return []
 
 
-def add_necessary_breathing(w):
+def split_initial_breathing(word):
+    """
+    Split a leading breathing off the given word.
+
+    Returns `(breathing, rest)` when the onset of the first syllable is
+    exactly SMOOTH or ROUGH, where `rest` is the word without that onset;
+    otherwise returns `(None, word)` with the word untouched.
+    """
+    if word == "":
+        # nothing to syllabify, so there is no breathing to split off
+        return None, word
+    s = syllabify(word)
+    o, n, c = onset_nucleus_coda(s[0])
+    if o in [SMOOTH, ROUGH]:
+        return o, n + c + "".join(s[1:])
+    else:
+        return None, word
+
+
+def debreath(word):
+    """
+    Drop an initial smooth breathing and replace an initial rough breathing
+    with a leading "h".
+    """
+    a, word = split_initial_breathing(word)
+    if a == ROUGH:
+        return "h" + word
+    else:
+        return word
+
+
+def rebreath(word):
+    """
+    Reverse `debreath`: turn a leading "h" into rough breathing, otherwise
+    add smooth breathing if necessary. The result is then passed through
+    `remove_redundant_macron`. An empty word is returned as is.
+    """
+    if word == "":
+        return word
+    if word.startswith("h"):
+        word = add_necessary_breathing(word[1:], ROUGH)
+    else:
+        word = add_necessary_breathing(word)
+    word = remove_redundant_macron(word)
+
+    return word
+
+
+def add_necessary_breathing(w, breathing=SMOOTH):
+    """
+    Add the given breathing (SMOOTH by default) to the given word if its
+    first syllable has an empty onset.
+
+    The breathing goes on the last character of the first nucleus whose base
+    is a vowel. The word is returned unchanged if it is empty, if the onset
+    is not empty, or if no such character is found.
+    """
+    if w == "":
+        return w
     s = syllabify(w)
     o, n, c = onset_nucleus_coda(s[0])
     if o == "":
-        if len(n) == 2:
-            n = n[0] + add_breathing(n[1], SMOOTH)
-        else:
-            n = add_breathing(n, SMOOTH)
+        last_vowel = None
+        for i, ch in enumerate(n):
+            if base(ch) in "αεηιουω":
+                last_vowel = i
+        if last_vowel is None:
+            # no vowel found in the nucleus, so nowhere to put the breathing
+            return w
+        # slicing already yields "" at either end of the nucleus
+        n = (n[:last_vowel] + add_breathing(n[last_vowel], breathing) +
+             n[last_vowel + 1:])
         return o + n + c + "".join(s[1:])
     else:
         return w