Merge pull request #3 from jtauber/from-greek-inflexion

From greek inflexion
jtauber · Jul 27, 2016 · fdef4a7 · fdef4a7
2 parents f147732 + 9aff7bf
commit fdef4a7
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 8 deletions.
diff --git a/characters.py b/characters.py
@@ -31,8 +31,12 @@ def add_breathing(ch, breathing):
     Add the given breathing to the given (possibly accented) character.
     """
     decomposed = unicodedata.normalize("NFD", ch)
-    return unicodedata.normalize(
-        "NFC", decomposed[0] + breathing + decomposed[1:])
+    if len(decomposed) > 1 and decomposed[1] == LONG:
+        return unicodedata.normalize(
+            "NFC", decomposed[0:2] + breathing + decomposed[2:])
+    else:
+        return unicodedata.normalize(
+            "NFC", decomposed[0] + breathing + decomposed[1:])
 
 
 def remove_diacritic(*diacritics):

diff --git a/docs.rst b/docs.rst
@@ -218,6 +218,44 @@ A "word" without vowels is just treated as having an onset:
 >>> coda('βββ')
 
 
+You can split out the initial breathing:
+
+>>> split_initial_breathing('οἰκία') == (SMOOTH, 'οικία')
+True
+
+>>> split_initial_breathing('λόγος') == (None, 'λόγος')
+True
+
+>>> split_initial_breathing('ὅ') == (ROUGH, 'ό')
+True
+
+
+In practice this is more commonly used via the `debreath` function, which
+drops smooth breathing and replaces rough breathing with an `h`:
+
+>>> debreath('οἰκία')
+'οικία'
+
+>>> debreath('ὅ')
+'hό'
+
+>>> debreath('λόγος')
+'λόγος'
+
+>>> debreath('κἀγω')
+'κἀγω'
+
+
+The `rebreath` function will convert `h` back to rough breathing and add
+smooth breathing if necessary, effectively reversing `debreath`.
+
+>>> rebreath('οικία')
+'οἰκία'
+
+>>> rebreath('hό')
+'ὅ'
+
+
 You can find out the length of a syllable:
 
 >>> syllable_length('κός') == SHORT

diff --git a/extra.rst b/extra.rst
@@ -1,4 +1,5 @@
->>> from characters import add_diacritic, accent, breathing, length
+>>> from characters import add_diacritic, add_breathing
+>>> from characters import accent, breathing, length
 >>> from characters import LONG, ACUTE, ROUGH, SMOOTH
 
 >>> a = add_diacritic(add_diacritic(add_diacritic('ι', LONG), ACUTE), ROUGH)
@@ -24,7 +25,10 @@
 >>> length(a) == LONG, length(b) == LONG
 (True, True)
 
->>> from syllabify import syllabify, add_necessary_breathing
+>>> add_breathing("ῑ", ROUGH)
+'ῑ̔'
+
+>>> from syllabify import syllabify, add_necessary_breathing, rebreath
 >>> from syllabify import onset, nucleus, coda, onset_nucleus_coda
 
 >>> syllabify('ῑ́̔στην')
@@ -76,6 +80,13 @@ True
 >>> add_necessary_breathing('ῑ̔́στην')
 'ῑ̔́στην'
 
+>>> add_necessary_breathing('ῑ́')
+'ῑ̓́'
+
+>>> rebreath('')
+''
+
+
 >>> from accentuation import recessive, add_accent, syllable_add_accent, PAROXYTONE
 
 >>> syllabify('ῑ̔στην')

diff --git a/syllabify.py b/syllabify.py
@@ -1,4 +1,5 @@
 from characters import accent, base, diaeresis, iota_subscript, length
+from characters import remove_redundant_macron
 from characters import breathing, strip_breathing, add_breathing, SMOOTH, ROUGH
 from characters import ACUTE, CIRCUMFLEX, SHORT, LONG
 
@@ -255,14 +256,51 @@ def contonation(w):
     return []
 
 
-def add_necessary_breathing(w):
+def split_initial_breathing(word):
+    """
+    Split a word-initial breathing mark off `word`.
+
+    Returns a `(breathing, rest)` tuple. When the onset of the first
+    syllable is exactly SMOOTH or ROUGH, `breathing` is that mark and
+    `rest` is the word rebuilt without it; otherwise `(None, word)` is
+    returned with the word untouched.
+    """
+    s = syllabify(word)
+    o, n, c = onset_nucleus_coda(s[0])
+    if o in [SMOOTH, ROUGH]:
+        return o, n + c + "".join(s[1:])
+    else:
+        return None, word
+
+
+def debreath(word):
+    """
+    Return `word` with its initial breathing made explicit.
+
+    An initial rough breathing is replaced by a leading "h"; an initial
+    smooth breathing is simply dropped. A word for which
+    `split_initial_breathing` finds no initial breathing is returned
+    unchanged.
+    """
+    a, word = split_initial_breathing(word)
+    if a == ROUGH:
+        return "h" + word
+    else:
+        return word
+
+
+def rebreath(word):
+    """
+    Restore breathing on a word in the "h" notation used by `debreath`.
+
+    The empty string is returned as is. A leading "h" is removed and the
+    rest is passed to `add_necessary_breathing` with ROUGH; any other word
+    is passed to `add_necessary_breathing` with its default (SMOOTH). The
+    result is then passed through `remove_redundant_macron`.
+    """
+    if word == "":
+        return word
+    if word.startswith("h"):
+        word = add_necessary_breathing(word[1:], ROUGH)
+    else:
+        word = add_necessary_breathing(word)
+    word = remove_redundant_macron(word)

+    return word
+
+
+def add_necessary_breathing(w, breathing=SMOOTH):
+    """
+    Add `breathing` to the first syllable of `w` if that syllable has an
+    empty onset; otherwise return `w` unchanged.
+
+    The breathing is placed on the last character of the first syllable's
+    nucleus whose base is one of αεηιουω. `breathing` defaults to SMOOTH.
+
+    NOTE(review): the `breathing` parameter shadows the `breathing`
+    function imported from `characters` for the duration of this function.
+    """
     s = syllabify(w)
     o, n, c = onset_nucleus_coda(s[0])
     if o == "":
-        if len(n) == 2:
-            n = n[0] + add_breathing(n[1], SMOOTH)
+        # Find the index of the last vowel in the nucleus; that character
+        # receives the breathing.
+        # NOTE(review): `last_vowel` is only bound inside this loop, so a
+        # nucleus with no matching character would raise UnboundLocalError
+        # below -- confirm syllabify never produces one when onset is "".
+        for i, ch in enumerate(n):
+            if base(ch) in "αεηιουω":
+                last_vowel = i
+        if last_vowel > 0:
+            pre = n[:last_vowel]
+        else:
+            pre = ""
+        if last_vowel + 1 < len(n):
+            post = n[last_vowel + 1:]
         else:
-            n = add_breathing(n, SMOOTH)
+            post = ""
+        n = pre + add_breathing(n[last_vowel], breathing) + post
         return o + n + c + "".join(s[1:])
     else:
         return w