Skip to content

Commit

Permalink
Merge pull request #3 from jtauber/from-greek-inflexion
Browse files Browse the repository at this point in the history
From greek inflexion
  • Loading branch information
jtauber authored Jul 27, 2016
2 parents f147732 + 9aff7bf commit fdef4a7
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 8 deletions.
8 changes: 6 additions & 2 deletions characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,12 @@ def add_breathing(ch, breathing):
Add the given breathing to the given (possibly accented) character.
"""
decomposed = unicodedata.normalize("NFD", ch)
return unicodedata.normalize(
"NFC", decomposed[0] + breathing + decomposed[1:])
if len(decomposed) > 1 and decomposed[1] == LONG:
return unicodedata.normalize(
"NFC", decomposed[0:2] + breathing + decomposed[2:])
else:
return unicodedata.normalize(
"NFC", decomposed[0] + breathing + decomposed[1:])


def remove_diacritic(*diacritics):
Expand Down
38 changes: 38 additions & 0 deletions docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,44 @@ A "word" without vowels is just treated as having an onset:
>>> coda('βββ')


You can split out the initial breathing:

>>> split_initial_breathing('οἰκία') == (SMOOTH, 'οικία')
True

>>> split_initial_breathing('λόγος') == (None, 'λόγος')
True

>>> split_initial_breathing('ὅ') == (ROUGH, 'ό')
True


More commonly, this is used through the `debreath` function, which
drops smooth breathing and replaces rough breathing with an `h`:

>>> debreath('οἰκία')
'οικία'

>>> debreath('ὅ')
'hό'

>>> debreath('λόγος')
'λόγος'

>>> debreath('κἀγω')
'κἀγω'


The `rebreath` function will convert `h` back to rough breathing and add
smooth breathing if necessary, effectively reversing `debreath`.

>>> rebreath('οικία')
'οἰκία'

>>> rebreath('hό')
'ὅ'


You can find out the length of a syllable:

>>> syllable_length('κός') == SHORT
Expand Down
15 changes: 13 additions & 2 deletions extra.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
>>> from characters import add_diacritic, accent, breathing, length
>>> from characters import add_diacritic, add_breathing
>>> from characters import accent, breathing, length
>>> from characters import LONG, ACUTE, ROUGH, SMOOTH

>>> a = add_diacritic(add_diacritic(add_diacritic('ι', LONG), ACUTE), ROUGH)
Expand All @@ -24,7 +25,10 @@
>>> length(a) == LONG, length(b) == LONG
(True, True)

>>> from syllabify import syllabify, add_necessary_breathing
>>> add_breathing("ῑ", ROUGH)
'ῑ̔'

>>> from syllabify import syllabify, add_necessary_breathing, rebreath
>>> from syllabify import onset, nucleus, coda, onset_nucleus_coda

>>> syllabify('ῑ́̔στην')
Expand Down Expand Up @@ -76,6 +80,13 @@ True
>>> add_necessary_breathing('ῑ̔́στην')
'ῑ̔́στην'

>>> add_necessary_breathing('ῑ́')
'ῑ̓́'

>>> rebreath('')
''


>>> from accentuation import recessive, add_accent, syllable_add_accent, PAROXYTONE

>>> syllabify('ῑ̔στην')
Expand Down
46 changes: 42 additions & 4 deletions syllabify.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from characters import accent, base, diaeresis, iota_subscript, length
from characters import remove_redundant_macron
from characters import breathing, strip_breathing, add_breathing, SMOOTH, ROUGH
from characters import ACUTE, CIRCUMFLEX, SHORT, LONG

Expand Down Expand Up @@ -255,14 +256,51 @@ def contonation(w):
return []


def add_necessary_breathing(w):
def split_initial_breathing(word):
    """
    Split the breathing off the start of `word`.

    Returns a `(breathing, rest)` pair. When the onset of the first
    syllable (as reported by `onset_nucleus_coda`) is `SMOOTH` or `ROUGH`,
    `breathing` is that value and `rest` is the word rebuilt without it.
    Otherwise the result is `(None, word)` with the word untouched.

    An empty `word` has no first syllable to inspect, so it is returned
    as `(None, word)` without being syllabified.
    """
    if not word:
        # Guard the `s[0]` lookup below; `rebreath` guards the empty string
        # in the same way before it syllabifies.
        return None, word

    s = syllabify(word)
    o, n, c = onset_nucleus_coda(s[0])
    if o in [SMOOTH, ROUGH]:
        # Drop the onset (the breathing) and glue everything else back.
        return o, n + c + "".join(s[1:])
    return None, word


def debreath(word):
    """
    Return `word` with its initial breathing removed.

    A rough breathing is replaced by a leading "h"; any other result from
    `split_initial_breathing` (smooth breathing, or no initial breathing)
    just yields the remainder it returned.
    """
    mark, remainder = split_initial_breathing(word)
    return "h" + remainder if mark == ROUGH else remainder


def rebreath(word):
    """
    Restore breathing on a word, treating a leading "h" as rough breathing.

    The empty string is returned unchanged. A word starting with "h" has
    the "h" dropped and `ROUGH` passed to `add_necessary_breathing`; any
    other word is passed to `add_necessary_breathing` with its default
    breathing. The result is then run through `remove_redundant_macron`.
    """
    if word == "":
        return word

    # Choose the call arguments first, so there is a single call site.
    if word.startswith("h"):
        call_args = (word[1:], ROUGH)
    else:
        call_args = (word,)

    return remove_redundant_macron(add_necessary_breathing(*call_args))


def add_necessary_breathing(w, breathing=SMOOTH):
    """
    Add `breathing` to `w` if its first syllable has an empty onset.

    The breathing is placed on the last character of the first syllable's
    nucleus whose base is one of "αεηιουω", so a two-vowel nucleus carries
    it on the second vowel. Characters of the nucleus before and after
    that vowel are kept as they are.

    `w` is returned unchanged when the first syllable already has a
    non-empty onset, or when no such vowel is found in the nucleus.

    Note: the `breathing` parameter shadows the `breathing` function
    imported from `characters` inside this function body only.
    """
    s = syllabify(w)
    o, n, c = onset_nucleus_coda(s[0])
    if o != "":
        return w

    # Track the index of the last vowel; None means "no vowel seen", which
    # would otherwise leave the index unbound and raise UnboundLocalError.
    last_vowel = None
    for i, ch in enumerate(n):
        if base(ch) in "αεηιουω":
            last_vowel = i
    if last_vowel is None:
        return w

    # Slices past either end are simply empty, so no bounds checks needed.
    n = (
        n[:last_vowel]
        + add_breathing(n[last_vowel], breathing)
        + n[last_vowel + 1:]
    )
    return o + n + c + "".join(s[1:])

0 comments on commit fdef4a7

Please sign in to comment.