From ff6aaa58baa34d2ae175311d6caca1b127cc6909 Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:10:50 -0400 Subject: [PATCH 01/10] handle macron in add_breathing --- characters.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/characters.py b/characters.py index cc83e81..e0b31a6 100644 --- a/characters.py +++ b/characters.py @@ -31,8 +31,12 @@ def add_breathing(ch, breathing): Add the given breathing to the given (possibly accented) character. """ decomposed = unicodedata.normalize("NFD", ch) - return unicodedata.normalize( - "NFC", decomposed[0] + breathing + decomposed[1:]) + if len(decomposed) > 1 and decomposed[1] == LONG: + return unicodedata.normalize( + "NFC", decomposed[0:2] + breathing + decomposed[2:]) + else: + return unicodedata.normalize( + "NFC", decomposed[0] + breathing + decomposed[1:]) def remove_diacritic(*diacritics): From c902f494c0a7d25861af3420b1dd6cb80fa8b8ab Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:13:44 -0400 Subject: [PATCH 02/10] added split_initial_breathing and debreath methods --- syllabify.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/syllabify.py b/syllabify.py index 83c882b..c514911 100644 --- a/syllabify.py +++ b/syllabify.py @@ -255,6 +255,23 @@ def contonation(w): return [] +def split_initial_breathing(word): + s = syllabify(word) + o, n, c = onset_nucleus_coda(s[0]) + if o in [SMOOTH, ROUGH]: + return o, n + c + "".join(s[1:]) + else: + return None, word + + +def debreath(word): + a, word = split_initial_breathing(word) + if a == ROUGH: + return "h" + word + else: + return word + + def add_necessary_breathing(w): s = syllabify(w) o, n, c = onset_nucleus_coda(s[0]) From 26641ae57eb5f9e2a5de1a6893afbabad815c5f9 Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:15:14 -0400 Subject: [PATCH 03/10] improved add_necessary_breathing that supports rough breathing and macrons --- syllabify.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/syllabify.py b/syllabify.py index c514911..8b99906 100644 --- a/syllabify.py +++ b/syllabify.py @@ -272,14 +272,22 @@ def debreath(word): return word -def add_necessary_breathing(w): +def add_necessary_breathing(w, breathing=SMOOTH): s = syllabify(w) o, n, c = onset_nucleus_coda(s[0]) if o == "": - if len(n) == 2: - n = n[0] + add_breathing(n[1], SMOOTH) + for i, ch in enumerate(n): + if base(ch) in "αεηιουω": + last_vowel = i + if last_vowel > 0: + pre = n[:last_vowel] else: - n = add_breathing(n, SMOOTH) + pre = "" + if last_vowel + 1 < len(n): + post = n[last_vowel + 1:] + else: + post = "" + n = pre + add_breathing(n[last_vowel], breathing) + post return o + n + c + "".join(s[1:]) else: return w From 7fddd0cd7f958c5e5727bdfd64026f3c5b9a23ff Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:17:50 -0400 Subject: [PATCH 04/10] added rebreath method added rebreath function --- syllabify.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/syllabify.py b/syllabify.py index 8b99906..179d331 100644 --- a/syllabify.py +++ b/syllabify.py @@ -272,6 +272,18 @@ def debreath(word): return word +def rebreath(word): + if word == "": + return word + if word.startswith("h"): + word = add_necessary_breathing(word[1:], ROUGH) + else: + word = add_necessary_breathing(word) + word = remove_redundant_macron(word) + + return word + + def add_necessary_breathing(w, breathing=SMOOTH): s = syllabify(w) o, n, c = onset_nucleus_coda(s[0]) From 6bb518f49ca23635f06c896d27bb48479d2b78ea Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:22:05 -0400 Subject: [PATCH 05/10] fixed import error --- syllabify.py | 1 + 1 file changed, 1 insertion(+) diff --git a/syllabify.py b/syllabify.py index 179d331..3cd02c7 100644 --- a/syllabify.py +++ b/syllabify.py @@ -1,4 +1,5 @@ from characters import accent, base, diaeresis, iota_subscript, length +from characters import remove_redundant_macron from characters import breathing, strip_breathing, add_breathing, SMOOTH, ROUGH from characters import ACUTE, CIRCUMFLEX, SHORT, LONG From 379f2b7f830dcc7736565be173785491dd31230c Mon Sep 17 00:00:00 2001 From: James Tauber Date: Tue, 26 Jul 2016 23:27:40 -0400 Subject: [PATCH 06/10] added test for add_breathing with macron --- extra.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extra.rst b/extra.rst index 6bc1721..a4c43dd 100644 --- a/extra.rst +++ b/extra.rst @@ -1,4 +1,5 @@ ->>> from characters import add_diacritic, accent, breathing, length +>>> from characters import add_diacritic, add_breathing +>>> from characters import accent, breathing, length >>> from characters import LONG, ACUTE, ROUGH, SMOOTH >>> a = add_diacritic(add_diacritic(add_diacritic('ι', LONG), ACUTE), ROUGH) @@ -24,6 +25,9 @@ >>> length(a) == LONG, length(b) == LONG (True, True) +>>> add_breathing("ῑ", ROUGH) +'ῑ̔' + >>> from syllabify import syllabify, add_necessary_breathing >>> from syllabify import onset, nucleus, coda, onset_nucleus_coda From 2e13ca870840781ea57a449ef4239beca4a1f93f Mon Sep 17 00:00:00 2001 From: James Tauber Date: Wed, 27 Jul 2016 00:16:57 -0400 Subject: [PATCH 07/10] added examples/tests for split_initial_breathing --- docs.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs.rst b/docs.rst index 1570da8..74668f1 100644 --- a/docs.rst +++ b/docs.rst @@ -218,6 +218,18 @@ A "word" without vowels is just treated as having an onset: >>> coda('βββ') +You can split out the initial breathing: + +>>> split_initial_breathing('οἰκία') == (SMOOTH, 'οικία') +True + +>>> split_initial_breathing('λόγος') == (None, 'λόγος') +True + +>>> split_initial_breathing('ὅ') == (ROUGH, 'ό') +True + + You can find out the length of a syllable: >>> syllable_length('κός') == SHORT From ec52eb43c71e251fe9c109d5a282d0bdf81b08ea Mon Sep 17 00:00:00 2001 From: James Tauber Date: Wed, 27 Jul 2016 00:39:37 -0400 Subject: [PATCH 08/10] added examples/tests for debreath --- docs.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs.rst b/docs.rst index 74668f1..f6332af 100644 --- a/docs.rst +++ b/docs.rst @@ -230,6 +230,22 @@ True True +This is actually more commonly just used as the `debreath` function which +drops smooth breathing and replaces rough breathing with an `h`: + +>>> debreath('οἰκία') +'οικία' + +>>> debreath('ὅ') +'hό' + +>>> debreath('λόγος') +'λόγος' + +>>> debreath('κἀγω') +'κἀγω' + + You can find out the length of a syllable: >>> syllable_length('κός') == SHORT From 8c07d080ebcc67623a5d6d213fefe5bbab5d27ab Mon Sep 17 00:00:00 2001 From: James Tauber Date: Wed, 27 Jul 2016 00:44:58 -0400 Subject: [PATCH 09/10] added examples/tests for rebreath --- docs.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs.rst b/docs.rst index f6332af..f99b067 100644 --- a/docs.rst +++ b/docs.rst @@ -246,6 +246,16 @@ drops smooth breathing and replaces rough breathing with an `h`: 'κἀγω' +The `rebreath` function will convert `h` back to rough breathing and add +smooth breathing if necessary, effectively reversing `debreath`. + +>>> rebreath('οικία') +'οἰκία' + +>>> rebreath('hό') +'ὅ' + + You can find out the length of a syllable: >>> syllable_length('κός') == SHORT From 9aff7bf786b6d89b7f3a45a739b4c44c5db6fb22 Mon Sep 17 00:00:00 2001 From: James Tauber Date: Wed, 27 Jul 2016 00:52:21 -0400 Subject: [PATCH 10/10] more test coverage --- extra.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/extra.rst b/extra.rst index a4c43dd..7dd9a1c 100644 --- a/extra.rst +++ b/extra.rst @@ -28,7 +28,7 @@ >>> add_breathing("ῑ", ROUGH) 'ῑ̔' ->>> from syllabify import syllabify, add_necessary_breathing +>>> from syllabify import syllabify, add_necessary_breathing, rebreath >>> from syllabify import onset, nucleus, coda, onset_nucleus_coda >>> syllabify('ῑ́̔στην') @@ -80,6 +80,13 @@ True >>> add_necessary_breathing('ῑ̔́στην') 'ῑ̔́στην' +>>> add_necessary_breathing('ῑ́') +'ῑ̓́' + +>>> rebreath('') +'' + + >>> from accentuation import recessive, add_accent, syllable_add_accent, PAROXYTONE >>> syllabify('ῑ̔στην')