forked from Winnetou/Latin2Greek
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParser.py
44 lines (32 loc) · 981 Bytes
/
Parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from collections import Counter
import re
from resource import greek_untranslatables
def try_with_zipfs_law():
''' Use Zipf's Law to identify words with the same counts'''
#use counter for greek and latin text
def greek_text_rip():
"""
Function for initial preparation of the Greek texts from Musaios
"""
pass
def latin_text_rip():
"""
Function for initial preparation of the Latin texts from Musaios
"""
pass
def read_verses(latin_text):
"""
For latin vulgate: read verses
"""
lines = open(latin_text).read().split('\n')
for line in lines:
chapter_and_verse = line.split(' ')[0]
chapter, verse = chapter_and_verse.split()[0], chapter_and_verse.split()[1]
return
def clean_clementina(text):
"""
Clean the text of clementina
"""
to_be_replaced = {"æ":"ae","œ":"oe", "Æ": "AE" } #"ë":"?"
for key in to_be_replaced.keys():
text.replace(key, to_be_replaced[key])