-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
67 lines (50 loc) · 2.08 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import string
from typing import List
class Token:
    """
    Helper class for everything related to tokens.

    Pairs the original text of a token with the text of its correction.
    """

    def __init__(self, original: str, corrected: str):
        """
        Store both forms of the token.

        :param original: String value of the original token.
        :param corrected: String value of the original token's correction.
        """
        self.corrected = corrected
        self.original = original

    def to_json(self):
        """
        Return the token as a plain dictionary with the keys
        "original" and "corrected".
        """
        return dict(original=self.original, corrected=self.corrected)

    def __str__(self):
        """
        Return a one-line, human-readable description of the token.
        """
        return f"Original: {self.original} - Corrected: {self.corrected}"
class Correction:
    """
    Helper class for containing functions related to corrections.

    Holds an original text together with its corrected form and offers a
    position-by-position comparison of their space-separated tokens.
    """

    # Translation table that deletes every character in string.punctuation.
    # Built once so cleaning a token is a single pass over the string.
    _PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

    def __init__(self, original: str, correction: str, candidates: dict):
        """
        :param original: Original text in its unprocessed form.
        :param correction: Correction of the original text.
        :param candidates: Dictionary which contains the possible candidates of tokens offered by the corrector.
        """
        self.original = original
        self.correction = correction
        self.candidates = candidates
        # Split on a single space (not on arbitrary whitespace): consecutive
        # spaces therefore produce empty tokens, which keeps positions stable.
        self.original_tokens = self.original.split(" ")
        self.correction_tokens = self.correction.split(" ")

    def _clean_token(self, token: str) -> str:
        """
        Remove symbol characters from the token like '!' or ','.

        :param token: Token to clean.
        :return: The token without any string.punctuation characters and
            without leading or trailing whitespace.
        """
        return token.translate(self._PUNCTUATION_TABLE).strip()

    # The return annotation is quoted so that Token is not evaluated while
    # this class body is being executed.
    @property
    def corrected_tokens(self) -> "List[Token]":
        """
        Collect the tokens whose correction differs from the original.

        Tokens are compared by position after punctuation has been removed,
        ignoring case. When the original and the correction contain a
        different number of tokens, only the positions present in both are
        compared; unpaired trailing tokens are ignored instead of raising
        an IndexError.

        :return: One Token (cleaned original, cleaned correction) per
            position where the two texts differ.
        """
        container = []
        # zip stops at the shorter list, so a correction with fewer tokens
        # than the original can no longer cause an out-of-range lookup.
        for original_token, correction_token in zip(self.original_tokens, self.correction_tokens):
            original_token = self._clean_token(original_token)
            correction_token = self._clean_token(correction_token)
            if correction_token.lower() != original_token.lower():
                container.append(Token(original_token, correction_token))
        return container

    def __str__(self):
        """
        Return the corrected text.
        """
        return self.correction