-
-
Notifications
You must be signed in to change notification settings - Fork 273
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
Showing
12 changed files
with
158 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,33 @@ | ||
import joblib | ||
from os.path import join, dirname | ||
import logging | ||
import os | ||
from os.path import dirname | ||
import sys | ||
from languageflow.data import Sentence | ||
from languageflow.models.text_classifier import TextClassifier | ||
from underthesea.model_fetcher import ModelFetcher, UTSModel | ||
|
||
# NOTE(review): this span comes from a diff view (hunk "@@ -1,26 +1,33 @@"), so
# statements from the old and the new revision appear side by side — confirm
# against the real file which of them are live.
# Prepend this module's own directory to the import search path.
sys.path.insert(0, dirname(__file__)) | ||
|
||
# Cache used by sentiment(X): the objects it loads with joblib are stored here
# under the keys "x_transform", "y_transform" and "estimator".
bank_sentiment = {} | ||
# Log format that emits only the message text.
FORMAT = '%(message)s' | ||
logging.basicConfig(format=FORMAT) | ||
logger = logging.getLogger('underthesea') | ||
|
||
# Prepend the parent of this module's directory to the import search path.
sys.path.insert(0, dirname(dirname(__file__))) | ||
# Path of the sa_bank model as reported by ModelFetcher; sentiment(text) checks
# that it exists before loading from it.
model_path = ModelFetcher.get_model_path(UTSModel.sa_bank) | ||
# Set to the loaded TextClassifier by the first sentiment(text) call; None until then.
classifier = None | ||
|
||
def sentiment(X):
    """Predict the label(s) for ``X`` with the joblib-serialized pipeline.

    The three artifacts stored next to this module ("count.transformer.bin",
    "label.transformer.bin" and "model.bin") are loaded with joblib the first
    time they are needed and kept in the module-level ``bank_sentiment`` dict
    under "x_transform", "y_transform" and "estimator".

    :param X: a list of inputs, or a single input of any other type.
    :return: for a list, the inverse-transformed predictions for every item;
        otherwise the first (only) inverse-transformed prediction for ``X``.
    """
    global bank_sentiment
    artifacts = (
        ("x_transform", "count.transformer.bin"),
        ("y_transform", "label.transformer.bin"),
        ("estimator", "model.bin"),
    )
    # Load each artifact only if an earlier call has not cached it already.
    for key, filename in artifacts:
        if key not in bank_sentiment:
            bank_sentiment[key] = joblib.load(join(dirname(__file__), filename))

    features_tf = bank_sentiment["x_transform"]
    labels_tf = bank_sentiment["y_transform"]
    model = bank_sentiment["estimator"]

    # A non-list input is wrapped in a one-element batch and unwrapped again.
    is_batch = isinstance(X, list)
    samples = X if is_batch else [X]
    decoded = labels_tf.inverse_transform(
        model.predict(features_tf.transform(samples)))
    return decoded if is_batch else decoded[0]
|
||
def sentiment(text):
    """Return the label values predicted for ``text``.

    While the module-level ``classifier`` is still unset, it is loaded from
    ``model_path`` with ``TextClassifier.load``. If that path does not exist,
    an error explaining how to download the model is logged and the process
    exits through ``sys.exit(1)``.

    :param text: input wrapped in a ``Sentence`` before prediction.
    :return: list holding ``label.value`` for every label found on the
        sentence after ``classifier.predict`` has run.
    """
    global classifier

    if not classifier:
        if not os.path.exists(model_path):
            logger.error(
                f"Could not load model at {model_path}.\n"
                f"Download model with \"underthesea download {UTSModel.sa_bank.value}\".")
            sys.exit(1)
        classifier = TextClassifier.load(model_path)

    sentence = Sentence(text)
    # predict() is called for its effect on the sentence; labels are read from it.
    classifier.predict(sentence)
    return [label.value for label in sentence.labels]
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import unidecode | ||
from sklearn.base import BaseEstimator, TransformerMixin | ||
import string | ||
from underthesea.word_tokenize.regex_tokenize import tokenize | ||
|
||
|
||
# Substrings that CountEmoticons.count_emoticon tallies into its negative count.
# ':(' is listed twice; because this is a set literal the duplicate has no effect.
negative_emoticons = {':(', '☹', '❌', '👎', '👹', '💀', '🔥', '🤔', '😏', '😐', '😑', '😒', '😓', '😔', '😕', '😖', | ||
'😞', '😟', '😠', '😡', '😢', '😣', '😤', '😥', '😧', '😨', '😩', '😪', '😫', '😭', '😰', '😱', | ||
'😳', '😵', '😶', '😾', '🙁', '🙏', '🚫', '>:[', ':-(', ':(', ':-c', ':c', ':-<', ':っC', ':<', | ||
':-[', ':[', ':{'} | ||
|
||
# Substrings that CountEmoticons.count_emoticon tallies into its positive count.
# NOTE(review): 'v' is a plain letter and the counting uses s.count(), so on a
# str input every 'v' in the text is counted — confirm this is intended.
positive_emoticons = {'=))', 'v', ';)', '^^', '<3', '☀', '☺', '♡', '♥', '✌', '✨', '❣', '❤', '🌝', '🌷', '🌸', | ||
'🌺', '🌼', '🍓', '🎈', '🐅', '🐶', '🐾', '👉', '👌', '👍', '👏', '👻', '💃', '💄', '💋', | ||
'💌', '💎', '💐', '💓', '💕', '💖', '💗', '💙', '💚', '💛', '💜', '💞', ':-)', ':)', ':D', ':o)', | ||
':]', ':3', ':c)', ':>', '=]', '8)'} | ||
|
||
|
||
class Lowercase(BaseEstimator, TransformerMixin):
    """Stateless transformer that lower-cases every item of its input."""

    def fit(self, x, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, x):
        """Return a list with ``.lower()`` applied to each item of ``x``."""
        return [item.lower() for item in x]
|
||
|
||
class RemoveTone(BaseEstimator, TransformerMixin):
    """Stateless transformer that passes every item through ``unidecode``."""

    def fit(self, x, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def remove_tone(self, s):
        """Return ``unidecode.unidecode(s)`` for a single item ``s``."""
        stripped = unidecode.unidecode(s)
        return stripped

    def transform(self, x):
        """Return a list with ``remove_tone`` applied to each item of ``x``."""
        return list(map(self.remove_tone, x))
|
||
|
||
class CountEmoticons(BaseEstimator, TransformerMixin):
    """Stateless transformer mapping each item to its emoticon counts."""

    def fit(self, x, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def count_emoticon(self, s):
        """Return ``(positive_count, negative_count)`` for one item ``s``.

        Each count is the sum of ``s.count(emoticon)`` over the corresponding
        module-level set, so an occurrence that matches several entries is
        counted once per matching entry.
        """
        positive_count = sum(s.count(mark) for mark in positive_emoticons)
        negative_count = sum(s.count(mark) for mark in negative_emoticons)
        return positive_count, negative_count

    def transform(self, x):
        """Return a list of ``count_emoticon`` tuples, one per item of ``x``."""
        return list(map(self.count_emoticon, x))
|
||
|
||
class Tokenize(BaseEstimator, TransformerMixin):
    """Stateless transformer that tokenizes items and masks punctuation."""

    def fit(self, x, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def pun_num(self, s):
        """Replace punctuation tokens of ``s`` with the literal ``'punc'``.

        The whitespace-separated tokens of the original ``s`` are examined.
        A token qualifies when it is a substring of ``string.punctuation``
        and is not ``'.'``. Every occurrence of a qualifying token is replaced
        anywhere in the string, not only where it stands alone.
        """
        masked = s
        for token in s.split():
            # A lone full stop is kept; non-punctuation tokens are left alone.
            if token == '.' or token not in string.punctuation:
                continue
            masked = masked.replace(token, 'punc')
        return masked

    def transform(self, x):
        """Tokenize each item with ``tokenize(..., format='text')``, then mask it."""
        return [self.pun_num(tokenize(item, format='text')) for item in x]