Skip to content

Commit

Permalink
rerun bigrams weighted
Browse files Browse the repository at this point in the history
  • Loading branch information
gicraveiro committed Feb 5, 2022
1 parent 0d97939 commit abf383f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 26 deletions.
36 changes: 18 additions & 18 deletions AIclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def create_vectors_list(sents, conversion_dict):
bigrams_vector.append(sent_bigrams_vector)
mixed_vector.append(sent_mixed_vector)

return vectors_list # unigrams
#return bigrams_vector
#return vectors_list # unigrams
return bigrams_vector
#return mixed_vector

# def create_word_embedding(partition):
Expand Down Expand Up @@ -195,8 +195,8 @@ def create_vectors_list(sents, conversion_dict):
#train_word_embedding_features = create_word_embedding(sents_train)
#dev_word_embedding_features = create_word_embedding(sents_dev)
#test_word_embedding_features = numpy.asarray(create_word_embedding(sents_test))
print("Length of the dictionary of word representations:",len(words_to_numbers))
# print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(words_to_numbers))
print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(mixed_to_numbers))

# FLAG - CHECK IF DICTIONARY IS BUILT CORRECTLY
Expand All @@ -205,13 +205,13 @@ def create_vectors_list(sents, conversion_dict):
# count frequency before and after removing unknown words - ??? - ASK GABRIEL!!
# checked that it seems ok

train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
test_vectors_list = create_vectors_list(sents_test, words_to_numbers)
# train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, words_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)
train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
Expand All @@ -221,13 +221,13 @@ def create_vectors_list(sents, conversion_dict):

# FLAG - CHECK IF SENTENCE REPRESENTATIONS WERE DONE CORRECTLY

train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)
# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)
train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
Expand Down Expand Up @@ -283,9 +283,9 @@ def create_vectors_list(sents, conversion_dict):
importances = model.feature_importances_

features = {}
for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
if (value != 0):
features[token] = value
features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
Expand Down
Binary file modified output/AI Classifier/1Label_confusion_matrix_NonNorm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/AI Classifier/1Label_confusion_matrix_NormTrue.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions output/AI Classifier/1labelPredictionsStatsTest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ Performance measures - Unigram Dictionary - Adaboost

Test set:

Precision macro: 0.254
Precision Individually: [0.357 0. 0.59 0.125 0.2 ]
Recall macro: 0.258
Recall Individually: [0.357 0. 0.742 0.091 0.1 ]
F1 Score micro: 0.448
F1 Score macro: 0.251
F1 Score weighted: 0.416
F1 Score Individually: [0.357 0. 0.657 0.105 0.133]
Precision macro: 0.257
Precision Individually: [0.8 0. 0.484 0. 0. ]
Recall macro: 0.251
Recall Individually: [0.286 0. 0.968 0. 0. ]
F1 Score micro: 0.507
F1 Score macro: 0.213
F1 Score weighted: 0.386
F1 Score Individually: [0.421 0. 0.645 0. 0. ]


0 comments on commit abf383f

Please sign in to comment.