Skip to content

Commit

Permalink
reran mixed features with list of features lr 0.5 n est 100
Browse files Browse the repository at this point in the history
  • Loading branch information
gicraveiro committed Feb 5, 2022
1 parent 354ade9 commit 3189279
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 29 deletions.
41 changes: 21 additions & 20 deletions AIclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def create_vectors_list(sents, conversion_dict):
mixed_vector.append(sent_mixed_vector)

#return vectors_list # unigrams
return bigrams_vector
#return mixed_vector
#return bigrams_vector
return mixed_vector

# def create_word_embedding(partition):

Expand Down Expand Up @@ -184,7 +184,8 @@ def create_vectors_list(sents, conversion_dict):
bigrams_to_numbers = create_dict(bigrams_filtered_lexicon)

# Mixed dictionary
with open('features.txt', 'r') as file:
with open('featureslr0.5nEst100.txt', 'r') as file:
#with open('features.txt', 'r') as file:
features_list = file.read()
features_list = features_list.split('\n')
mixed_to_numbers = create_dict(features_list)
Expand All @@ -198,8 +199,8 @@ def create_vectors_list(sents, conversion_dict):
#dev_word_embedding_features = create_word_embedding(sents_dev)
#test_word_embedding_features = numpy.asarray(create_word_embedding(sents_test))
#print("Length of the dictionary of word representations:",len(words_to_numbers))
print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(mixed_to_numbers))
#print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
print("Length of the dictionary of word representations:",len(mixed_to_numbers))

# FLAG - CHECK IF DICTIONARY IS BUILT CORRECTLY
# SHOULD PUNCTUATION BE UNKNOWN? BECAUSE RIGHT NOW IT IS -NOPE, FIXED
Expand All @@ -211,13 +212,13 @@ def create_vectors_list(sents, conversion_dict):
# dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, words_to_numbers)

train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)
# train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, mixed_to_numbers)
train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
test_vectors_list = create_vectors_list(sents_test, mixed_to_numbers)

# COUNT STATISTICS - HOW MANY WORDS WERE CONSIDERED UNK, AND HOW MANY OF EACH WORD

Expand All @@ -227,13 +228,13 @@ def create_vectors_list(sents, conversion_dict):
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)

train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)
# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, mixed_to_numbers)
train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, mixed_to_numbers)

# FLAG - CHECK IF SPARSE MATRIX REPRESENTATION WAS DONE CORRECTLY

Expand Down Expand Up @@ -286,8 +287,8 @@ def create_vectors_list(sents, conversion_dict):

features = {}
#for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
if (value != 0):
features[token] = value
features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
Expand Down Expand Up @@ -326,7 +327,7 @@ def create_vectors_list(sents, conversion_dict):
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'w') as file:
#print("Performance measures - Unigram Dictionary - MLP Word Embeddings\n", file=file)
print("Performance measures - Bigram Dictionary - Adaboost\n", file=file)
print("Performance measures - Mixed Dictionary - Adaboost\n", file=file)
#write_output_stats_file(path, "Dev", dev_labels_primary, predictions, labels)
write_output_stats_file(path, "Test", test_labels_primary, predictions, labels)

Expand Down
Binary file modified output/AI Classifier/1Label_confusion_matrix_NonNorm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/AI Classifier/1Label_confusion_matrix_NormTrue.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 9 additions & 9 deletions output/AI Classifier/1labelPredictionsStatsTest.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Performance measures - Bigram Dictionary - Adaboost
Performance measures - Mixed Dictionary - Adaboost

Test set:

Precision macro: 0.275
Precision Individually: [0.857 0. 0.517 0. 0. ]
Recall macro: 0.279
Recall Individually: [0.429 0. 0.968 0. 0. ]
F1 Score micro: 0.537
F1 Score macro: 0.249
F1 Score weighted: 0.431
F1 Score Individually: [0.571 0. 0.674 0. 0. ]
Precision macro: 0.425
Precision Individually: [0.8 0. 0.574 0.25 0.5 ]
Recall macro: 0.315
Recall Individually: [0.286 0. 1. 0.091 0.2 ]
F1 Score micro: 0.567
F1 Score macro: 0.314
F1 Score weighted: 0.49
F1 Score Individually: [0.421 0. 0.729 0.133 0.286]


0 comments on commit 3189279

Please sign in to comment.