
Commit

ran unigrams with lr 0.5 and nest 100 weighted
gicraveiro committed Feb 5, 2022
1 parent c2ee829 commit f470f5b
Showing 4 changed files with 29 additions and 29 deletions.
42 changes: 21 additions & 21 deletions AIclassifier.py
@@ -57,7 +57,7 @@ def format_sentVector_to_SparseMatrix(vectors_list, dictionary):
counts = Counter(sent_vector)
for index, freq in counts.items():
if len(counts.items()) > 0:
- sparse_vector[index] = 1 #freq/len(sent_vector) # 1 DIFFERENT CONFIGURATION POSSIBILITIES
+ sparse_vector[index] = freq/len(sent_vector) # 1 DIFFERENT CONFIGURATION POSSIBILITIES
if (i == 0): # TO DO: OPTIMIZE, NO NEED TO CHECK THIS EVERY TURN
matrix_array = [sparse_vector]
else:
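
The change above switches the sentence representation from binary presence (1 if a dictionary entry occurs) to relative term frequency (count divided by sentence length). A minimal standalone sketch of the two configurations, using illustrative names rather than the repository's exact code:

from collections import Counter

def sentence_to_feature_row(sent_vector, dict_size, weighted=True):
    # sent_vector is a list of dictionary indices for the tokens of one sentence.
    # weighted=False reproduces the old binary configuration,
    # weighted=True the new relative-frequency ("weighted") configuration.
    row = [0.0] * dict_size
    counts = Counter(sent_vector)
    for index, freq in counts.items():
        row[index] = freq / len(sent_vector) if weighted else 1
    return row

print(sentence_to_feature_row([0, 2, 2, 5, 0], 6, weighted=True))
# -> [0.4, 0.0, 0.4, 0.0, 0.0, 0.2]
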
@@ -119,11 +119,11 @@ def create_vectors_list(sents, conversion_dict):
sent_mixed_vector = sent_mixed_vector.astype(int)
vectors_list.append(sent_vector)
bigrams_vector.append(sent_bigrams_vector)
- print(sent_mixed_vector)
+ #print(sent_mixed_vector)
mixed_vector.append(sent_mixed_vector)

- return vectors_list # unigrams
- #return bigrams_vector
+ #return vectors_list # unigrams
+ return bigrams_vector
#return mixed_vector

# def create_word_embedding(partition):
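
Switching the return value from vectors_list to bigrams_vector changes the features from unigram indices to bigram indices. A rough sketch of what a bigram lookup might look like, assuming a bigrams_to_numbers dictionary that maps token pairs to integer indices (names and structure are illustrative, not taken from the repository):

def sentence_to_bigram_indices(tokens, bigrams_to_numbers):
    # Collect the dictionary index of every known bigram in the sentence;
    # bigrams that are not in the dictionary are skipped.
    indices = []
    for first, second in zip(tokens, tokens[1:]):
        if (first, second) in bigrams_to_numbers:
            indices.append(bigrams_to_numbers[(first, second)])
    return indices

bigrams_to_numbers = {("machine", "learning"): 0, ("learning", "rate"): 1}
print(sentence_to_bigram_indices(["machine", "learning", "rate"], bigrams_to_numbers))
# -> [0, 1]
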
@@ -197,8 +197,8 @@ def create_vectors_list(sents, conversion_dict):
#train_word_embedding_features = create_word_embedding(sents_train)
#dev_word_embedding_features = create_word_embedding(sents_dev)
#test_word_embedding_features = numpy.asarray(create_word_embedding(sents_test))
print("Length of the dictionary of word representations:",len(words_to_numbers))
#print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(words_to_numbers))
print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(mixed_to_numbers))

# FLAG - CHECK IF DICTIONARY IS BUILT CORRECTLY
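
As a point of reference, a token-to-index dictionary such as words_to_numbers or bigrams_to_numbers can be built by assigning each distinct unit a stable integer; this is only a hypothetical sketch, since the actual construction is outside the displayed hunks:

def build_index_dict(units):
    # Assign each distinct unit (word or bigram) the next free integer index.
    index = {}
    for unit in units:
        if unit not in index:
            index[unit] = len(index)
    return index

words_to_numbers = build_index_dict(["the", "model", "predicts", "the", "label"])
print("Length of the dictionary of word representations:", len(words_to_numbers))  # -> 4
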
@@ -207,13 +207,13 @@ def create_vectors_list(sents, conversion_dict):
# count frequency before and after removing unknown words - ??? - ASK GABRIEL!!
# checked that it seems ok

- train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
- dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
- test_vectors_list = create_vectors_list(sents_test, words_to_numbers)
+ # train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
+ # dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
+ # test_vectors_list = create_vectors_list(sents_test, words_to_numbers)

- # train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
- # dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
- # test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)
+ train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
+ dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
+ test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
@@ -223,13 +223,13 @@ def create_vectors_list(sents, conversion_dict):

# FLAG - CHECK IF SENTENCE REPRESENTATIONS WERE DONE CORRECTLY

- train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
- dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
- test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)
+ # train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
+ # dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
+ # test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)

- # train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
- # dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
- # test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)
+ train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
+ dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
+ test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
@@ -285,9 +285,9 @@ def create_vectors_list(sents, conversion_dict):
importances = model.feature_importances_

features = {}
- for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
+ #for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
- #for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
+ for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
if (value != 0):
features[token] = value
features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
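
The commit message mentions an AdaBoost run with learning rate 0.5 and 100 estimators; that instantiation is not part of the displayed hunks, so the following is only an assumed sketch of a matching scikit-learn configuration (train_labels_primary is a guessed name, by analogy with the dev/test label variables above):

from sklearn.ensemble import AdaBoostClassifier

# Assumed hyperparameters taken from the commit message ("lr 0.5 and nest 100").
model = AdaBoostClassifier(n_estimators=100, learning_rate=0.5)
model.fit(train_matrix_array, train_labels_primary)
importances = model.feature_importances_  # one importance value per dictionary entry
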
@@ -326,7 +326,7 @@ def create_vectors_list(sents, conversion_dict):
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'w') as file:
#print("Performance measures - Unigram Dictionary - MLP Word Embeddings\n", file=file)
print("Performance measures - Unigram Dictionary - Adaboost\n", file=file)
print("Performance measures - Bigram Dictionary - Adaboost\n", file=file)
#write_output_stats_file(path, "Dev", dev_labels_primary, predictions, labels)
write_output_stats_file(path, "Test", test_labels_primary, predictions, labels)

Binary file modified output/AI Classifier/1Label_confusion_matrix_NonNorm.png
Binary file modified output/AI Classifier/1Label_confusion_matrix_NormTrue.png
16 changes: 8 additions & 8 deletions output/AI Classifier/1labelPredictionsStatsTest.txt
@@ -2,13 +2,13 @@ Performance measures - Unigram Dictionary - Adaboost

Test set:

- Precision macro: 0.247
- Precision Individually: [0.333 0. 0.566 0. 0.333]
- Recall macro: 0.256
- Recall Individually: [0.214 0. 0.968 0. 0.1 ]
- F1 Score micro: 0.507
- F1 Score macro: 0.226
- F1 Score weighted: 0.408
- F1 Score Individually: [0.261 0. 0.714 0. 0.154]
+ Precision macro: 0.346
+ Precision Individually: [0.5 0. 0.564 0. 0.667]
+ Recall macro: 0.283
+ Recall Individually: [0.214 0. 1. 0. 0.2 ]
+ F1 Score micro: 0.537
+ F1 Score macro: 0.266
+ F1 Score weighted: 0.442
+ F1 Score Individually: [0.3 0. 0.721 0. 0.308]


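For reference, the metric names in this report match scikit-learn's standard averaging modes; a minimal sketch of how such numbers could be produced (the repository's write_output_stats_file is not shown, so this is an assumption, with y_true and y_pred standing in for test_labels_primary and predictions):

from sklearn.metrics import precision_score, recall_score, f1_score

def report(y_true, y_pred):
    # Macro averages treat every class equally, micro pools all decisions,
    # weighted scales each class by its support, and average=None gives per-class values.
    print("Precision macro:", precision_score(y_true, y_pred, average="macro", zero_division=0))
    print("Precision Individually:", precision_score(y_true, y_pred, average=None, zero_division=0))
    print("Recall macro:", recall_score(y_true, y_pred, average="macro", zero_division=0))
    print("Recall Individually:", recall_score(y_true, y_pred, average=None, zero_division=0))
    print("F1 Score micro:", f1_score(y_true, y_pred, average="micro", zero_division=0))
    print("F1 Score macro:", f1_score(y_true, y_pred, average="macro", zero_division=0))
    print("F1 Score weighted:", f1_score(y_true, y_pred, average="weighted", zero_division=0))
    print("F1 Score Individually:", f1_score(y_true, y_pred, average=None, zero_division=0))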