Skip to content

Commit

Permalink
rerun bigrams weighted
Browse files Browse the repository at this point in the history
  • Loading branch information
gicraveiro committed Feb 5, 2022
1 parent 0d97939 commit abf383f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 26 deletions.
36 changes: 18 additions & 18 deletions AIclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def create_vectors_list(sents, conversion_dict):
bigrams_vector.append(sent_bigrams_vector)
mixed_vector.append(sent_mixed_vector)

return vectors_list # unigrams
#return bigrams_vector
#return vectors_list # unigrams
return bigrams_vector
#return mixed_vector

# def create_word_embedding(partition):
Expand Down Expand Up @@ -195,8 +195,8 @@ def create_vectors_list(sents, conversion_dict):
#train_word_embedding_features = create_word_embedding(sents_train)
#dev_word_embedding_features = create_word_embedding(sents_dev)
#test_word_embedding_features = numpy.asarray(create_word_embedding(sents_test))
print("Length of the dictionary of word representations:",len(words_to_numbers))
# print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(words_to_numbers))
print("Length of the dictionary of word representations:",len(bigrams_to_numbers))
#print("Length of the dictionary of word representations:",len(mixed_to_numbers))

# FLAG - CHECK IF DICTIONARY IS BUILT CORRECTLY
Expand All @@ -205,13 +205,13 @@ def create_vectors_list(sents, conversion_dict):
# count frequency before and after removing unknown words - ??? - ASK GABRIEL!!
# checked that it seems ok

train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
test_vectors_list = create_vectors_list(sents_test, words_to_numbers)
# train_vectors_list = create_vectors_list(sents_train, words_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, words_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, words_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
# test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)
train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
dev_vectors_list = create_vectors_list(sents_dev, bigrams_to_numbers)
test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)

# train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
# dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
Expand All @@ -221,13 +221,13 @@ def create_vectors_list(sents, conversion_dict):

# FLAG - CHECK IF SENTENCE REPRESENTATIONS WERE DONE CORRECTLY

train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)
# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, words_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, words_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, words_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)
train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, bigrams_to_numbers)
test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)

# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
Expand Down Expand Up @@ -283,9 +283,9 @@ def create_vectors_list(sents, conversion_dict):
importances = model.feature_importances_

features = {}
for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(words_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
#for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): # IMPORTANT TO CHANGE TO ADEQUATE DICT
if (value != 0):
features[token] = value
features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
Expand Down
Binary file modified output/AI Classifier/1Label_confusion_matrix_NonNorm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/AI Classifier/1Label_confusion_matrix_NormTrue.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions output/AI Classifier/1labelPredictionsStatsTest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ Performance measures - Unigram Dictionary - Adaboost

Test set:

Precision macro: 0.254
Precision Individually: [0.357 0. 0.59 0.125 0.2 ]
Recall macro: 0.258
Recall Individually: [0.357 0. 0.742 0.091 0.1 ]
F1 Score micro: 0.448
F1 Score macro: 0.251
F1 Score weighted: 0.416
F1 Score Individually: [0.357 0. 0.657 0.105 0.133]
Precision macro: 0.257
Precision Individually: [0.8 0. 0.484 0. 0. ]
Recall macro: 0.251
Recall Individually: [0.286 0. 0.968 0. 0. ]
F1 Score micro: 0.507
F1 Score macro: 0.213
F1 Score weighted: 0.386
F1 Score Individually: [0.421 0. 0.645 0. 0. ]


0 comments on commit abf383f

Please sign in to comment.