From 962e268e7d4da5f6439a3eee56be3923a9b96c5a Mon Sep 17 00:00:00 2001
From: gicraveiro <gmcraveiro@hotmail.com>
Date: Mon, 7 Feb 2022 17:12:09 +0100
Subject: [PATCH] final cleanse for utils file

---
 utils.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/utils.py b/utils.py
index e16da30..fcd852d 100644
--- a/utils.py
+++ b/utils.py
@@ -1,9 +1,8 @@
 import re
 import os
-from sklearn.metrics import precision_score, f1_score, recall_score # ,accuracy_score
+from sklearn.metrics import precision_score, f1_score, recall_score 
 import json
 import numpy
-#import sklearn
 from sklearn.metrics import ConfusionMatrixDisplay
 import matplotlib.pyplot as plt
 
@@ -21,7 +20,8 @@ def reconstruct_hyphenated_words(corpus):
             i += 1
     return corpus
 
-# noun chunks that correspond to keywords
+# Reconstructs noun chunks that correspond to keywords:
+# merges each compound word listed in the keywords parameter into a single token
 def reconstruct_noun_chunks(corpus,keywords):
     i = 0
     while i < len(corpus):
@@ -65,16 +65,10 @@ def clean_corpus(corpus):
 
 # WRITE OUTPUT STATISTICS FILE
 def write_output_stats_file(path, name, ref_labels, pred_labels, labels):
-    #path = 'output/Simple Classifier/1labelPredictionsStats_'+name+'.txt'
-    #os.makedirs(os.path.dirname(path), exist_ok=True)
-    #print(ref_labels, pred_labels, labels)
     with open(path, 'a') as file:
-        print(name,"set:\n", file=file)
-        #print("Accuracy:",round( accuracy_score( ref_labels, pred_labels), 3), file=file)
-        #print("Precision micro:",round( precision_score( ref_labels, pred_labels, average="micro"), 3), file=file)
+        print(name,"set:\n", file=file) # Title
         print("Precision macro:",round( precision_score( ref_labels, pred_labels, average="macro"),3), file=file)
         print("Precision Individually:", numpy.round (precision_score( ref_labels, pred_labels, average=None, labels=labels),3), file=file)
-        #print("Recall micro:",round( recall_score( ref_labels, pred_labels, average="micro"),3), file=file)
         print("Recall macro:",round( recall_score( ref_labels, pred_labels, average="macro"),3), file=file)
         print("Recall Individually:", numpy.round(recall_score( ref_labels, pred_labels, average=None, labels=labels),3), file=file)
         print("F1 Score micro:",round( f1_score( ref_labels, pred_labels, average="micro"),3), file=file)
@@ -92,9 +86,8 @@ def write_predictions_file(name, pred_dict):
 
 # Creates a confusion matrix
 def create_confusion_matrix(refs, preds, normalize, path, labels, display_labels):
-    #print(sklearn.__version__)
     ConfusionMatrixDisplay.from_predictions(refs,preds, normalize=normalize, labels=labels, display_labels=display_labels)
     plt.xticks(rotation=45, ha="right")
     plt.subplots_adjust(bottom=0.4)
-    #plt.show()
+    #plt.show() # note: either show or save the confusion matrix
     plt.savefig(path)