From 962e268e7d4da5f6439a3eee56be3923a9b96c5a Mon Sep 17 00:00:00 2001 From: gicraveiro Date: Mon, 7 Feb 2022 17:12:09 +0100 Subject: [PATCH] final cleanse for utils file --- utils.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/utils.py b/utils.py index e16da30..fcd852d 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,8 @@ import re import os -from sklearn.metrics import precision_score, f1_score, recall_score # ,accuracy_score +from sklearn.metrics import precision_score, f1_score, recall_score import json import numpy -#import sklearn from sklearn.metrics import ConfusionMatrixDisplay import matplotlib.pyplot as plt @@ -21,7 +20,8 @@ def reconstruct_hyphenated_words(corpus): i += 1 return corpus -# noun chunks that correspond to keywords +# used to reconstruct noun chunks that correspond to keywords +# merge the compound words specified in the keywords parameters into the same token def reconstruct_noun_chunks(corpus,keywords): i = 0 while i < len(corpus): @@ -65,16 +65,10 @@ def clean_corpus(corpus): # WRITE OUTPUT STATISTICS FILE def write_output_stats_file(path, name, ref_labels, pred_labels, labels): - #path = 'output/Simple Classifier/1labelPredictionsStats_'+name+'.txt' - #os.makedirs(os.path.dirname(path), exist_ok=True) - #print(ref_labels, pred_labels, labels) with open(path, 'a') as file: - print(name,"set:\n", file=file) - #print("Accuracy:",round( accuracy_score( ref_labels, pred_labels), 3), file=file) - #print("Precision micro:",round( precision_score( ref_labels, pred_labels, average="micro"), 3), file=file) + print(name,"set:\n", file=file) # Title print("Precision macro:",round( precision_score( ref_labels, pred_labels, average="macro"),3), file=file) print("Precision Individually:", numpy.round (precision_score( ref_labels, pred_labels, average=None, labels=labels),3), file=file) - #print("Recall micro:",round( recall_score( ref_labels, pred_labels, average="micro"),3), file=file) print("Recall macro:",round( recall_score( ref_labels, pred_labels, average="macro"),3), file=file) print("Recall Individually:", numpy.round(recall_score( ref_labels, pred_labels, average=None, labels=labels),3), file=file) print("F1 Score micro:",round( f1_score( ref_labels, pred_labels, average="micro"),3), file=file) @@ -92,9 +86,8 @@ def write_predictions_file(name, pred_dict): # Creates a confusion matrix def create_confusion_matrix(refs, preds, normalize, path, labels, display_labels): - #print(sklearn.__version__) ConfusionMatrixDisplay.from_predictions(refs,preds, normalize=normalize, labels=labels, display_labels=display_labels) plt.xticks(rotation=45, ha="right") plt.subplots_adjust(bottom=0.4) - #plt.show() + #plt.show() # obs.: either show or save the confusion matrix plt.savefig(path)