From bb7c7ba9bf73b7f0ee29cc96d9f9ad725058ea4d Mon Sep 17 00:00:00 2001 From: gicraveiro Date: Tue, 8 Feb 2022 15:48:34 +0100 Subject: [PATCH] adjustments to functions to make them accessible to AI classifier code --- partition.py | 10 ++-------- simpleclassifier.py | 6 +++--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/partition.py b/partition.py index 75547af..bc584ba 100644 --- a/partition.py +++ b/partition.py @@ -8,17 +8,11 @@ import matplotlib.pyplot as plt import numpy import spacy -from utils import clean_corpus, reconstruct_hyphenated_words +from utils import clean_corpus, reconstruct_hyphenated_words, create_sent_label_dict # Functions -# Creates dictionary of a set, associating sentence with label -def create_sent_label_dict(sents, labels): - sents_dict = [] - for row_id,row in enumerate(sents): - row = re.sub("\n", " ", row) - sents_dict.append({"text":row.strip(), "label":labels[row_id]}) - return sents_dict + # Writes json of partition set, each entry is the sentence associated with its labels def write_partition_file(partition_dict, name): diff --git a/simpleclassifier.py b/simpleclassifier.py index f3c3911..5f8489c 100644 --- a/simpleclassifier.py +++ b/simpleclassifier.py @@ -70,9 +70,9 @@ def simple_classifier(sents_ref_json): test_pred_dict = simple_classifier(test_sents_ref_json) # Output predictions in separate files -write_predictions_file("Train", train_pred_dict) -write_predictions_file("Dev", dev_pred_dict) -write_predictions_file("Test", test_pred_dict) +write_predictions_file(train_pred_dict, 'output/Simple Classifier/multilabelPredictions_Train.json') +write_predictions_file(dev_pred_dict,'output/Simple Classifier/multilabelPredictions_Dev.json') +write_predictions_file(test_pred_dict, 'output/Simple Classifier/multilabelPredictions_Test.json') # Formatting predictions to calculate results train_pred_array = [sent['label'] for sent in train_pred_dict]