diff --git a/AIclassifier.py b/AIclassifier.py
index b4f0734..4d174b3 100644
--- a/AIclassifier.py
+++ b/AIclassifier.py
@@ -58,7 +58,8 @@ def format_sentVector_to_SparseMatrix(vectors_list, dictionary):
 
 # Create sentences representation in numeric format, according to dictionary
 def create_vectors_list(sents, conversion_dict):
-    unk_count = 0
+    unk_unigrams_count = 0
+    unk_bigrams_count = 0
     unigrams_vector = []
     bigrams_vector = []
     mixed_vector = []
@@ -81,7 +82,7 @@ def create_vectors_list(sents, conversion_dict):
             if token.lower() not in conversion_dict: 
                 #sent_tokens_list.append("unk") # TO CONSIDER UNK TOKENS, UNCOMMENT THESE LINES
                 #mixed_tokens_list.append("unk")
-                #unk_count += 1
+                unk_unigrams_count += 1
                 pass
             else:
                 sent_tokens_list.append(token.lower())
@@ -96,7 +97,7 @@ def create_vectors_list(sents, conversion_dict):
         for bigram in sent_bigram:
             if bigram not in conversion_dict:
                 #sent_bigrams_list.append("unk") TO CONSIDER UNK TOKENS, UNCOMMENT THESE LINES
-                #unk_count += 1
+                unk_bigrams_count += 1
                 pass
             else:
                 sent_bigrams_list.append(bigram)
@@ -111,9 +112,11 @@ def create_vectors_list(sents, conversion_dict):
         bigrams_vector.append(sent_bigrams_vector)
         mixed_vector.append(sent_mixed_vector)
 
-    return unigrams_vector  # TO RUN WITH UNIGRAMS, UNCOMMENT THIS LINE AND COMMENT THE OTHER TWO RETURNS
+    print("Unigrams unknown count including repetitions:", unk_unigrams_count)
+    print("Bigrams unknown count including repetitions:", unk_bigrams_count, "\n")
+    #return unigrams_vector  # TO RUN WITH UNIGRAMS, UNCOMMENT THIS LINE AND COMMENT THE OTHER TWO RETURNS
     #return bigrams_vector  # TO RUN WITH BIGRAMS, UNCOMMENT THIS LINE AND COMMENT THE OTHER TWO RETURNS
-    #return mixed_vector    # TO RUN WITH UNIGRAMS + BIGRAMS, UNCOMMENT THIS LINE AND COMMENT THE OTHER TWO RETURNS
+    return mixed_vector    # TO RUN WITH UNIGRAMS + BIGRAMS, UNCOMMENT THIS LINE AND COMMENT THE OTHER TWO RETURNS
 
 # TO USE MLP CLASSIFIER WITH WORD EMBEDDINGS APPROACH, UNCOMMENT THIS FUNCION
 # def create_word_embedding(partition):
@@ -149,43 +152,45 @@ def create_vectors_list(sents, conversion_dict):
 corpus = clean_corpus(corpus) 
 train_doc = nlp(corpus)
 train_doc = reconstruct_hyphenated_words(train_doc)
-tokens = [token.text for token in train_doc if not token.is_space if not token.is_punct] # if not token.text in stopwords.words()] 
+corpus_in_unigrams = [token.text for token in train_doc if not token.is_space if not token.is_punct] # if not token.text in stopwords.words()] 
 # OBS: MAYBE ENHANCING PREPROCESSING BY REMOVING LITTLE SQUARES COULD BE AN OPTION
 
 corpus_in_bigrams = []
-for i in range(0,len(tokens)-1):
-    corpus_in_bigrams.append(tokens[i]+" "+tokens[i+1])
+for i in range(0,len(corpus_in_unigrams)-1):
+    corpus_in_bigrams.append(corpus_in_unigrams[i]+" "+corpus_in_unigrams[i+1])
 
-token_freq = Counter(tokens)
+unigram_freq = Counter(corpus_in_unigrams)
 bigram_freq = Counter(corpus_in_bigrams)
-print("Unigrams frequency before removing unknown words:", token_freq)
-print("Bigrams frequency before removing unknown words:", bigram_freq)
+# print("Unigrams frequency before removing unknown words:", unigram_freq)
+# print("Bigrams frequency before removing unknown words:", bigram_freq)
 
-# Removing words less frequent than 2 
-corpus_without_unk = [token[0] for token in token_freq.items() if int(token[1]) > 2]
-bigrams_filtered_lexicon = [bigram[0] for bigram in bigram_freq.items() if int(bigram[1]) > 1]
+# Keeping only unigrams that occur more than twice and bigrams that occur more than once
+unigrams_filtered_lexicon = [unigram[0] for unigram in unigram_freq.items() if int(unigram[1]) > 2]
+bigrams_filtered_lexicon = [bigram[0] for bigram in bigram_freq.items() if int(bigram[1]) > 1] 
+# print("Unigrams frequency after removing unknown words:", [unigram for unigram in unigram_freq.items() if int(unigram[1]) > 2])
+# print("Bigrams frequency after removing unknown words:", [bigram for bigram in bigram_freq.items() if int(bigram[1]) > 1] )
 
-token_freq = Counter(corpus_without_unk)
-bigram_freq = Counter(bigrams_filtered_lexicon)
-print("Unigrams frequency after removing unknown words:", token_freq)
-print("Bigrams frequency after removing unknown words:", bigram_freq)
+# Counting distinct unknown n-grams: unigrams occurring at most twice and bigrams occurring only once
+unknown_unigrams = [unigram[0] for unigram in unigram_freq.items() if int(unigram[1]) <= 2]
+unknown_bigrams = [bigram[0] for bigram in bigram_freq.items() if int(bigram[1]) <= 1]
+print("\n","Unknown unigrams count without repetitions:", len(unknown_unigrams))
+print("Unknown bigrams count without repetitions:", len(unknown_bigrams), "\n")
 
 # Unigram dictionary
-unigrams_to_numbers = create_dict(corpus_without_unk)
+unigrams_to_numbers = create_dict(unigrams_filtered_lexicon)
 
 # Bigram dictionary
 bigrams_to_numbers = create_dict(bigrams_filtered_lexicon)
 
 # Mixed dictionary
-with open('featureslr0.5nEst100.txt', 'r') as file:
-#with open('features.txt', 'r') as file:
+with open('features.txt', 'r') as file:
     features_list = file.read()
 features_list = features_list.split('\n')
 mixed_to_numbers = create_dict(features_list)
 
-print("Length of the dictionary of unigrams:",len(unigrams_to_numbers))
-print("Length of the dictionary of bigrams:",len(bigrams_to_numbers))
-print("Length of the dictionary of unigrams and bigrams:",len(mixed_to_numbers))
+print("Length of the dictionary of unigrams(lexicon):",len(unigrams_to_numbers))
+print("Length of the dictionary of bigrams(lexicon):",len(bigrams_to_numbers))
+print("Length of the dictionary of unigrams and bigrams(lexicon):",len(mixed_to_numbers), "\n")
 
 # CREATE SENTENCE REPRESENTATIONS
 #   can either be by word embeddings or with a simple representation according to the presence of a unigram or bigram in the sentence
@@ -202,9 +207,9 @@ def create_vectors_list(sents, conversion_dict):
 # SIMPLE NUMERICAL REPRESENTATIONS OF THE SENTENCES
 
 # TO RUN WITH UNIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
-train_vectors_list = create_vectors_list(sents_train, unigrams_to_numbers)
-dev_vectors_list = create_vectors_list(sents_dev, unigrams_to_numbers)
-test_vectors_list = create_vectors_list(sents_test, unigrams_to_numbers)
+# train_vectors_list = create_vectors_list(sents_train, unigrams_to_numbers)
+# dev_vectors_list = create_vectors_list(sents_dev, unigrams_to_numbers)
+# test_vectors_list = create_vectors_list(sents_test, unigrams_to_numbers)
 
 # TO RUN WITH BIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
 # train_vectors_list = create_vectors_list(sents_train, bigrams_to_numbers)
@@ -212,16 +217,16 @@ def create_vectors_list(sents, conversion_dict):
 # test_vectors_list = create_vectors_list(sents_test, bigrams_to_numbers)
 
 # TO RUN WITH UNIGRAMS + BIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
-# train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
-# dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
-# test_vectors_list = create_vectors_list(sents_test, mixed_to_numbers)
+train_vectors_list = create_vectors_list(sents_train, mixed_to_numbers)
+dev_vectors_list = create_vectors_list(sents_dev, mixed_to_numbers)
+test_vectors_list = create_vectors_list(sents_test, mixed_to_numbers)
 
 # FORMATTING SIMPLE SENTENCE REPRESENTATIONS - MUST BE IN SPARSE MATRIX FORMAT TO FEED THE CLASSIFIERS
 
 # TO RUN WITH UNIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
-train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, unigrams_to_numbers)
-dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, unigrams_to_numbers)
-test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, unigrams_to_numbers)
+# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, unigrams_to_numbers)
+# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, unigrams_to_numbers)
+# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, unigrams_to_numbers)
 
 # TO RUN WITH BIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
 # train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, bigrams_to_numbers)
@@ -229,9 +234,9 @@ def create_vectors_list(sents, conversion_dict):
 # test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, bigrams_to_numbers)
 
 # TO RUN WITH UNIGRAMS + BIGRAMS, UNCOMMENT THIS 3 LINES AND COMMENT THE OTHER TWO TRIPLETS
-# train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
-# dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
-# test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, mixed_to_numbers)
+train_matrix_array = format_sentVector_to_SparseMatrix(train_vectors_list, mixed_to_numbers)
+dev_matrix_array = format_sentVector_to_SparseMatrix(dev_vectors_list, mixed_to_numbers)
+test_matrix_array = format_sentVector_to_SparseMatrix(test_vectors_list, mixed_to_numbers)
 
 # CREATE LABELS REPRESENTATIONS
 
@@ -264,8 +269,10 @@ def create_vectors_list(sents, conversion_dict):
 
 # Classifier models
 
-# TO USE ADABOOST CLASSIFIER, UNCOMMENT THIS LINE AND COMMENT OTHER MODELS
-adaclassifier = AdaBoostClassifier(n_estimators=100, learning_rate=0.5)
+# TO USE THE ADABOOST CLASSIFIER, UNCOMMENT adaclassifier AND COMMENT OUT THE OTHER MODELS
+#   WITH UNIGRAMS, RESULTS WERE BETTER WITH n_estimators=50, learning_rate=1
+#   WITH UNIGRAMS + BIGRAMS, RESULTS WERE BETTER WITH n_estimators=100, learning_rate=0.5
+adaclassifier = AdaBoostClassifier(n_estimators=100, learning_rate=0.5) 
 # TO USE SVC CLASSIFIER WITH ONE VS REST SCHEME, UNCOMMENT THIS LINE AND COMMENT OTHER MODELS
 #svc_classifier = make_pipeline(StandardScaler(), OneVsRestClassifier(LinearSVC(dual=False,random_state=None, tol=1e-5, C=1)))
 # TO USE SVC CLASSIFIER WITH ONE VS ONE SCHEME, UNCOMMENT THIS LINE AND COMMENT OTHER MODELS
@@ -293,18 +300,17 @@ def create_vectors_list(sents, conversion_dict):
 #print(model.best_params_)
 
 # TO SEE WHICH FEATURES ADABOOST CHOSE, UNCOMMENT THIS SECTION
-importances = model.feature_importances_
-features = {}
-
+# importances = model.feature_importances_
+# features = {}
 # UNCOMMENT THE LINE YOU NEED FROM THESE 3 AND COMMENT THE OTHER 2
-#for i,(token,value) in enumerate(zip(unigrams_to_numbers, importances)):
-for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
-#for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): 
-   if (value != 0):
-       features[token] = value
-features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
-for feature in features:
-   print('Feature:',feature[1],'Score:',feature[0])
+# for i,(token,value) in enumerate(zip(unigrams_to_numbers, importances)):
+# # for i,(token,value) in enumerate(zip(mixed_to_numbers, importances)):
+# #for i,(token,value) in enumerate(zip(bigrams_to_numbers, importances)): 
+#    if (value != 0):
+#        features[token] = value
+# features = sorted([(value, key) for (key, value) in features.items()], reverse=True)
+# for feature in features:
+#    print('Feature:',feature[1],'Score:',feature[0])
 
 # Predicting
 
diff --git a/Oldfeatures.txt b/Oldfeatures.txt
deleted file mode 100644
index b8c93a6..0000000
--- a/Oldfeatures.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-privacy
-people
-system
-safe
-possible
-should
-experience
-safer
-right
-secure
-community
-believe
-express
-we
-can
-use
-cookies
-you
-data
-or
-content
-with
-information
-they
-learn
-more
-provide
-device
-understand
-days
-collect
-who
-the
-your
-is
-to be
-you should
-help keep
-provide you
-should be
-and protect
-when people
-to enable
-and privacy
-this right
-public interest
-what you
-have a
-you with
-for people
-use cookies
-you can
-20/09/2021 14
-more about
-help us
-the information
-information about
-we may
-and others
-products including
-not about
-these terms
-to build
-with facebook
-who you
-advertising and
-that help
-we collect
-share and
-not at
-services that
-share it
-it with
-are not
\ No newline at end of file
diff --git a/aux.txt b/aux.txt
deleted file mode 100644
index 0e92d07..0000000
--- a/aux.txt
+++ /dev/null
@@ -1,129 +0,0 @@
-we
-cookies
-you
-the
-right
-your
-data
-or
-is
-information
-privacy
-they
-learn
-people
-safe
-secure
-community
-possible
-should
-believe
-express
-use cookies
-things like
-for example
-cookies to
-these terms
-service providers
-public interest
-of your
-learn more
-law enforcement
-how we
-can use
-by using
-▪ internet
-िह �
-your data
-while participating
-websites apps
-web browser
-video calling
-united states
-tool allows
-to prevent
-this includes
-they even
-the same
-that a
-technical limitations
-similar technologies
-sign up
-share content
-safety integrity
-prior permission
-pixel tags
-pay us
-pages videos
-p r
-other 1
-operating system
-no longer
-n g
-messages restricted
-local fundraisers
-lite watch
-like give
-legitimate interests
-italiano română
-ireland ltd
-ios 13
-https //about
-help center
-have about
-good-faith belief
-globally both
-face recognition
-express themselves
-e l
-diﬀerent devices
-covid-19 support
-consistent experience
-conducting surveys
-competent court
-camera so
-brand resources
-best practices
-automatically process
-assets changes
-as described
-are performing
-apply when
-ai ethics
-advertisers who
-active status
-a b
-23 june
-20/09/2021 14
-//opensource fb
-'re visiting
-
-
-
-Feature: we Score: 0.01
-Feature: use Score: 0.01
-Feature: cookies Score: 0.01
-Feature: you Score: 0.01
-Feature: data Score: 0.01
-Feature: or Score: 0.02
-Feature: content Score: 0.01
-Feature: with Score: 0.01
-Feature: information Score: 0.01
-Feature: privacy Score: 0.13
-Feature: they Score: 0.01
-Feature: learn Score: 0.01
-Feature: more Score: 0.01
-Feature: provide Score: 0.01
-Feature: people Score: 0.14
-Feature: device Score: 0.01
-Feature: system Score: 0.1
-Feature: safe Score: 0.11
-Feature: secure Score: 0.01
-Feature: understand Score: 0.01
-Feature: who Score: 0.02
-Feature: possible Score: 0.05
-Feature: days Score: 0.01
-Feature: should Score: 0.12
-Feature: experience Score: 0.05
-Feature: collect Score: 0.01
-Feature: safer Score: 0.08
\ No newline at end of file
diff --git a/features.txt b/features.txt
index 80f5f1c..8c09f14 100644
--- a/features.txt
+++ b/features.txt
@@ -1,37 +1,25 @@
-combat
-you
-right
-your
-is
-be
 privacy
-give
-people
-their
-safe
 secure
 community
-however
-possible
 should
-safer
-believe
+possible
 express
-experience
-can
-no
-connect
-they
-public
-example
-cookies
-content
-and
+believe
+right
+people
+your
+you
 we
+they
 the
+safe
 or
+is
 information
 data
+cookies
+no
+experience
 use
 protect
 with
@@ -42,24 +30,23 @@ if
 device
 days
 companies
+can
 automatically
 also
 about
 you should
-this right
-should be
-not at
-help keep
-and protect
-have a
 to be
 when people
+and protect
 public interest
 for people
+have a
 you share
 to enable
+this right
 provide you
 it with
+help keep
 are not
 advertising and
 you with
@@ -78,10 +65,12 @@ share it
 share and
 products including
 post or
+not at
 more about
 information about
 help us
 cookies to
 and privacy
 and others
-ads to
\ No newline at end of file
+ads to
+should be
\ No newline at end of file
diff --git a/featureslr0.5nEst100.txt b/featureslr0.5nEst100.txt
deleted file mode 100644
index 8c09f14..0000000
--- a/featureslr0.5nEst100.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-privacy
-secure
-community
-should
-possible
-express
-believe
-right
-people
-your
-you
-we
-they
-the
-safe
-or
-is
-information
-data
-cookies
-no
-experience
-use
-protect
-with
-providers
-limited
-learn
-if
-device
-days
-companies
-can
-automatically
-also
-about
-you should
-to be
-when people
-and protect
-public interest
-for people
-have a
-you share
-to enable
-this right
-provide you
-it with
-help keep
-are not
-advertising and
-you with
-with the
-you use
-you can
-with facebook
-we may
-we collect
-we also
-the information
-the facebook
-that help
-such as
-share it
-share and
-products including
-post or
-not at
-more about
-information about
-help us
-cookies to
-and privacy
-and others
-ads to
-should be
\ No newline at end of file
diff --git a/featureslr1nEst50.txt b/featureslr1nEst50.txt
deleted file mode 100644
index b5946b4..0000000
--- a/featureslr1nEst50.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-combat
-you
-right
-your
-is
-be
-privacy
-give
-people
-their
-safe
-secure
-community
-however
-possible
-should
-safer
-believe
-express
-experience
-can
-no
-connect
-they
-public
-example
-cookies
-content
-and
-to be
-you should
-to enable
-this right
-should be
-not at
-help keep
-when people
-provide you
-and protect
-you can
-have a
-you with
-with the
\ No newline at end of file
diff --git a/output/AI Classifier/1Label_confusion_matrix_NonNorm.png b/output/AI Classifier/1Label_confusion_matrix_NonNorm.png
index e1b9bba..2e94e14 100644
Binary files a/output/AI Classifier/1Label_confusion_matrix_NonNorm.png and b/output/AI Classifier/1Label_confusion_matrix_NonNorm.png differ
diff --git a/output/AI Classifier/1Label_confusion_matrix_NormTrue.png b/output/AI Classifier/1Label_confusion_matrix_NormTrue.png
index 4cac149..75c4501 100644
Binary files a/output/AI Classifier/1Label_confusion_matrix_NormTrue.png and b/output/AI Classifier/1Label_confusion_matrix_NormTrue.png differ