Diminuito consumo memoria da parte di ClusterManager

GiacomoManzoli · Dec 23, 2016 · 8a3ab29 · 8a3ab29
1 parent 30c9f34
commit 8a3ab29
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 13 deletions.
diff --git a/experiment_1.properties b/experiment_1.properties
@@ -6,7 +6,7 @@ experiment.pipeline = clustering, save_graph_data, save_history, merge_history,
 experiment.lexicon_path = ./lexicon/italian/AGZ1994.txt
 # opzionali, ma devono essere presenti entrambi
 experiment.lexicon_range.start = 50000
-experiment.lexicon_range.end   = 60000
+experiment.lexicon_range.end   = 50500
 
 # Riduzione dei dati
 experiment.stopwords = ./lexicon/italian/stoplist/stoplist2.txt

diff --git a/src/bm/Test.java b/src/bm/Test.java
@@ -41,6 +41,31 @@ static private int _j(long k){
     public static void main(String[] args) {
         List<String> lines = new ArrayList<>();
         long tot = ((long)N*(N-1))/2;
+         /* Total number of processors or cores available to the JVM */
+        System.out.println("Available processors (cores): " +
+                Runtime.getRuntime().availableProcessors());
+
+          /* Total amount of free memory available to the JVM */
+                System.out.println("Free memory (bytes): " +
+                        Runtime.getRuntime().freeMemory());
+
+          /* This will return Long.MAX_VALUE if there is no preset limit */
+                long maxMemory = Runtime.getRuntime().maxMemory();
+          /* Maximum amount of memory the JVM will attempt to use */
+                System.out.println("Maximum memory (bytes): " +
+                        (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));
+
+          /* Total memory currently in use by the JVM */
+                System.out.println("Total memory (bytes): " +
+                        Runtime.getRuntime().totalMemory());
+        long allocatedMemory =
+                (Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory());
+        long presumableFreeMemory = Runtime.getRuntime().maxMemory() -  (Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory());
+
+
+        System.out.println(allocatedMemory);
+        System.out.println(presumableFreeMemory);
+        /*
         for (int i = 0; i < Integer.MAX_VALUE; i++){
             long k = ThreadLocalRandom.current().nextLong(0, tot);
             int i1 = _i(k);

diff --git a/src/bm/clustering/Cluster.java b/src/bm/clustering/Cluster.java
@@ -3,6 +3,7 @@
 
 import bm.yass.DistanceMeasure;
 
+import java.util.ArrayList;
 import java.util.List;
 
 /**
@@ -20,7 +21,7 @@ public class Cluster {
      *  @return il nuovo cluster ottenuto mergiando i due cluster ricevuti come parametro.
      * */
     static Cluster merge(int id, Cluster c1, Cluster c2) {
-        List<String> newWords = c1.words.subList(0, c1.words.size());
+        List<String> newWords = new ArrayList<>(c1.words); // independent copy: works for any List, leaves c1.words untouched
         newWords.addAll(c2.words);
         return new Cluster(id, newWords);
     }

diff --git a/src/bm/clustering/ClusterManager.java b/src/bm/clustering/ClusterManager.java
@@ -70,34 +70,69 @@ int _j(long k){
      * @param indexes indici dei cluster da rimuovere
      * */
     void deleteClusters(List<Integer> indexes){
+        /*
+        * PROBLEMA: le strutture dati di supporto a questo metodo possono richiedere un'elevata quantità di spazio.
+        * Viene infatti utilizzato un Set, implementato con una hashmap, indicizzata per chiavi di tipo long.
+        *
+        * SOLUZIONE (al momento disabilitata, vedi il codice commentato qui sotto): l'eliminazione viene fatta in più
+        * passate. Così facendo l'occupazione in memoria è ridotta, anche se l'operazione diventa meno efficiente in termini di tempo.
+        * */
+
         // Ordino gli indici in ordine crescente
         Collections.sort(indexes);
+        actuallyDeleteClusters(indexes);
+        //int start;
+        //for (start = 0; start + 4 < indexes.size(); start+= 4){
+        //    List<Integer> ar = new ArrayList<>();
+        //    for (Integer i:  indexes.subList(start, start+4)) {
+        //        ar.add(i);
+        //    }
+        //    actuallyDeleteClusters(ar);
+        //}
+        //List<Integer> ar = new ArrayList<>();
+        //for (Integer i:  indexes.subList(start, indexes.size())) {
+        //    ar.add(i);
+        //}
+        //try {
+        //    if(ar.size() > 0)
+        //        actuallyDeleteClusters(ar);
+        //} catch (Exception e) {
+        //    System.out.println(e.toString());
+        //    System.out.println();
+        //}
+    }
+
+    private void actuallyDeleteClusters(List<Integer> indexes) {
         int n = clusters.size();
         // La stessa coppia può comparire più di una volta, quindi le memorizzo in un set per evitare duplicati.
         // Anziché memorizzare direttamente la coppia, calcolo subito l'indice della coppia nella matrice linearizzata.
-        Set<Long> toDelete = new HashSet<>();
-
+        //Set<Long> toDelete = new HashSet<>();
+        List<Long> toDeleteIndexes = new ArrayList<>();
         // Per ogni indice calcolo le coppie in cui compare
         for (int r : indexes) {
             // calcolo le coppie del tipo (*,r)
             for (int i = 0; i < r; i++) {
                 long index = _k(i,r);
-                if (index >= 0 && index < dist.getSize()){
-                    toDelete.add(index);
+                if (index >= 0 && index < dist.getSize() ){
+                    //toDelete.add(index);
+                    if (! toDeleteIndexes.contains(index))
+                        toDeleteIndexes.add(index);
                 }
             }
             // calcolo le coppie del tipo (r,*) (c'è (r,s))
             // sono consecutive e ce ne sono n-r-1
             for (int j = r+1; j < r+1+(n-r-1); j++) {
                 long index = _k(r,j);
                 if (index >= 0 && index < dist.getSize()){
-                    toDelete.add(index);
+                    //toDelete.add(index);
+                    if (! toDeleteIndexes.contains(index))
+                        toDeleteIndexes.add(index);
                 }
             }
         }
 
-        List<Long> toDeleteIndexes = new ArrayList<>();
-        toDeleteIndexes.addAll(toDelete);
+        //List<Long> toDeleteIndexes = new ArrayList<>();
+        //toDeleteIndexes.addAll(toDelete);
         // Ordino gli indici da cancellare in ordine decrescente
         Collections.sort(toDeleteIndexes);
 
@@ -112,12 +147,15 @@ void deleteClusters(List<Integer> indexes){
             // devo cancellare l'indice corrente, passo all'elemento successivo
             if (cntDeleted == 0 && it != toDeleteIndexes.get(cntDeleted)) { continue; }
 
-            if (cntDeleted < toDelete.size() && it == toDeleteIndexes.get(cntDeleted))
+            //if (cntDeleted < toDelete.size() && it == toDeleteIndexes.get(cntDeleted))
+            if (cntDeleted < toDeleteIndexes.size() && it == toDeleteIndexes.get(cntDeleted))
                 cntDeleted += 1;
 
             // Prima di copiare il prossimo indice, controllo di non copiare
             // un indice che poi deve essere cancellato
-            while (cntDeleted < toDelete.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))
+            //while (cntDeleted < toDelete.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))
+            while (cntDeleted < toDeleteIndexes.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))
+
                 cntDeleted += 1;
 
             if (it + cntDeleted < tot)

diff --git a/src/bm/clustering/HierarchicalClustering.java b/src/bm/clustering/HierarchicalClustering.java
@@ -21,7 +21,7 @@ public class HierarchicalClustering {
      * */
     public static List<MergeHistoryRecord> calculateClusters(DistanceMeasure d, List<String> words){
         int n = words.size();
-        int printInterval = (int)Math.max(100, n*0.005);
+        int printInterval = (int)Math.max(100, n*0.00005);
 
         // Crea n cluster, ognuno contenente una parola
         List<Cluster> clusters = new ArrayList<>();
@@ -89,7 +89,6 @@ public static List<MergeHistoryRecord> calculateClusters(DistanceMeasure d, List
             // IMPORTANTE: questa operazione deve essere fatta DOPO tutte le cancellazioni e insierimenti
             //
             manager.resize();
-
             cntIter++;
             if (cntIter % (printInterval)  == 0) {
                 System.out.println("Iterazione: " + cntIter + " numero di cluster presenti: "+ manager.size() +