Skip to content

Commit

Permalink
Diminuito consumo memoria da parte di ClusterManager
Browse files Browse the repository at this point in the history
  • Loading branch information
GiacomoManzoli committed Dec 23, 2016
1 parent 30c9f34 commit 8a3ab29
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 13 deletions.
2 changes: 1 addition & 1 deletion experiment_1.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ experiment.pipeline = clustering, save_graph_data, save_history, merge_history,
experiment.lexicon_path = ./lexicon/italian/AGZ1994.txt
# opzionali, ma devono essere presenti entrambi
experiment.lexicon_range.start = 50000
experiment.lexicon_range.end = 60000
experiment.lexicon_range.end = 50500

# Riduzione dei dati
experiment.stopwords = ./lexicon/italian/stoplist/stoplist2.txt
Expand Down
25 changes: 25 additions & 0 deletions src/bm/Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@ static private int _j(long k){
public static void main(String[] args) {
List<String> lines = new ArrayList<>();
long tot = ((long)N*(N-1))/2;
/* Total number of processors or cores available to the JVM */
System.out.println("Available processors (cores): " +
Runtime.getRuntime().availableProcessors());

/* Total amount of free memory available to the JVM */
System.out.println("Free memory (bytes): " +
Runtime.getRuntime().freeMemory());

/* This will return Long.MAX_VALUE if there is no preset limit */
long maxMemory = Runtime.getRuntime().maxMemory();
/* Maximum amount of memory the JVM will attempt to use */
System.out.println("Maximum memory (bytes): " +
(maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));

/* Total memory currently in use by the JVM */
System.out.println("Total memory (bytes): " +
Runtime.getRuntime().totalMemory());
long allocatedMemory =
(Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory());
long presumableFreeMemory = Runtime.getRuntime().maxMemory() - (Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory());


System.out.println(allocatedMemory);
System.out.println(presumableFreeMemory);
/*
for (int i = 0; i < Integer.MAX_VALUE; i++){
long k = ThreadLocalRandom.current().nextLong(0, tot);
int i1 = _i(k);
Expand Down
3 changes: 2 additions & 1 deletion src/bm/clustering/Cluster.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import bm.yass.DistanceMeasure;

import java.util.ArrayList;
import java.util.List;

/**
Expand All @@ -20,7 +21,7 @@ public class Cluster {
* @return il nuovo cluster ottenuto mergiando i due cluster ricevuti come parametro.
* */
static Cluster merge(int id, Cluster c1, Cluster c2) {
// NOTE(review): this fragment comes from a diff view, so two versions of the same
// declaration appear back to back. Presumably the subList line is the one removed by
// the commit and the clone() line is the one added (the commit also adds the
// java.util.ArrayList import) -- confirm against the repository.
//
// subList(0, size) returns a view backed by c1.words, so the addAll call below would
// also append c2's words to c1's own list.
List<String> newWords = c1.words.subList(0, c1.words.size());
// clone() yields an independent shallow copy, so c1.words is left untouched by addAll.
// NOTE(review): the cast assumes c1.words is an ArrayList at runtime and is unchecked;
// new ArrayList<>(c1.words) would copy any List without a cast.
List<String> newWords = (ArrayList<String>)((ArrayList<String>)c1.words).clone();
// Append the words of the second cluster after those of the first.
newWords.addAll(c2.words);
return new Cluster(id, newWords);
}
Expand Down
56 changes: 47 additions & 9 deletions src/bm/clustering/ClusterManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,34 +70,69 @@ int _j(long k){
* @param indexes indici dei cluster da rimuovere
* */
void deleteClusters(List<Integer> indexes){
/*
* PROBLEM: the data structures supporting this method can require a large amount of space.
* A Set is used, implemented with a hash map, indexed by keys of type long.
*
* SOLUTION: the deletion is performed in several passes. This reduces memory usage,
* although it makes the operation less efficient in terms of time.
*
* NOTE(review): the multi-pass variant described above is the commented-out code below;
* the active code performs a single call to actuallyDeleteClusters with the whole list.
* */

// Sort the indexes in ascending order (this sorts the caller's list in place)
Collections.sort(indexes);
actuallyDeleteClusters(indexes);
// Disabled batched variant, kept for reference: it passes the sorted indexes to
// actuallyDeleteClusters in chunks of 4, then handles the remaining tail separately.
//int start;
//for (start = 0; start + 4 < indexes.size(); start+= 4){
//    List<Integer> ar = new ArrayList<>();
//    for (Integer i: indexes.subList(start, start+4)) {
//        ar.add(i);
//    }
//    actuallyDeleteClusters(ar);
//}
//List<Integer> ar = new ArrayList<>();
//for (Integer i: indexes.subList(start, indexes.size())) {
//    ar.add(i);
//}
//try {
//    if(ar.size() > 0)
//        actuallyDeleteClusters(ar);
//} catch (Exception e) {
//    System.out.println(e.toString());
//    System.out.println();
//}
}

private void actuallyDeleteClusters(List<Integer> indexes) {
int n = clusters.size();
// La stessa coppia può comparire più di una volta, quindi le memorizzo in un set per evitare duplicati.
// Anziché memorizzare direttamente la coppia, calcolo subito l'indice della coppia nella matrice linearizzata.
Set<Long> toDelete = new HashSet<>();

//Set<Long> toDelete = new HashSet<>();
List<Long> toDeleteIndexes = new ArrayList<>();
// Per ogni indice calcolo le coppie in cui compare
for (int r : indexes) {
// calcolo le coppie del tipo (*,r)
for (int i = 0; i < r; i++) {
long index = _k(i,r);
if (index >= 0 && index < dist.getSize()){
toDelete.add(index);
if (index >= 0 && index < dist.getSize() ){
//toDelete.add(index);
if (! toDeleteIndexes.contains(index))
toDeleteIndexes.add(index);
}
}
// calcolo le coppie del tipo (r,*) (c'è (r,s))
// sono consecutive e ce ne sono n-r-1
for (int j = r+1; j < r+1+(n-r-1); j++) {
long index = _k(r,j);
if (index >= 0 && index < dist.getSize()){
toDelete.add(index);
//toDelete.add(index);
if (! toDeleteIndexes.contains(index))
toDeleteIndexes.add(index);
}
}
}

List<Long> toDeleteIndexes = new ArrayList<>();
toDeleteIndexes.addAll(toDelete);
//List<Long> toDeleteIndexes = new ArrayList<>();
//toDeleteIndexes.addAll(toDelete);
// Sort the indexes to delete in ascending order (Collections.sort uses the natural ordering)
Collections.sort(toDeleteIndexes);

Expand All @@ -112,12 +147,15 @@ void deleteClusters(List<Integer> indexes){
// devo cancellare l'indice corrente, passo all'elemento successivo
if (cntDeleted == 0 && it != toDeleteIndexes.get(cntDeleted)) { continue; }

if (cntDeleted < toDelete.size() && it == toDeleteIndexes.get(cntDeleted))
//if (cntDeleted < toDelete.size() && it == toDeleteIndexes.get(cntDeleted))
if (cntDeleted < toDeleteIndexes.size() && it == toDeleteIndexes.get(cntDeleted))
cntDeleted += 1;

// Prima di copiare il prossimo indice, controllo di non copiare
// un indice che poi deve essere cancellato
while (cntDeleted < toDelete.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))
//while (cntDeleted < toDelete.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))
while (cntDeleted < toDeleteIndexes.size() && it + cntDeleted == toDeleteIndexes.get(cntDeleted))

cntDeleted += 1;

if (it + cntDeleted < tot)
Expand Down
3 changes: 1 addition & 2 deletions src/bm/clustering/HierarchicalClustering.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class HierarchicalClustering {
* */
public static List<MergeHistoryRecord> calculateClusters(DistanceMeasure d, List<String> words){
int n = words.size();
int printInterval = (int)Math.max(100, n*0.005);
int printInterval = (int)Math.max(100, n*0.00005);

// Crea n cluster, ognuno contenente una parola
List<Cluster> clusters = new ArrayList<>();
Expand Down Expand Up @@ -89,7 +89,6 @@ public static List<MergeHistoryRecord> calculateClusters(DistanceMeasure d, List
// IMPORTANT: this operation must be performed AFTER all deletions and insertions
//
manager.resize();

cntIter++;
if (cntIter % (printInterval) == 0) {
System.out.println("Iterazione: " + cntIter + " numero di cluster presenti: "+ manager.size() +
Expand Down

0 comments on commit 8a3ab29

Please sign in to comment.