Fixed Scala style issues: camelCase names, consistent spacing and indentation, Scaladoc comment formatting

tgaloppo · tgaloppo · commit d695034fab97 · 2014-12-16T08:18:58.000-05:00
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGmmEM.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGmmEM.scala
@@ -47,9 +47,10 @@ object DenseGmmEM {
       println("weight=%f mu=%s sigma=\n%s\n" format 
         (clusters.weight(i), clusters.mu(i), clusters.sigma(i)))
     }
-    val (responsibility_matrix, cluster_labels) = clusters.predict(data)
-    for(x <- cluster_labels.collect()){
-        print(" " + x)
+    
+    val (responsibilityMatrix, clusterLabels) = clusters.predict(data)
+    for (x <- clusterLabels.collect) {
+      print(" " + x)
     }
   }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -42,9 +42,9 @@ class GaussianMixtureModel(
 
   /** Maps given points to their cluster indices. */
   def predict(points: RDD[Vector]): (RDD[Array[Double]],RDD[Int]) = {
-    val responsibility_matrix = new GaussianMixtureModelEM()
-        .predictClusters(points,mu,sigma,weight,k)
-    val cluster_labels = responsibility_matrix.map(r => r.indexOf(r.max))
-    (responsibility_matrix,cluster_labels)
-  }    
+    val responsibilityMatrix = new GaussianMixtureModelEM()
+      .predictClusters(points,mu,sigma,weight,k)
+    val clusterLabels = responsibilityMatrix.map(r => r.indexOf(r.max))
+    (responsibilityMatrix, clusterLabels)
+  }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModelEM.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModelEM.scala
@@ -208,27 +208,30 @@ class GaussianMixtureModelEM private (
     cov
   }
   
-  /** 
-  Given the input vectors, return the membership value of each vector
-  to all mixture components.  
-  */
-  def predictClusters(points:RDD[Vector],mu:Array[Vector],sigma:Array[Matrix],
-      weight:Array[Double],k:Int):RDD[Array[Double]]= {
+  /**
+   * Given the input vectors, return the membership value of each vector
+   * to all mixture components. 
+   */
+  def predictClusters(points:RDD[Vector], mu:Array[Vector], sigma:Array[Matrix],
+      weight:Array[Double],k:Int): RDD[Array[Double]] = {
     val ctx = points.sparkContext
-    val dists = ctx.broadcast((0 until k).map(i => 
-        new MultivariateGaussian(mu(i).toBreeze.toDenseVector,sigma(i).toBreeze.toDenseMatrix))
-        .toArray)
+    val dists = ctx.broadcast{
+      (0 until k).map{ i => 
+        new MultivariateGaussian(mu(i).toBreeze.toDenseVector, sigma(i).toBreeze.toDenseMatrix)
+      }.toArray
+    }
     val weights = ctx.broadcast((0 until k).map(i => weight(i)).toArray)
-    points.map(x=>compute_log_likelihood(x.toBreeze.toDenseVector,dists.value,weights.value,k))
-
+    points.map{ x => 
+      computeSoftAssignments(x.toBreeze.toDenseVector, dists.value, weights.value, k)
+    }
   }
+  
   /**
-  * Compute the log density of each vector
-  */
-  def compute_log_likelihood(pt:DenseDoubleVector,dists:Array[MultivariateGaussian],
-      weights:Array[Double],k:Int):Array[Double]={
-    val p = (0 until k).map(i => 
-          eps + weights(i) * dists(i).pdf(pt)).toArray
+   * Compute the partial assignments for each vector
+   */
+  def computeSoftAssignments(pt: DenseDoubleVector, dists: Array[MultivariateGaussian],
+      weights: Array[Double], k: Int): Array[Double] = {
+    val p = (0 until k).map(i => eps + weights(i) * dists(i).pdf(pt)).toArray
     val pSum = p.sum 
     for(i<- 0 until k){
       p(i) /= pSum

Original file line number	Diff line number	Diff line change
`@@ -47,9 +47,10 @@ object DenseGmmEM {`
`47`	`47`	`println("weight=%f mu=%s sigma=\n%s\n" format`
`48`	`48`	`(clusters.weight(i), clusters.mu(i), clusters.sigma(i)))`
`49`	`49`	`}`
`50`		`- val (responsibility_matrix, cluster_labels) = clusters.predict(data)`
`51`		`- for(x <- cluster_labels.collect()){`
`52`		`- print(" " + x)`
	`50`	`+`
	`51`	`+ val (responsibilityMatrix, clusterLabels) = clusters.predict(data)`
	`52`	`+ for (x <- clusterLabels.collect) {`
	`53`	`+ print(" " + x)`
`53`	`54`	`}`
`54`	`55`	`}`
`55`	`56`	`}`