@@ -85,7 +85,11 @@ class LinearRegression(override val uid: String)
   setDefault(fitIntercept -> true)
 
   /**
-   * Set to enable scaling (standardization).
+   * Whether to standardize the training features before fitting the model.
+   * The coefficients of the models will always be returned on the original scale,
+   * so standardization is transparent to users. Note that when no regularization
+   * is applied, the models should always converge to the same solution,
+   * with or without standardization.
    * Default is true.
    * @group setParam
    */
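
A minimal usage sketch of the behavior documented above. The setStandardization setter name and the `training` DataFrame are assumptions for illustration, not shown in this hunk:

import org.apache.spark.ml.regression.LinearRegression

// `training` is an assumed DataFrame with "label" and "features" columns.
val lr = new LinearRegression()
  .setMaxIter(100)
  .setRegParam(0.0)            // no regularization
  .setStandardization(false)   // assumed setter name for this param

val model = lr.fit(training)
// Coefficients are reported on the original feature scale, so with
// regParam == 0.0 the fitted weights should match the run with
// setStandardization(true), as the doc comment above states.
println(model.weights)
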
@@ -178,7 +182,19 @@ class LinearRegression(override val uid: String)
     val optimizer = if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) {
       new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol))
     } else {
-      new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegParam, $(tol))
+      def regParamL1Fun = (index: Int) => {
+        if ($(standardization)) {
+          effectiveL1RegParam
+        } else {
+          // If `standardization` is false, we still standardize the data
+          // to improve the rate of convergence; as a result, we have to
+          // perform this reverse standardization by penalizing each component
+          // differently to get effectively the same objective function when
+          // the training dataset is not standardized.
+          if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0
+        }
+      }
+      new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol))
     }
 
     val initialWeights = Vectors.zeros(numFeatures)
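
A sketch of why the per-component penalty above preserves the user's objective (the notation here is an assumption for exposition, not from the patch): the optimizer fits coefficients \(\bar{w}_j\) against features scaled by their standard deviations \(\sigma_j\), so the original-scale coefficient is \(w_j = \bar{w}_j / \sigma_j\) and the L1 penalty the user expects becomes

$$
\lambda \sum_j |w_j| \;=\; \sum_j \frac{\lambda}{\sigma_j}\,|\bar{w}_j|,
$$

which is exactly the effectiveL1RegParam / featuresStd(index) that regParamL1Fun hands to OWLQN for each component; components with \(\sigma_j = 0\) are given a zero penalty to avoid dividing by zero.
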
@@ -464,6 +480,7 @@ class LinearRegressionSummary private[regression] (
  * @param weights The weights/coefficients corresponding to the features.
  * @param labelStd The standard deviation value of the label.
  * @param labelMean The mean value of the label.
+ * @param fitIntercept Whether to fit an intercept term.
  * @param featuresStd The standard deviation values of the features.
  * @param featuresMean The mean values of the features.
  */
@@ -472,7 +489,6 @@ private class LeastSquaresAggregator(
     labelStd: Double,
     labelMean: Double,
     fitIntercept: Boolean,
-    standardization: Boolean,
     featuresStd: Array[Double],
     featuresMean: Array[Double]) extends Serializable {
 
@@ -519,11 +535,7 @@ private class LeastSquaresAggregator(
       val localGradientSumArray = gradientSumArray
       data.foreachActive { (index, value) =>
         if (featuresStd(index) != 0.0 && value != 0.0) {
-          if (standardization) {
-            localGradientSumArray(index) += diff * value / featuresStd(index)
-          } else {
-            localGradientSumArray(index) += diff * value
-          }
+          localGradientSumArray(index) += diff * value / featuresStd(index)
         }
       }
       lossSum += diff * diff / 2.0
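
For context (again with assumed notation): the aggregator always works with internally standardized features \(\bar{x}_{ij} = x_{ij}/\sigma_j\), so the gradient of the squared-error term with respect to the standardized coefficient \(\bar{w}_j\) is

$$
\frac{\partial}{\partial \bar{w}_j}\,\tfrac{1}{2}\,\mathrm{diff}_i^2 \;=\; \mathrm{diff}_i \cdot \frac{x_{ij}}{\sigma_j},
$$

independent of the standardization flag, which is why the branch can be removed here; the flag now only influences the regularization terms handled by regParamL1Fun and LeastSquaresCostFun.
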
@@ -590,43 +602,46 @@ private class LeastSquaresCostFun(
     val w = Vectors.fromBreeze(weights)
 
     val leastSquaresAggregator = data.treeAggregate(new LeastSquaresAggregator(w, labelStd,
-      labelMean, fitIntercept, standardization, featuresStd, featuresMean))(
+      labelMean, fitIntercept, featuresStd, featuresMean))(
       seqOp = (c, v) => (c, v) match {
         case (aggregator, (label, features)) => aggregator.add(label, features)
       },
       combOp = (c1, c2) => (c1, c2) match {
         case (aggregator1, aggregator2) => aggregator1.merge(aggregator2)
       })
 
-    // If we are not doing standardization go back to unscaled weights
-    if (standardization) {
-      // regVal is the sum of weight squares for L2 regularization
-      val norm = brzNorm(weights, 2.0)
-      val regVal = 0.5 * effectiveL2regParam * norm * norm
-
-      val loss = leastSquaresAggregator.loss + regVal
-      val gradient = leastSquaresAggregator.gradient
-      axpy(effectiveL2regParam, w, gradient)
+    val totalGradientArray = leastSquaresAggregator.gradient.toArray
 
-      (loss, gradient.toBreeze.asInstanceOf[BDV[Double]])
+    val regVal = if (effectiveL2regParam == 0.0) {
+      0.0
     } else {
-      val unscaledWeights = weights.copy
-      val len = unscaledWeights.length
-      var i = 0
-      while (i < len) {
-        unscaledWeights(i) /= featuresStd(i)
-        i += 1
+      var sum = 0.0
+      w.foreachActive { (index, value) =>
+        // The following code computes both the loss and the gradient of the
+        // regularization term, adding the gradient back to totalGradientArray.
+        sum += {
+          if (standardization) {
+            totalGradientArray(index) += effectiveL2regParam * value
+            value * value
+          } else {
+            if (featuresStd(index) != 0.0) {
+              // If `standardization` is false, we still standardize the data
+              // to improve the rate of convergence; as a result, we have to
+              // perform this reverse standardization by penalizing each component
+              // differently to get effectively the same objective function when
+              // the training dataset is not standardized.
+              val temp = value / (featuresStd(index) * featuresStd(index))
+              totalGradientArray(index) += effectiveL2regParam * temp
+              value * temp
+            } else {
+              0.0
+            }
+          }
+        }
       }
-      val norm = brzNorm(unscaledWeights, 2.0)
-
-      val regVal = 0.5 * effectiveL2regParam * norm * norm
-
-      val loss = leastSquaresAggregator.loss + regVal
-      val gradient = leastSquaresAggregator.gradient
-      val mw = Vectors.dense(unscaledWeights.toArray)
-      axpy(effectiveL2regParam, mw, gradient)
-
-      (loss, gradient.toBreeze.asInstanceOf[BDV[Double]])
+      0.5 * effectiveL2regParam * sum
     }
+
+    (leastSquaresAggregator.loss + regVal, new BDV(totalGradientArray))
   }
 }
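
The same reverse-standardization argument applies to the L2 term above (notation assumed as before, with \(\bar{w}_j = w_j \sigma_j\) the coefficient in the standardized space):

$$
\frac{\lambda}{2}\sum_j w_j^2 \;=\; \frac{\lambda}{2}\sum_j \frac{\bar{w}_j^2}{\sigma_j^2},
\qquad
\frac{\partial}{\partial \bar{w}_j}\left(\frac{\lambda}{2}\,\frac{\bar{w}_j^2}{\sigma_j^2}\right) \;=\; \lambda\,\frac{\bar{w}_j}{\sigma_j^2},
$$

which matches the value * temp accumulated into sum and the effectiveL2regParam * temp added to totalGradientArray(index); when standardization is true the \(\sigma_j\) factors drop out and the branch reduces to the plain ridge penalty and gradient.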