Commit b11659c

style update

1 parent 78c4671 commit b11659c
9 files changed: +57 additions, -35 deletions

mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala
Lines changed: 2 additions & 2 deletions

@@ -26,11 +26,11 @@ import org.apache.spark.rdd.RDD
 class MLContext(self: SparkContext) {
   /**
    * Reads labeled data in the LIBSVM format into an RDD[LabeledPoint].
-   * The LIBSVM format is a text-based format used by LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/).
+   * The LIBSVM format is a text-based format used by LIBSVM and LIBLINEAR.
    * Each line represents a labeled sparse feature vector using the following format:
    * {{{label index1:value1 index2:value2 ...}}}
    * where the indices are one-based and in ascending order.
-   * This method parses each line into a [[org.apache.spark.mllib.regression.LabeledPoint]] instance,
+   * This method parses each line into a [[org.apache.spark.mllib.regression.LabeledPoint]],
    * where the feature indices are converted to zero-based.
    *
    * @param path file or directory path in any Hadoop-supported file system URI
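For context, a hedged usage sketch of this reader, not part of the commit. It assumes the implicit SparkContext-to-MLContext conversion imported via MLContext._ and a libSVMFile overload that takes just a path; check MLContext.scala for the exact signature.

// Hypothetical example; the one-argument libSVMFile call is an assumption.
import org.apache.spark.SparkContext
import org.apache.spark.mllib.MLContext._

val sc = new SparkContext("local", "libsvm-example")
// An input line "1.0 1:0.5 3:2.0" (one-based indices) becomes
// LabeledPoint(1.0, <features at zero-based positions 0 and 2>).
val points = sc.libSVMFile("data/sample.libsvm")
points.take(3).foreach(println)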

mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
Lines changed: 23 additions & 11 deletions

@@ -37,17 +37,17 @@ abstract class Gradient extends Serializable {
   def compute(data: Vector, label: Double, weights: Vector): (Vector, Double)

   /**
-   * Compute the gradient and loss given the features of a single data point, add the gradient to a provided vector to
-   * avoid creating new objects, and return loss.
+   * Compute the gradient and loss given the features of a single data point,
+   * add the gradient to a provided vector to avoid creating new objects, and return loss.
    *
    * @param data features for one data point
    * @param label label for this data point
    * @param weights weights/coefficients corresponding to features
-   * @param gradientAddTo gradient will be added to this vector
+   * @param cumGradient the computed gradient will be added to this vector
    *
-   * @return (gradient: Vector, loss: Double)
+   * @return loss
    */
-  def compute(data: Vector, label: Double, weights: Vector, gradientAddTo: Vector): Double
+  def compute(data: Vector, label: Double, weights: Vector, cumGradient: Vector): Double
 }

 /**
@@ -71,13 +71,17 @@ class LogisticGradient extends Gradient {
     (Vectors.fromBreeze(gradient), loss)
   }

-  override def compute(data: Vector, label: Double, weights: Vector, gradientAddTo: Vector): Double = {
+  override def compute(
+      data: Vector,
+      label: Double,
+      weights: Vector,
+      cumGradient: Vector): Double = {
     val brzData = data.toBreeze
     val brzWeights = weights.toBreeze
     val margin: Double = -1.0 * brzWeights.dot(brzData)
     val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label

-    brzAxpy(gradientMultiplier, brzData, gradientAddTo.toBreeze)
+    brzAxpy(gradientMultiplier, brzData, cumGradient.toBreeze)

     if (label > 0) {
       math.log(1 + math.exp(margin))
@@ -104,12 +108,16 @@ class LeastSquaresGradient extends Gradient {
     (Vectors.fromBreeze(gradient), loss)
   }

-  override def compute(data: Vector, label: Double, weights: Vector, gradientAddTo: Vector): Double = {
+  override def compute(
+      data: Vector,
+      label: Double,
+      weights: Vector,
+      cumGradient: Vector): Double = {
     val brzData = data.toBreeze
     val brzWeights = weights.toBreeze
     val diff = brzWeights.dot(brzData) - label

-    brzAxpy(2.0 * diff, brzData, gradientAddTo.toBreeze)
+    brzAxpy(2.0 * diff, brzData, cumGradient.toBreeze)

     diff * diff
   }
@@ -137,7 +145,11 @@ class HingeGradient extends Gradient {
     }
   }

-  override def compute(data: Vector, label: Double, weights: Vector, gradientAddTo: Vector): Double = {
+  override def compute(
+      data: Vector,
+      label: Double,
+      weights: Vector,
+      cumGradient: Vector): Double = {
     val brzData = data.toBreeze
     val brzWeights = weights.toBreeze
     val dotProduct = brzWeights.dot(brzData)
@@ -147,7 +159,7 @@ class HingeGradient extends Gradient {
     val labelScaled = 2 * label - 1.0

     if (1.0 > labelScaled * dotProduct) {
-      brzAxpy(-labelScaled, brzData, gradientAddTo.toBreeze)
+      brzAxpy(-labelScaled, brzData, cumGradient.toBreeze)
       1.0 - labelScaled * dotProduct
     } else {
       0.0
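The cumGradient overload exists so an optimizer can reuse one accumulator vector across a mini-batch instead of allocating a fresh gradient per example. A minimal sketch of that pattern follows; the helper is illustrative, not part of the patch, and it relies on cumGradient.toBreeze sharing storage with the vector's underlying array so that brzAxpy mutates it in place.

import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.optimization.Gradient

// Illustrative helper: accumulate loss and gradient over a batch,
// reusing a single accumulator vector for every example.
def batchLossAndGradient(
    gradient: Gradient,             // e.g. new LeastSquaresGradient
    batch: Seq[(Vector, Double)],   // (features, label) pairs
    weights: Vector): (Vector, Double) = {
  val cumGradient = Vectors.dense(new Array[Double](weights.size))
  var totalLoss = 0.0
  for ((data, label) <- batch) {
    // compute adds this example's gradient into cumGradient, returns loss
    totalLoss += gradient.compute(data, label, weights, cumGradient)
  }
  (cumGradient, totalLoss)
}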

mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
Lines changed: 5 additions & 4 deletions

@@ -17,13 +17,13 @@

 package org.apache.spark.mllib.regression

+import breeze.linalg.{Vector => BV}
+
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
-
-import breeze.linalg.{Vector => BV, DenseVector => BDV}

 /**
  * Regression model trained using Lasso.
@@ -142,7 +142,8 @@ object LassoWithSGD {
       regParam: Double,
       miniBatchFraction: Double,
       initialWeights: Vector): LassoModel = {
-    new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input, initialWeights)
+    new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction)
+      .run(input, initialWeights)
   }

   /**

mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
Lines changed: 1 addition & 1 deletion

@@ -19,9 +19,9 @@ package org.apache.spark.mllib.regression

 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.mllib.linalg.{Vector, Vectors}

 /**
  * Regression model trained using LinearRegression.

mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
Lines changed: 7 additions & 7 deletions

@@ -17,14 +17,14 @@

 package org.apache.spark.mllib.regression

+import breeze.linalg.{Vector => BV}
+
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.mllib.linalg.{Vectors, Vector}

-import breeze.linalg.{Vector => BV, DenseVector => BDV}
-
 /**
  * Regression model trained using RidgeRegression.
  *
@@ -72,9 +72,9 @@ class RidgeRegressionWithSGD private (
   // We don't want to penalize the intercept in RidgeRegression, so set this to false.
   super.setIntercept(false)

-  var yMean = 0.0
-  var xColMean: BV[Double] = _
-  var xColSd: BV[Double] = _
+  private var yMean = 0.0
+  private var xColMean: BV[Double] = _
+  private var xColSd: BV[Double] = _

   /**
    * Construct a RidgeRegression object with default parameters
@@ -214,8 +214,8 @@ object RidgeRegressionWithSGD {

   def main(args: Array[String]) {
     if (args.length != 5) {
-      println("Usage: RidgeRegression <master> <input_dir> <step_size> <regularization_parameter>" +
-        " <niters>")
+      println("Usage: RidgeRegression <master> <input_dir> <step_size> " +
+        "<regularization_parameter> <niters>")
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "RidgeRegression")

mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
Lines changed: 4 additions & 1 deletion

@@ -82,7 +82,10 @@ object MLUtils {
    * xColMean - Row vector with mean for every column (or feature) of the input data
    * xColSd - Row vector standard deviation for every column (or feature) of the input data.
    */
-  def computeStats(data: RDD[LabeledPoint], numFeatures: Int, numExamples: Long): (Double, Vector, Vector) = {
+  def computeStats(
+      data: RDD[LabeledPoint],
+      numFeatures: Int,
+      numExamples: Long): (Double, Vector, Vector) = {
     val brzData = data.map { case LabeledPoint(label, features) =>
       (label, features.toBreeze)
     }
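For reference, a small usage sketch of computeStats; only the declaration is re-wrapped by this commit, behavior is unchanged. A running SparkContext sc is assumed.

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils

val data = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(1.0, 2.0)),
  LabeledPoint(3.0, Vectors.dense(3.0, 4.0))))
// Returns (label mean, per-column feature means, per-column feature stddevs).
val (yMean, xColMean, xColSd) = MLUtils.computeStats(data, 2, data.count())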

mllib/src/test/scala/org/apache/spark/mllib/MLContextSuite.scala
Lines changed: 7 additions & 4 deletions

@@ -17,13 +17,16 @@

 package org.apache.spark.mllib

-import org.apache.spark.mllib.MLContext._
-import org.apache.spark.mllib.util.LocalSparkContext
-import org.scalatest.FunSuite
-import com.google.common.io.Files
 import java.io.File
+
+import org.scalatest.FunSuite
+
 import com.google.common.base.Charsets
+import com.google.common.io.Files
+
+import org.apache.spark.mllib.MLContext._
 import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.util.LocalSparkContext

 class MLContextSuite extends FunSuite with LocalSparkContext {
   test("libSVMFile") {

mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
Lines changed: 1 addition & 1 deletion

@@ -21,9 +21,9 @@ import scala.util.Random

 import org.scalatest.FunSuite

+import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.LocalSparkContext
-import org.apache.spark.mllib.linalg.Vectors

 object NaiveBayesSuite {

mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
Lines changed: 7 additions & 4 deletions

@@ -19,8 +19,8 @@ package org.apache.spark.mllib.regression

 import org.scalatest.FunSuite

-import org.apache.spark.mllib.util.{LinearDataGenerator, LocalSparkContext}
 import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.util.{LinearDataGenerator, LocalSparkContext}

 class LinearRegressionSuite extends FunSuite with LocalSparkContext {

@@ -88,7 +88,8 @@ class LinearRegressionSuite extends FunSuite with LocalSparkContext {

   // Test if we can correctly learn Y = 10*X1 + 10*X10000
   test("sparse linear regression without intercept") {
-    val denseRDD = sc.parallelize(LinearDataGenerator.generateLinearInput(0.0, Array(10.0, 10.0), 100, 42), 2)
+    val denseRDD = sc.parallelize(
+      LinearDataGenerator.generateLinearInput(0.0, Array(10.0, 10.0), 100, 42), 2)
     val sparseRDD = denseRDD.map { case LabeledPoint(label, v) =>
       val sv = Vectors.sparse(10000, Seq((0, v(0)), (9999, v(1))))
       LabeledPoint(label, sv)
@@ -113,9 +114,11 @@ class LinearRegressionSuite extends FunSuite with LocalSparkContext {
     val sparseValidationRDD = sc.parallelize(sparseValidationData, 2)

     // Test prediction on RDD.
-    validatePrediction(model.predict(sparseValidationRDD.map(_.features)).collect(), sparseValidationData)
+    validatePrediction(
+      model.predict(sparseValidationRDD.map(_.features)).collect(), sparseValidationData)

     // Test prediction on Array.
-    validatePrediction(sparseValidationData.map(row => model.predict(row.features)), sparseValidationData)
+    validatePrediction(
+      sparseValidationData.map(row => model.predict(row.features)), sparseValidationData)
   }
 }
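As a note on the Vectors.sparse call in the test above: indices are zero-based and unspecified positions read as 0.0. A minimal sketch mirroring the test's dense-to-sparse mapping:

import org.apache.spark.mllib.linalg.Vectors

// 10000-dimensional vector with nonzeros only at positions 0 and 9999.
val sv = Vectors.sparse(10000, Seq((0, 10.0), (9999, 10.0)))
assert(sv(0) == 10.0 && sv(9999) == 10.0 && sv(123) == 0.0)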
