Skip to content

Commit ef1a717

Browse files
committed
mark some constructors private
add default parameters to JavaDoc
1 parent 00ffbcc commit ef1a717

File tree

9 files changed

+56
-16
lines changed

9 files changed

+56
-16
lines changed

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD
3131
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
3232
*
3333
* The Java stubs necessary for the Python mllib bindings.
34-
* Users should not call the methods defined in this class directly.
3534
*/
3635
class PythonMLLibAPI extends Serializable {
3736
private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = {

mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class LogisticRegressionModel(
7070
* Train a classification model for Logistic Regression using Stochastic Gradient Descent.
7171
* NOTE: Labels used in Logistic Regression should be {0, 1}
7272
*/
73-
class LogisticRegressionWithSGD (
73+
class LogisticRegressionWithSGD private (
7474
private var stepSize: Double,
7575
private var numIterations: Int,
7676
private var regParam: Double,

mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,14 @@ import org.apache.spark.mllib.util.MLUtils
2727
import org.apache.spark.rdd.RDD
2828

2929
/**
30+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
31+
*
3032
* Model for Naive Bayes Classifiers.
3133
*
32-
* @param pi Log of class priors, whose dimension is C.
33-
* @param theta Log of class conditional probabilities, whose dimension is CxD.
34+
* @param labels list of labels
35+
* @param pi log of class priors, whose dimension is C, the number of labels
36+
* @param theta log of class conditional probabilities, whose dimension is C-by-D,
37+
* where D is the number of features
3438
*/
3539
class NaiveBayesModel(
3640
val labels: Array[Double],
@@ -68,7 +72,7 @@ class NaiveBayesModel(
6872
* document classification. By making every vector a 0-1 vector, it can also be used as
6973
* Bernoulli NB ([[http://tinyurl.com/p7c96j6]]).
7074
*/
71-
class NaiveBayes (private var lambda: Double) extends Serializable with Logging {
75+
class NaiveBayes private (private var lambda: Double) extends Serializable with Logging {
7276

7377
def this() = this(1.0)
7478

mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class SVMModel(
7171
* Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.
7272
* NOTE: Labels used in SVM should be {0, 1}.
7373
*/
74-
class SVMWithSGD(
74+
class SVMWithSGD private (
7575
private var stepSize: Double,
7676
private var numIterations: Int,
7777
private var regParam: Double,

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,18 @@ import org.apache.spark.util.random.XORShiftRandom
3636
* This is an iterative algorithm that will make multiple passes over the data, so any RDDs given
3737
* to it should be cached by the user.
3838
*/
39-
class KMeans(
39+
class KMeans private (
4040
private var k: Int,
4141
private var maxIterations: Int,
4242
private var runs: Int,
4343
private var initializationMode: String,
4444
private var initializationSteps: Int,
4545
private var epsilon: Double) extends Serializable with Logging {
46+
47+
/**
48+
* Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
49+
* initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4}.
50+
*/
4651
def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4)
4752

4853
/** Set the number of clusters to create (k). Default: 2. */
@@ -318,15 +323,36 @@ object KMeans {
318323
data: RDD[Vector],
319324
k: Int,
320325
maxIterations: Int,
321-
runs: Int = 1,
322-
initializationMode: String = K_MEANS_PARALLEL): KMeansModel = {
326+
runs: Int,
327+
initializationMode: String): KMeansModel = {
323328
new KMeans().setK(k)
324329
.setMaxIterations(maxIterations)
325330
.setRuns(runs)
326331
.setInitializationMode(initializationMode)
327332
.run(data)
328333
}
329334

335+
/**
336+
* Trains a k-means model using the specified parameters and default values for the rest.
337+
*/
338+
def train(
339+
data: RDD[Vector],
340+
k: Int,
341+
maxIterations: Int): KMeansModel = {
342+
train(data, k, maxIterations, 1, K_MEANS_PARALLEL)
343+
}
344+
345+
/**
346+
* Trains a k-means model using the specified parameters and default values for the rest.
347+
*/
348+
def train(
349+
data: RDD[Vector],
350+
k: Int,
351+
maxIterations: Int,
352+
runs: Int): KMeansModel = {
353+
train(data, k, maxIterations, runs, K_MEANS_PARALLEL)
354+
}
355+
330356
/**
331357
* Returns the index of the closest center to the given point, as well as the squared distance.
332358
*/
@@ -371,6 +397,9 @@ object KMeans {
371397
MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm)
372398
}
373399

400+
/**
401+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
402+
*/
374403
def main(args: Array[String]) {
375404
if (args.length < 4) {
376405
println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]")

mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ case class Rating(val user: Int, val product: Int, val rating: Double)
8888
* indicated user
8989
* preferences rather than explicit ratings given to items.
9090
*/
91-
class ALS(
91+
class ALS private (
9292
private var numBlocks: Int,
9393
private var rank: Int,
9494
private var iterations: Int,
@@ -97,6 +97,11 @@ class ALS(
9797
private var alpha: Double,
9898
private var seed: Long = System.nanoTime()
9999
) extends Serializable with Logging {
100+
101+
/**
102+
* Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10,
103+
* lambda: 0.01, implicitPrefs: false, alpha: 1.0}.
104+
*/
100105
def this() = this(-1, 10, 10, 0.01, false, 1.0)
101106

102107
/**

mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class LassoModel(
5151
* its corresponding right hand side label y.
5252
* See also the documentation for the precise formulation.
5353
*/
54-
class LassoWithSGD(
54+
class LassoWithSGD private (
5555
private var stepSize: Double,
5656
private var numIterations: Int,
5757
private var regParam: Double,
@@ -70,7 +70,8 @@ class LassoWithSGD(
7070
super.setIntercept(false)
7171

7272
/**
73-
* Construct a Lasso object with default parameters
73+
* Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
74+
* regParam: 1.0, miniBatchFraction: 1.0}.
7475
*/
7576
def this() = this(1.0, 100, 1.0, 1.0)
7677

mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class LinearRegressionModel(
5151
* its corresponding right hand side label y.
5252
* See also the documentation for the precise formulation.
5353
*/
54-
class LinearRegressionWithSGD(
54+
class LinearRegressionWithSGD private (
5555
private var stepSize: Double,
5656
private var numIterations: Int,
5757
private var miniBatchFraction: Double)
@@ -65,7 +65,8 @@ class LinearRegressionWithSGD(
6565
.setMiniBatchFraction(miniBatchFraction)
6666

6767
/**
68-
* Construct a LinearRegression object with default parameters
68+
* Construct a LinearRegression object with default parameters: {stepSize: 1.0,
69+
* numIterations: 100, miniBatchFraction: 1.0}.
6970
*/
7071
def this() = this(1.0, 100, 1.0)
7172

mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class RidgeRegressionModel(
5151
* its corresponding right hand side label y.
5252
* See also the documentation for the precise formulation.
5353
*/
54-
class RidgeRegressionWithSGD(
54+
class RidgeRegressionWithSGD private (
5555
private var stepSize: Double,
5656
private var numIterations: Int,
5757
private var regParam: Double,
@@ -71,7 +71,8 @@ class RidgeRegressionWithSGD(
7171
super.setIntercept(false)
7272

7373
/**
74-
* Construct a RidgeRegression object with default parameters
74+
* Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100,
75+
* regParam: 1.0, miniBatchFraction: 1.0}.
7576
*/
7677
def this() = this(1.0, 100, 1.0, 1.0)
7778

0 commit comments

Comments
 (0)