mark some constructors private

mengxr · mengxr · commit ef1a71770b5e · 2014-04-08T22:59:33.000-07:00
add default parameters to JavaDoc
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD
  * <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
  *
  * The Java stubs necessary for the Python mllib bindings.
- * Users should not call the methods defined in this class directly.
  */
 class PythonMLLibAPI extends Serializable {
   private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -70,7 +70,7 @@ class LogisticRegressionModel(
  * Train a classification model for Logistic Regression using Stochastic Gradient Descent.
  * NOTE: Labels used in Logistic Regression should be {0, 1}
  */
-class LogisticRegressionWithSGD (
+class LogisticRegressionWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
     private var regParam: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -27,10 +27,14 @@ import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.rdd.RDD
 
 /**
+ * <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
+ *
  * Model for Naive Bayes Classifiers.
  *
- * @param pi Log of class priors, whose dimension is C.
- * @param theta Log of class conditional probabilities, whose dimension is CxD.
+ * @param labels list of labels
+ * @param pi log of class priors, whose dimension is C, number of labels
+ * @param theta log of class conditional probabilities, whose dimension is C-by-D,
+ *              where D is number of features
  */
 class NaiveBayesModel(
     val labels: Array[Double],
@@ -68,7 +72,7 @@ class NaiveBayesModel(
  * document classification.  By making every vector a 0-1 vector, it can also be used as
  * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]).
  */
-class NaiveBayes (private var lambda: Double) extends Serializable with Logging {
+class NaiveBayes private (private var lambda: Double) extends Serializable with Logging {
 
   def this() = this(1.0)
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -71,7 +71,7 @@ class SVMModel(
  * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.
  * NOTE: Labels used in SVM should be {0, 1}.
  */
-class SVMWithSGD(
+class SVMWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
     private var regParam: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -36,13 +36,18 @@ import org.apache.spark.util.random.XORShiftRandom
  * This is an iterative algorithm that will make multiple passes over the data, so any RDDs given
  * to it should be cached by the user.
  */
-class KMeans(
+class KMeans private (
     private var k: Int,
     private var maxIterations: Int,
     private var runs: Int,
     private var initializationMode: String,
     private var initializationSteps: Int,
     private var epsilon: Double) extends Serializable with Logging {
+
+  /**
+   * Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
+   * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4}.
+   */
   def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4)
 
   /** Set the number of clusters to create (k). Default: 2. */
@@ -318,15 +323,36 @@ object KMeans {
       data: RDD[Vector],
       k: Int,
       maxIterations: Int,
-      runs: Int = 1,
-      initializationMode: String = K_MEANS_PARALLEL): KMeansModel = {
+      runs: Int,
+      initializationMode: String): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
       .setRuns(runs)
       .setInitializationMode(initializationMode)
       .run(data)
   }
 
+  /**
+   * Trains a k-means model using the specified parameters and default values for the rest.
+   */
+  def train(
+      data: RDD[Vector],
+      k: Int,
+      maxIterations: Int): KMeansModel = {
+    train(data, k, maxIterations, 1, K_MEANS_PARALLEL)
+  }
+
+  /**
+   * Trains a k-means model using the specified parameters and default values for the rest.
+   */
+  def train(
+      data: RDD[Vector],
+      k: Int,
+      maxIterations: Int,
+      runs: Int): KMeansModel = {
+    train(data, k, maxIterations, runs, K_MEANS_PARALLEL)
+  }
+
   /**
    * Returns the index of the closest center to the given point, as well as the squared distance.
    */
@@ -371,6 +397,9 @@ object KMeans {
     MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm)
   }
 
+  /**
+   * <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
+   */
   def main(args: Array[String]) {
     if (args.length < 4) {
       println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -88,7 +88,7 @@ case class Rating(val user: Int, val product: Int, val rating: Double)
  * indicated user
  * preferences rather than explicit ratings given to items.
  */
-class ALS(
+class ALS private (
     private var numBlocks: Int,
     private var rank: Int,
     private var iterations: Int,
@@ -97,6 +97,11 @@ class ALS(
     private var alpha: Double,
     private var seed: Long = System.nanoTime()
   ) extends Serializable with Logging {
+
+  /**
+   * Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10,
+   * lambda: 0.01, implicitPrefs: false, alpha: 1.0}.
+   */
   def this() = this(-1, 10, 10, 0.01, false, 1.0)
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -51,7 +51,7 @@ class LassoModel(
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
  */
-class LassoWithSGD(
+class LassoWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
     private var regParam: Double,
@@ -70,7 +70,8 @@ class LassoWithSGD(
   super.setIntercept(false)
 
   /**
-   * Construct a Lasso object with default parameters
+   * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
+   * regParam: 1.0, miniBatchFraction: 1.0}.
    */
   def this() = this(1.0, 100, 1.0, 1.0)
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -51,7 +51,7 @@ class LinearRegressionModel(
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
  */
-class LinearRegressionWithSGD(
+class LinearRegressionWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
     private var miniBatchFraction: Double)
@@ -65,7 +65,8 @@ class LinearRegressionWithSGD(
     .setMiniBatchFraction(miniBatchFraction)
 
   /**
-   * Construct a LinearRegression object with default parameters
+   * Construct a LinearRegression object with default parameters: {stepSize: 1.0,
+   * numIterations: 100, miniBatchFraction: 1.0}.
    */
   def this() = this(1.0, 100, 1.0)
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -51,7 +51,7 @@ class RidgeRegressionModel(
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
  */
-class RidgeRegressionWithSGD(
+class RidgeRegressionWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
     private var regParam: Double,
@@ -71,7 +71,8 @@ class RidgeRegressionWithSGD(
   super.setIntercept(false)
 
   /**
-   * Construct a RidgeRegression object with default parameters
+   * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100,
+   * regParam: 1.0, miniBatchFraction: 1.0}.
    */
   def this() = this(1.0, 100, 1.0, 1.0)
 

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD`
`31`	`31`	`* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>`
`32`	`32`	`*`
`33`	`33`	`* The Java stubs necessary for the Python mllib bindings.`
`34`		`- * Users should not call the methods defined in this class directly.`
`35`	`34`	`*/`
`36`	`35`	`class PythonMLLibAPI extends Serializable {`
`37`	`36`	`private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = {`