@@ -32,11 +32,10 @@ import org.apache.spark.util.Utils
3232
3333/**
3434 * :: DeveloperApi ::
35- *
3635 * StreamingKMeansModel extends MLlib's KMeansModel for streaming
3736 * algorithms, so it can keep track of the number of points assigned
3837 * to each cluster, and also update the model by doing a single iteration
39- * of the standard KMeans algorithm.
38+ * of the standard k-means algorithm.
4039 *
4140 * The update algorithm uses the "mini-batch" KMeans rule,
4241 * generalized to incorporate forgetfulness (i.e. decay).
@@ -63,22 +62,13 @@ import org.apache.spark.util.Utils
6362 * if 'batches', behavior will be independent of the number of points per batch;
6463 * if 'points', the expected number of points per batch must be specified.
6564 *
66- * Use a builder pattern to construct a streaming KMeans analysis
67- * in an application, like:
68- *
69- * val model = new StreamingKMeans()
70- * .setDecayFactor(0.5)
71- * .setK(3)
72- * .setRandomCenters(5)
73- * .trainOn(DStream)
74- *
7565 */
7666@ DeveloperApi
7767class StreamingKMeansModel (
7868 override val clusterCenters : Array [Vector ],
7969 val clusterCounts : Array [Long ]) extends KMeansModel (clusterCenters) with Logging {
8070
81- // do a sequential KMeans update on a batch of data
71+ /** Perform a k-means update on a batch of data. */
8272 def update (data : RDD [Vector ], a : Double , units : String ): StreamingKMeansModel = {
8373
8474 val centers = clusterCenters
@@ -125,7 +115,22 @@ class StreamingKMeansModel(
125115 }
126116
127117}
128-
118+ /**
119+ * :: DeveloperApi ::
120+ * StreamingKMeans provides methods for configuring a
121+ * streaming k-means analysis, training the model on streaming data,
122+ * and using the model to make predictions on streaming data.
123+ * See StreamingKMeansModel for details on the algorithm and update rules.
124+ *
125+ * Use a builder pattern to construct a streaming k-means analysis
126+ * in an application, like:
127+ *
128+ * val model = new StreamingKMeans()
129+ * .setDecayFactor(0.5)
130+ * .setK(3)
131+ * .setRandomCenters(5)
132+ * .trainOn(DStream)
133+ */
129134@ DeveloperApi
130135class StreamingKMeans (
131136 var k : Int ,
@@ -171,7 +176,7 @@ class StreamingKMeans(
171176 this
172177 }
173178
174- /** Specify initial explicitly directly. */
179+ /** Specify initial centers directly. */
175180 def setInitialCenters (initialCenters : Array [Vector ]): this .type = {
176181 val clusterCounts = Array .fill(this .k)(0 ).map(_.toLong)
177182 this .model = new StreamingKMeansModel (initialCenters, clusterCounts)
0 commit comments