From 3fee7651488efa1a98b2200af83937900c04c9b4 Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Mon, 27 Jun 2016 00:22:45 -0700
Subject: [PATCH 1/7] [SPARK-16140] group k-means method in generated doc

---
 R/pkg/R/mllib.R | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 853cfce74ae5..bbe0317787b7 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -266,9 +266,9 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
             return(list(apriori = apriori, tables = tables))
           })
 
-#' Fit a k-means model
+#' K-Means Model
 #'
-#' Fit a k-means model, similarly to R's kmeans().
+#' Fits a k-means model, similarly to R's kmeans().
 #'
 #' @param data SparkDataFrame for training
 #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -277,14 +277,32 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' @param k Number of centers
 #' @param maxIter Maximum iteration number
 #' @param initMode The initialization algorithm choosen to fit the model
-#' @return A fitted k-means model
+#' @return \code{spark.kmeans} returns a fitted k-means model
 #' @rdname spark.kmeans
+#' @name spark.kmeans
 #' @export
 #' @examples
 #' \dontrun{
-#' model <- spark.kmeans(data, ~ ., k = 4, initMode = "random")
+#' sparkR.session()
+#' data(iris)
+#' df <- createDataFrame(iris)
+#' model <- spark.kmeans(df, Sepal_Length ~ Sepal_Width, k = 4, initMode = "random")
+#' summary(model)
+#'
+#' # fitted values on training data
+#' fitted <- predict(model, df)
+#' head(select(fitted, "Sepal_Length", "prediction"))
+#'
+#' # save fitted model to input path
+#' path <- "path/to/model"
+#' write.ml(model, path)
+#'
+#' # can also read back the saved model and print
+#' savedModel <- read.ml(path)
+#' summary(savedModel)
 #' }
 #' @note spark.kmeans since 2.0.0
+#' @seealso \link{kmeans}, \link{read.ml}
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) {
             formula <- paste(deparse(formula), collapse = "")
@@ -327,8 +345,8 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #' Returns the summary of a k-means model produced by spark.kmeans(),
 #' similarly to R's summary().
 #'
-#' @param object a fitted k-means model
-#' @return the model's coefficients, size and cluster
+#' @param object A fitted k-means model
+#' @return \code{summary} the model's coefficients, size and cluster
 #' @rdname summary
 #' @export
 #' @examples
@@ -357,7 +375,7 @@ setMethod("summary", signature(object = "KMeansModel"),
                    cluster = cluster, is.loaded = is.loaded))
           })
 
-#' Predicted values based on model
+#' Predicted values based a k-means on model
 #'
 #' Makes predictions from a k-means model or a model produced by spark.kmeans().
 #'

From 122848395ac99392fab7d5123971ee28b69a46b7 Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Mon, 27 Jun 2016 00:28:23 -0700
Subject: [PATCH 2/7] [SPARK-16140] fix k-means doc title typo and @return wording

---
 R/pkg/R/mllib.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index bbe0317787b7..8ac26553be15 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -318,7 +318,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
 #' Note: A saved-loaded model does not support this method.
 #'
 #' @param object A fitted k-means model
-#' @return SparkDataFrame containing fitted values
+#' @return \code{fitted} returns a SparkDataFrame containing fitted values
 #' @rdname fitted
 #' @export
 #' @examples
@@ -346,7 +346,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #' similarly to R's summary().
 #'
 #' @param object A fitted k-means model
-#' @return \code{summary} the model's coefficients, size and cluster
+#' @return \code{summary} returns the model's coefficients, size and cluster
 #' @rdname summary
 #' @export
 #' @examples
@@ -375,11 +375,12 @@ setMethod("summary", signature(object = "KMeansModel"),
                    cluster = cluster, is.loaded = is.loaded))
           })
 
-#' Predicted values based a k-means on model
+#' Predicted values based on a k-means model
 #'
 #' Makes predictions from a k-means model or a model produced by spark.kmeans().
 #'
 #' @param object A fitted k-means model
+#' @return \code{predict} returns the predicted values based on a k-means model
 #' @rdname predict
 #' @export
 #' @examples

From 6a67a49f4b114403aa5e63501f1ec91d937d5a7c Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Mon, 27 Jun 2016 15:19:49 -0700
Subject: [PATCH 3/7] [SPARK-16140] bring individual methods to the same
 spark.kmeans page

---
 R/pkg/R/mllib.R | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 8ac26553be15..1aa6d3caaddc 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -342,18 +342,12 @@ setMethod("fitted", signature(object = "KMeansModel"),
 
 #' Get the summary of a k-means model
 #'
-#' Returns the summary of a k-means model produced by spark.kmeans(),
-#' similarly to R's summary().
+#' Returns the summary of a k-means model produced by spark.kmeans(), similarly to R's summary().
 #'
 #' @param object A fitted k-means model
 #' @return \code{summary} returns the model's coefficients, size and cluster
-#' @rdname summary
+#' @rdname spark.kmeans
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., 2)
-#' summary(model)
-#' }
 #' @note summary(KMeansModel) since 2.0.0
 setMethod("summary", signature(object = "KMeansModel"),
           function(object, ...) {
@@ -381,14 +375,8 @@ setMethod("summary", signature(object = "KMeansModel"),
 #'
 #' @param object A fitted k-means model
 #' @return \code{predict} returns the predicted values based on a k-means model
-#' @rdname predict
+#' @rdname spark.kmeans
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., 2)
-#' predicted <- predict(model, testData)
-#' showDF(predicted)
-#' }
 #' @note predict(KMeansModel) since 2.0.0
 setMethod("predict", signature(object = "KMeansModel"),
           function(object, newData) {
@@ -513,15 +501,9 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
-#' @rdname write.ml
+#' @rdname spark.kmeans
 #' @name write.ml
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., k = 2)
-#' path <- "path/to/model"
-#' write.ml(model, path)
-#' }
 #' @note write.ml(KMeansModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
           function(object, path, overwrite = FALSE) {

From aeface51d3ea47e3eb65d3ea2105eb3f2d04f745 Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Mon, 27 Jun 2016 16:43:56 -0700
Subject: [PATCH 4/7] [SPARK-16140] remove duplicated '@param object' entries

---
 R/pkg/R/mllib.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 1aa6d3caaddc..84c5aa15ab63 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -373,7 +373,6 @@ setMethod("summary", signature(object = "KMeansModel"),
 #'
 #' Makes predictions from a k-means model or a model produced by spark.kmeans().
 #'
-#' @param object A fitted k-means model
 #' @return \code{predict} returns the predicted values based on a k-means model
 #' @rdname spark.kmeans
 #' @export
@@ -496,7 +495,6 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 #'
 #' Save the k-means model to the input path.
 #'
-#' @param object A fitted k-means model
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.

From 426c6593d382e50235664360713120363cb01492 Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Mon, 27 Jun 2016 20:40:30 -0700
Subject: [PATCH 5/7] [SPARK-16140] expand k-means description and drop per-method titles

---
 R/pkg/R/mllib.R | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 84c5aa15ab63..2ce1a34ec940 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -266,9 +266,10 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
             return(list(apriori = apriori, tables = tables))
           })
 
-#' K-Means Model
+#' K-Means Clustering Model
 #'
-#' Fits a k-means model, similarly to R's kmeans().
+#' Fits a k-means clustering model against a Spark DataFrame, similarly to R's kmeans().
+#' Users can print a summary of the fitted model, make predictions with it, and save it to a path.
 #'
 #' @param data SparkDataFrame for training
 #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -302,7 +303,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' summary(savedModel)
 #' }
 #' @note spark.kmeans since 2.0.0
-#' @seealso \link{kmeans}, \link{read.ml}
+#' @seealso \link{read.ml}, \link{write.ml}
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) {
             formula <- paste(deparse(formula), collapse = "")
@@ -340,9 +341,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
             }
           })
 
-#' Get the summary of a k-means model
-#'
-#' Returns the summary of a k-means model produced by spark.kmeans(), similarly to R's summary().
+#  Get the summary of a k-means model
 #'
 #' @param object A fitted k-means model
 #' @return \code{summary} returns the model's coefficients, size and cluster
@@ -369,9 +368,7 @@ setMethod("summary", signature(object = "KMeansModel"),
                    cluster = cluster, is.loaded = is.loaded))
           })
 
-#' Predicted values based on a k-means model
-#'
-#' Makes predictions from a k-means model or a model produced by spark.kmeans().
+#  Predicted values based on a k-means model
 #'
 #' @return \code{predict} returns the predicted values based on a k-means model
 #' @rdname spark.kmeans
@@ -491,9 +488,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
             invisible(callJMethod(writer, "save", path))
           })
 
-#' Save fitted MLlib model to the input path
-#'
-#' Save the k-means model to the input path.
+#  Save fitted MLlib model to the input path
 #'
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE

From 6fa1b60fec4100616d248cc6b63e3f9117ba9cff Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Tue, 28 Jun 2016 09:42:26 -0700
Subject: [PATCH 6/7] [SPARK-16140] add title for 'predict' and 'write.ml'
 generics

---
 R/pkg/R/generics.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 27dfd67ffc93..0e4350f861e4 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1247,6 +1247,7 @@ setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.gl
 #' @export
 setGeneric("glm")
 
+#' Predicted values based on model
 #' @rdname predict
 #' @export
 setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1271,6 +1272,7 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
 #' @export
 setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
 
+#' Save fitted MLlib model to the input path
 #' @rdname write.ml
 #' @export
 setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })

From 402026c63a05376b495846d864e1414d0989ef0b Mon Sep 17 00:00:00 2001
From: Xin Ren <iamshrek@126.com>
Date: Tue, 28 Jun 2016 14:00:48 -0700
Subject: [PATCH 7/7] [SPARK-16140] add link to generic predict and write.ml
 doc

---
 R/pkg/R/mllib.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index db0d39a00118..7b3b5eaa8210 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -304,7 +304,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' summary(savedModel)
 #' }
 #' @note spark.kmeans since 2.0.0
-#' @seealso \link{read.ml}, \link{write.ml}
+#' @seealso \link{predict}, \link{read.ml}, \link{write.ml}
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) {
             formula <- paste(deparse(formula), collapse = "")