Skip to content
63 changes: 31 additions & 32 deletions R/pkg/R/mllib.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,9 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
return(list(apriori = apriori, tables = tables))
})

#' Fit a k-means model
#' K-Means Model

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

K-Means Clustering Model

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry I actually meant the description could be more like plain sentences, rather than bullet points.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears as the title, like in Generalized Linear Models.

#'
#' Fit a k-means model, similarly to R's kmeans().
#' Fits a k-means model, similarly to R's kmeans().
#'
#' @param data SparkDataFrame for training
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
Expand All @@ -277,14 +277,32 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
#' @param k Number of centers
#' @param maxIter Maximum iteration number
#' @param initMode The initialization algorithm chosen to fit the model
#' @return A fitted k-means model
#' @return \code{spark.kmeans} returns a fitted k-means model
#' @rdname spark.kmeans
#' @name spark.kmeans
#' @export
#' @examples
#' \dontrun{
#' model <- spark.kmeans(data, ~ ., k = 4, initMode = "random")
#' sparkR.session()
#' data(iris)
#' df <- createDataFrame(iris)
#' model <- spark.kmeans(df, Sepal_Length ~ Sepal_Width, k = 4, initMode = "random")
#' summary(model)
#'
#' # fitted values on training data
#' fitted <- predict(model, df)
#' head(select(fitted, "Sepal_Length", "prediction"))
#'
#' # save fitted model to input path
#' path <- "path/to/model"
#' write.ml(model, path)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @junyangq, isn't it write.ml (KM) you are talking about?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, can you modify the doc of write.ml method for KM to bring it to the same page as other KM methods?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry, I see what you mean now: you mean bring it to the Usage section of this page, right? I'll try to do it.

#'
#' # can also read back the saved model and print
#' savedModel <- read.ml(path)
#' summary(savedModel)
#' }
#' @note spark.kmeans since 2.0.0
#' @seealso \link{kmeans}, \link{read.ml}
setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"),
function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) {
formula <- paste(deparse(formula), collapse = "")
Expand All @@ -300,7 +318,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
#' Note: A saved-loaded model does not support this method.
#'
#' @param object A fitted k-means model
#' @return SparkDataFrame containing fitted values
#' @return \code{fitted} returns a SparkDataFrame containing fitted values
#' @rdname fitted
#' @export
#' @examples
Expand All @@ -324,18 +342,12 @@ setMethod("fitted", signature(object = "KMeansModel"),

#' Get the summary of a k-means model
#'
#' Returns the summary of a k-means model produced by spark.kmeans(),
#' similarly to R's summary().
#' Returns the summary of a k-means model produced by spark.kmeans(), similarly to R's summary().
#'
#' @param object a fitted k-means model
#' @return the model's coefficients, size and cluster
#' @rdname summary
#' @param object A fitted k-means model
#' @return \code{summary} returns the model's coefficients, size and cluster
#' @rdname spark.kmeans
#' @export
#' @examples
#' \dontrun{
#' model <- spark.kmeans(trainingData, ~ ., 2)
#' summary(model)
#' }
#' @note summary(KMeansModel) since 2.0.0
setMethod("summary", signature(object = "KMeansModel"),
function(object, ...) {
Expand All @@ -357,19 +369,13 @@ setMethod("summary", signature(object = "KMeansModel"),
cluster = cluster, is.loaded = is.loaded))
})

#' Predicted values based on model
#' Predicted values based on a k-means model
#'
#' Makes predictions from a k-means model or a model produced by spark.kmeans().
#'
#' @param object A fitted k-means model
#' @rdname predict
#' @return \code{predict} returns the predicted values based on a k-means model
#' @rdname spark.kmeans
#' @export
#' @examples
#' \dontrun{
#' model <- spark.kmeans(trainingData, ~ ., 2)
#' predicted <- predict(model, testData)
#' showDF(predicted)
#' }
#' @note predict(KMeansModel) since 2.0.0
setMethod("predict", signature(object = "KMeansModel"),
function(object, newData) {
Expand Down Expand Up @@ -489,20 +495,13 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
#'
#' Save the k-means model to the input path.
#'
#' @param object A fitted k-means model
#' @param path The directory where the model is saved
#' @param overwrite Whether to overwrite if the output path already exists. Default is FALSE,
#' which means an exception is thrown if the output path exists.
#'
#' @rdname write.ml
#' @rdname spark.kmeans
#' @name write.ml
#' @export
#' @examples
#' \dontrun{
#' model <- spark.kmeans(trainingData, ~ ., k = 2)
#' path <- "path/to/model"
#' write.ml(model, path)
#' }
#' @note write.ml(KMeansModel, character) since 2.0.0
setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
function(object, path, overwrite = FALSE) {
Expand Down