-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16107] [R] group glm methods in documentation #13820
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj")) | |
| #' @note KMeansModel since 2.0.0 | ||
| setClass("KMeansModel", representation(jobj = "jobj")) | ||
|
|
||
| #' Fits a generalized linear model | ||
| #' Generalized Linear Models | ||
| #' | ||
| #' Fits a generalized linear model against a Spark DataFrame. | ||
| #' Fits a generalized linear model against a Spark DataFrame. Users can print, make predictions on the | ||
| #' produced model and save the model to the input path. | ||
| #' | ||
| #' @param data SparkDataFrame for training. | ||
| #' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
|
|
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
| #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
| #' @param tol Positive convergence tolerance of iterations. | ||
| #' @param maxIter Integer giving the maximal number of IRLS iterations. | ||
| #' @return a fitted generalized linear model | ||
| #' @return \code{spark.glm} returns a fitted generalized linear model | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. since this is the page for
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd view |
||
| #' @rdname spark.glm | ||
| #' @name spark.glm | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
|
|
@@ -76,7 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj")) | |
| #' df <- createDataFrame(iris) | ||
| #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian") | ||
| #' summary(model) | ||
| #' | ||
| #' # fitted values on training data | ||
| #' fitted <- predict(model, df) | ||
| #' head(select(fitted, "Sepal_Length", "prediction")) | ||
| #' | ||
| #' # save fitted model to input path | ||
| #' path <- "path/to/model" | ||
| #' write.ml(model, path) | ||
| #' | ||
| #' # can also read back the saved model and print | ||
| #' savedModel <- read.ml(path) | ||
| #' summary(savedModel) | ||
| #' } | ||
|
|
||
|
|
||
|
||
| #' @note spark.glm since 2.0.0 | ||
| setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | ||
| function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { | ||
|
|
@@ -99,10 +115,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
| return(new("GeneralizedLinearRegressionModel", jobj = jobj)) | ||
| }) | ||
|
|
||
| #' Fits a generalized linear model (R-compliant). | ||
| #' | ||
| #' Fits a generalized linear model, similarly to R's glm(). | ||
| #' | ||
| #' @title Fit a generalized linear model | ||
|
||
| #' @param formula A symbolic description of the model to be fitted. Currently only a few formula | ||
| #' operators are supported, including '~', '.', ':', '+', and '-'. | ||
| #' @param data SparkDataFrame for training. | ||
|
|
@@ -112,36 +125,23 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), | |
| #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please put a see also and link it to |
||
| #' @param epsilon Positive convergence tolerance of iterations. | ||
| #' @param maxit Integer giving the maximal number of IRLS iterations. | ||
| #' @return a fitted generalized linear model | ||
| #' @return \code{glm} returns a fitted generalized linear model. | ||
|
||
| #' @rdname glm | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is fine to put |
||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' sparkR.session() | ||
| #' data(iris) | ||
| #' df <- createDataFrame(iris) | ||
| #' model <- glm(Sepal_Length ~ Sepal_Width, df, family = "gaussian") | ||
| #' summary(model) | ||
| #' } | ||
| #' @note glm since 1.5.0 | ||
| setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), | ||
| function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { | ||
| spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) | ||
| }) | ||
|
|
||
| #' Get the summary of a generalized linear model | ||
| #' | ||
| #' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary(). | ||
| #' | ||
| #' @title Return a summary of the produced generalized linear model | ||
|
||
| #' @param object A fitted generalized linear model | ||
| #' @return coefficients the model's coefficients, intercept | ||
| #' @rdname summary | ||
| #' @return \code{summary} returns a summary object of the fitted model, a list of components | ||
| #' including at least the coefficients, null/residual deviance, null/residual degrees | ||
| #' of freedom, AIC and number of iterations IRLS takes. | ||
| #' | ||
| #' @rdname spark.glm | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' model <- glm(y ~ x, trainingData) | ||
| #' summary(model) | ||
| #' } | ||
| #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 | ||
| setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | ||
| function(object, ...) { | ||
|
|
@@ -173,10 +173,9 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), | |
| return(ans) | ||
| }) | ||
|
|
||
| #' Print the summary of GeneralizedLinearRegressionModel | ||
| #' | ||
| #' @rdname print | ||
| #' @name print.summary.GeneralizedLinearRegressionModel | ||
| #' @title Print the summary of the produced generalized linear model | ||
| #' @rdname spark.glm | ||
| #' @param x Summary object of fitted generalized linear model returned by \code{summary} function | ||
| #' @export | ||
| #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 | ||
| print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | ||
|
|
@@ -205,22 +204,11 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { | |
| invisible(x) | ||
| } | ||
|
|
||
| #' Predicted values based on model | ||
| #' | ||
| #' Makes predictions from a generalized linear model produced by glm() or spark.glm(), | ||
| #' similarly to R's predict(). | ||
| #' | ||
| #' @param object A fitted generalized linear model | ||
| #' @title Make predictions using the produced generalized linear model | ||
| #' @param newData SparkDataFrame for testing | ||
| #' @return SparkDataFrame containing predicted labels in a column named "prediction" | ||
| #' @rdname predict | ||
| #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction" | ||
| #' @rdname spark.glm | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' model <- glm(y ~ x, trainingData) | ||
| #' predicted <- predict(model, testData) | ||
| #' showDF(predicted) | ||
| #' } | ||
| #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 | ||
| setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), | ||
| function(object, newData) { | ||
|
|
@@ -471,25 +459,16 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c | |
| invisible(callJMethod(writer, "save", path)) | ||
| }) | ||
|
|
||
| #' Save fitted MLlib model to the input path | ||
| #' | ||
| #' Save the generalized linear model to the input path. | ||
| #' @title Save fitted generalized linear model to the input path | ||
| #' | ||
| #' @param object A fitted generalized linear model | ||
| #' @param path The directory where the model is saved | ||
| #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE | ||
| #' which means throw exception if the output path exists. | ||
| #' | ||
| #' @rdname write.ml | ||
| #' @name write.ml | ||
| #' @rdname spark.glm | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' model <- glm(y ~ x, trainingData) | ||
| #' path <- "path/to/model" | ||
| #' write.ml(model, path) | ||
| #' } | ||
| #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0 | ||
| #' @seealso \link{read.ml} | ||
| setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"), | ||
| function(object, path, overwrite = FALSE) { | ||
| writer <- callJMethod(object@jobj, "write") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fit -> Fits; Can print -> Users can print