@@ -65,8 +65,6 @@ setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
6565# ' @param maxIter integer giving the maximal number of IRLS iterations.
6666# ' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
6767# ' weights as 1.0.
68- # ' @param offsetCol the offset column name. If this is not set or empty, we treat all instance offsets
69- # ' as 0.0. The feature specified as offset has a constant coefficient of 1.0.
7068# ' @param regParam regularization parameter for L2 regularization.
7169# ' @param var.power the power in the variance function of the Tweedie distribution which provides
7270# ' the relationship between the variance and mean of the distribution. Only
@@ -78,6 +76,8 @@ setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
7876# ' "frequencyDesc", "frequencyAsc", "alphabetDesc", and "alphabetAsc".
7977# ' The default value is "frequencyDesc". When the ordering is set to
8078# ' "alphabetDesc", this drops the same category as R when encoding strings.
79+ # ' @param offsetCol the offset column name. If this is not set or empty, we treat all instance offsets
80+ # ' as 0.0. The feature specified as offset has a constant coefficient of 1.0.
8181# ' @param ... additional arguments passed to the method.
8282# ' @aliases spark.glm,SparkDataFrame,formula-method
8383# ' @return \code{spark.glm} returns a fitted generalized linear model.
@@ -127,9 +127,10 @@ setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
127127# ' @seealso \link{glm}, \link{read.ml}
128128setMethod ("spark.glm ", signature(data = "SparkDataFrame", formula = "formula"),
129129 function (data , formula , family = gaussian , tol = 1e-6 , maxIter = 25 , weightCol = NULL ,
130- offsetCol = NULL , regParam = 0.0 , var.power = 0.0 , link.power = 1.0 - var.power ,
130+ regParam = 0.0 , var.power = 0.0 , link.power = 1.0 - var.power ,
131131 stringIndexerOrderType = c(" frequencyDesc" , " frequencyAsc" ,
132- " alphabetDesc" , " alphabetAsc" )) {
132+ " alphabetDesc" , " alphabetAsc" ),
133+ offsetCol = NULL ) {
133134
134135 stringIndexerOrderType <- match.arg(stringIndexerOrderType )
135136 if (is.character(family )) {
@@ -161,18 +162,19 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
161162 weightCol <- as.character(weightCol )
162163 }
163164
164- if (! is.null(offsetCol ) && offsetCol == " " ) {
165- offsetCol <- NULL
166- } else if (! is.null(offsetCol )) {
165+ if (! is.null(offsetCol )) {
167166 offsetCol <- as.character(offsetCol )
167+ if (nchar(offsetCol ) == 0 ) {
168+ offsetCol <- NULL
169+ }
168170 }
169171
170172 # For known families, Gamma is upper-cased
171173 jobj <- callJStatic(" org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper" ,
172174 " fit" , formula , data @ sdf , tolower(family $ family ), family $ link ,
173- tol , as.integer(maxIter ), weightCol , offsetCol , regParam ,
175+ tol , as.integer(maxIter ), weightCol , regParam ,
174176 as.double(var.power ), as.double(link.power ),
175- stringIndexerOrderType )
177+ stringIndexerOrderType , offsetCol )
176178 new(" GeneralizedLinearRegressionModel" , jobj = jobj )
177179 })
178180
@@ -190,8 +192,6 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
190192# ' \code{poisson}, \code{Gamma}, and \code{tweedie}.
191193# ' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
192194# ' weights as 1.0.
193- # ' @param offsetCol the offset column name. If this is not set or empty, we treat all instance offsets
194- # ' as 0.0. The feature specified as offset has a constant coefficient of 1.0.
195195# ' @param epsilon positive convergence tolerance of iterations.
196196# ' @param maxit integer giving the maximal number of IRLS iterations.
197197# ' @param var.power the index of the power variance function in the Tweedie family.
@@ -202,6 +202,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
202202# ' "frequencyDesc", "frequencyAsc", "alphabetDesc", and "alphabetAsc".
203203# ' The default value is "frequencyDesc". When the ordering is set to
204204# ' "alphabetDesc", this drops the same category as R when encoding strings.
205+ # ' @param offsetCol the offset column name. If this is not set or empty, we treat all instance offsets
206+ # ' as 0.0. The feature specified as offset has a constant coefficient of 1.0.
205207# ' @return \code{glm} returns a fitted generalized linear model.
206208# ' @rdname glm
207209# ' @export
@@ -217,12 +219,14 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
217219# ' @seealso \link{spark.glm}
218220setMethod ("glm ", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
219221 function (formula , family = gaussian , data , epsilon = 1e-6 , maxit = 25 , weightCol = NULL ,
220- offsetCol = NULL , var.power = 0.0 , link.power = 1.0 - var.power ,
222+ var.power = 0.0 , link.power = 1.0 - var.power ,
221223 stringIndexerOrderType = c(" frequencyDesc" , " frequencyAsc" ,
222- " alphabetDesc" , " alphabetAsc" )) {
224+ " alphabetDesc" , " alphabetAsc" ),
225+ offsetCol = NULL ) {
223226 spark.glm(data , formula , family , tol = epsilon , maxIter = maxit , weightCol = weightCol ,
224- offsetCol = offsetCol , var.power = var.power , link.power = link.power ,
225- stringIndexerOrderType = stringIndexerOrderType )
227+ var.power = var.power , link.power = link.power ,
228+ stringIndexerOrderType = stringIndexerOrderType ,
229+ offsetCol = offsetCol )
226230 })
227231
228232# Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
0 commit comments