
Commit ca5ad48

Merge remote-tracking branch 'apache/master' into SPARK-13927
2 parents: 3d76781 + 9202479

File tree: 855 files changed (+13200 / -17564 lines)


LICENSE
Lines changed: 1 addition & 1 deletion

@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.9.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.9.2 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - https://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/pkg/DESCRIPTION
Lines changed: 3 additions & 2 deletions

@@ -18,10 +18,10 @@ Collate:
     'schema.R'
     'generics.R'
     'jobj.R'
-    'RDD.R'
-    'pairRDD.R'
     'column.R'
     'group.R'
+    'RDD.R'
+    'pairRDD.R'
     'DataFrame.R'
     'SQLContext.R'
     'backend.R'
@@ -36,3 +36,4 @@ Collate:
     'stats.R'
     'types.R'
     'utils.R'
+RoxygenNote: 5.0.1

R/pkg/R/DataFrame.R
Lines changed: 28 additions & 8 deletions

@@ -303,8 +303,28 @@ setMethod("colnames",
 #' @rdname columns
 #' @name colnames<-
 setMethod("colnames<-",
-          signature(x = "DataFrame", value = "character"),
+          signature(x = "DataFrame"),
           function(x, value) {
+
+            # Check parameter integrity
+            if (class(value) != "character") {
+              stop("Invalid column names.")
+            }
+
+            if (length(value) != ncol(x)) {
+              stop(
+                "Column names must have the same length as the number of columns in the dataset.")
+            }
+
+            if (any(is.na(value))) {
+              stop("Column names cannot be NA.")
+            }
+
+            # Check if the column names have . in it
+            if (any(regexec(".", value, fixed = TRUE)[[1]][1] != -1)) {
+              stop("Colum names cannot contain the '.' symbol.")
+            }
+
             sdf <- callJMethod(x@sdf, "toDF", as.list(value))
             dataFrame(sdf)
           })
@@ -331,7 +351,7 @@ setMethod("coltypes",
             types <- sapply(dtypes(x), function(x) {x[[2]]})
 
             # Map Spark data types into R's data types using DATA_TYPES environment
-            rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
+            rTypes <- sapply(types, USE.NAMES = F, FUN = function(x) {
               # Check for primitive types
               type <- PRIMITIVE_TYPES[[x]]
 
@@ -1759,7 +1779,7 @@ setMethod("merge",
           signature(x = "DataFrame", y = "DataFrame"),
           function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by,
                    all = FALSE, all.x = all, all.y = all,
-                   sort = TRUE, suffixes = c("_x","_y"), ... ) {
+                   sort = TRUE, suffixes = c("_x", "_y"), ... ) {
 
             if (length(suffixes) != 2) {
               stop("suffixes must have length 2")
@@ -2279,7 +2299,7 @@ setMethod("as.data.frame",
           function(x, ...) {
             # Check if additional parameters have been passed
             if (length(list(...)) > 0) {
-              stop(paste("Unused argument(s): ", paste(list(...), collapse=", ")))
+              stop(paste("Unused argument(s): ", paste(list(...), collapse = ", ")))
             }
             collect(x)
           })
@@ -2375,13 +2395,13 @@ setMethod("str",
             # Get the first elements for each column
 
             firstElements <- if (types[i] == "character") {
-              paste(paste0("\"", localDF[,i], "\""), collapse = " ")
+              paste(paste0("\"", localDF[, i], "\""), collapse = " ")
             } else {
-              paste(localDF[,i], collapse = " ")
+              paste(localDF[, i], collapse = " ")
             }
 
             # Add the corresponding number of spaces for alignment
-            spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse="")
+            spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse = "")
 
             # Get the short type. For 'character', it would be 'chr';
             # 'for numeric', it's 'num', etc.
@@ -2393,7 +2413,7 @@ setMethod("str",
             # Concatenate the colnames, coltypes, and first
             # elements of each column
             line <- paste0(" $ ", names[i], spaces, ": ",
-                           dataType, " ",firstElements)
+                           dataType, " ", firstElements)
 
             # Chop off extra characters if this is too long
             cat(substr(line, 1, MAX_CHAR_PER_ROW))
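The `colnames<-` change above moves type checking out of the method signature and into explicit validation: the replacement value must be a character vector, match the number of columns, contain no NA, and contain no '.' characters. A minimal usage sketch, assuming a running SparkR session with an initialized sqlContext (the df object and names are illustrative only):

# Sketch only: assumes SparkR is attached and sqlContext has been created.
df <- createDataFrame(sqlContext, faithful)       # two-column example dataset
colnames(df) <- c("eruption_time", "wait_time")   # valid: character, correct length, no NA, no '.'
# colnames(df) <- c("only_one_name")              # error: length must equal ncol(df)
# colnames(df) <- c("a.b", "c")                   # error: names cannot contain '.'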

R/pkg/R/RDD.R
Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
 
 setMethod("show", "RDD",
           function(object) {
-              cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep=""))
+              cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = ""))
           })
 
 setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) {

R/pkg/R/context.R
Lines changed: 3 additions & 0 deletions

@@ -103,7 +103,10 @@ parallelize <- function(sc, coll, numSlices = 1) {
   # TODO: bound/safeguard numSlices
   # TODO: unit tests for if the split works for all primitives
   # TODO: support matrix, data frame, etc
+  # nolint start
+  # suppress lintr warning: Place a space before left parenthesis, except in a function call.
   if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) {
+    # nolint end
     if (is.data.frame(coll)) {
       message(paste("context.R: A data frame is parallelized by columns."))
     } else {

R/pkg/R/deserialize.R
Lines changed: 1 addition & 1 deletion

@@ -186,7 +186,7 @@ readMultipleObjects <- function(inputCon) {
   # of the objects, so the number of objects varies, we try to read
   # all objects in a loop until the end of the stream.
   data <- list()
-  while(TRUE) {
+  while (TRUE) {
     # If reaching the end of the stream, type returned should be "".
     type <- readType(inputCon)
     if (type == "") {

R/pkg/R/functions.R
Lines changed: 32 additions & 8 deletions

@@ -536,15 +536,27 @@ setMethod("factorial",
 #'
 #' Aggregate function: returns the first value in a group.
 #'
+#' The function by default returns the first values it sees. It will return the first non-missing
+#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
+#'
 #' @rdname first
 #' @name first
 #' @family agg_funcs
 #' @export
-#' @examples \dontrun{first(df$c)}
+#' @examples
+#' \dontrun{
+#' first(df$c)
+#' first(df$c, TRUE)
+#' }
 setMethod("first",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "first", x@jc)
+          signature(x = "characterOrColumn"),
+          function(x, na.rm = FALSE) {
+            col <- if (class(x) == "Column") {
+              x@jc
+            } else {
+              x
+            }
+            jc <- callJStatic("org.apache.spark.sql.functions", "first", col, na.rm)
             column(jc)
           })
 
@@ -663,15 +675,27 @@ setMethod("kurtosis",
 #'
 #' Aggregate function: returns the last value in a group.
 #'
+#' The function by default returns the last values it sees. It will return the last non-missing
+#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
+#'
 #' @rdname last
 #' @name last
 #' @family agg_funcs
 #' @export
-#' @examples \dontrun{last(df$c)}
+#' @examples
+#' \dontrun{
+#' last(df$c)
+#' last(df$c, TRUE)
+#' }
 setMethod("last",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "last", x@jc)
+          signature(x = "characterOrColumn"),
+          function(x, na.rm = FALSE) {
+            col <- if (class(x) == "Column") {
+              x@jc
+            } else {
+              x
+            }
+            jc <- callJStatic("org.apache.spark.sql.functions", "last", col, na.rm)
             column(jc)
           })
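With this change, first and last accept either a Column or a column name and gain an optional na.rm flag that is forwarded to the underlying Spark SQL functions. A hedged sketch of the new call forms, mirroring the roxygen examples above (df is a hypothetical SparkR DataFrame with a column named "c"; these calls build Column expressions for use in select or agg):

# Sketch only: df and its column "c" are illustrative.
first(df$c)          # first value seen in the group, NAs included
first(df$c, TRUE)    # first non-missing value (na.rm = TRUE)
last("c", TRUE)      # a character column name is accepted as well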

R/pkg/R/generics.R
Lines changed: 4 additions & 4 deletions

@@ -84,7 +84,7 @@ setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
 
 # @rdname first
 # @export
-setGeneric("first", function(x) { standardGeneric("first") })
+setGeneric("first", function(x, ...) { standardGeneric("first") })
 
 # @rdname flatMap
 # @export
@@ -607,15 +607,15 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr")
 
 #' @rdname showDF
 #' @export
-setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
+setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
 
 # @rdname subset
 # @export
 setGeneric("subset", function(x, ...) { standardGeneric("subset") })
 
 #' @rdname agg
 #' @export
-setGeneric("summarize", function(x,...) { standardGeneric("summarize") })
+setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
 
 #' @rdname summary
 #' @export
@@ -889,7 +889,7 @@ setGeneric("lag", function(x, ...) { standardGeneric("lag") })
 
 #' @rdname last
 #' @export
-setGeneric("last", function(x) { standardGeneric("last") })
+setGeneric("last", function(x, ...) { standardGeneric("last") })
 
 #' @rdname last_day
 #' @export

R/pkg/R/mllib.R
Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFram
           function(formula, family = c("gaussian", "binomial"), data, lambda = 0, alpha = 0,
                    standardize = TRUE, solver = "auto") {
             family <- match.arg(family)
-            formula <- paste(deparse(formula), collapse="")
+            formula <- paste(deparse(formula), collapse = "")
             model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
                                  "fitRModelFormula", formula, data@sdf, family, lambda,
                                  alpha, standardize, solver)

R/pkg/R/serialize.R
Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ writeJobj <- function(con, value) {
 writeString <- function(con, value) {
   utfVal <- enc2utf8(value)
   writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))
-  writeBin(utfVal, con, endian = "big", useBytes=TRUE)
+  writeBin(utfVal, con, endian = "big", useBytes = TRUE)
 }
 
 writeInt <- function(con, value) {
