
Commit ca5ad48

Merge remote-tracking branch 'apache/master' into SPARK-13927
2 parents: 3d76781 + 9202479

File tree: 855 files changed (+13200 / -17564 lines)


LICENSE
Lines changed: 1 addition & 1 deletion

@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.9.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.9.2 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - https://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/pkg/DESCRIPTION
Lines changed: 3 additions & 2 deletions

@@ -18,10 +18,10 @@ Collate:
     'schema.R'
     'generics.R'
     'jobj.R'
-    'RDD.R'
-    'pairRDD.R'
     'column.R'
     'group.R'
+    'RDD.R'
+    'pairRDD.R'
     'DataFrame.R'
     'SQLContext.R'
     'backend.R'
@@ -36,3 +36,4 @@ Collate:
     'stats.R'
     'types.R'
     'utils.R'
+RoxygenNote: 5.0.1

R/pkg/R/DataFrame.R
Lines changed: 28 additions & 8 deletions

@@ -303,8 +303,28 @@ setMethod("colnames",
 #' @rdname columns
 #' @name colnames<-
 setMethod("colnames<-",
-          signature(x = "DataFrame", value = "character"),
+          signature(x = "DataFrame"),
           function(x, value) {
+
+            # Check parameter integrity
+            if (class(value) != "character") {
+              stop("Invalid column names.")
+            }
+
+            if (length(value) != ncol(x)) {
+              stop(
+                "Column names must have the same length as the number of columns in the dataset.")
+            }
+
+            if (any(is.na(value))) {
+              stop("Column names cannot be NA.")
+            }
+
+            # Check if the column names have . in it
+            if (any(regexec(".", value, fixed = TRUE)[[1]][1] != -1)) {
+              stop("Colum names cannot contain the '.' symbol.")
+            }
+
             sdf <- callJMethod(x@sdf, "toDF", as.list(value))
             dataFrame(sdf)
           })
@@ -331,7 +351,7 @@ setMethod("coltypes",
             types <- sapply(dtypes(x), function(x) {x[[2]]})
 
             # Map Spark data types into R's data types using DATA_TYPES environment
-            rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
+            rTypes <- sapply(types, USE.NAMES = F, FUN = function(x) {
               # Check for primitive types
               type <- PRIMITIVE_TYPES[[x]]
 
@@ -1759,7 +1779,7 @@ setMethod("merge",
           signature(x = "DataFrame", y = "DataFrame"),
           function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by,
                    all = FALSE, all.x = all, all.y = all,
-                   sort = TRUE, suffixes = c("_x","_y"), ... ) {
+                   sort = TRUE, suffixes = c("_x", "_y"), ... ) {
 
             if (length(suffixes) != 2) {
               stop("suffixes must have length 2")
@@ -2279,7 +2299,7 @@ setMethod("as.data.frame",
           function(x, ...) {
             # Check if additional parameters have been passed
             if (length(list(...)) > 0) {
-              stop(paste("Unused argument(s): ", paste(list(...), collapse=", ")))
+              stop(paste("Unused argument(s): ", paste(list(...), collapse = ", ")))
             }
             collect(x)
           })
@@ -2375,13 +2395,13 @@ setMethod("str",
             # Get the first elements for each column
 
             firstElements <- if (types[i] == "character") {
-              paste(paste0("\"", localDF[,i], "\""), collapse = " ")
+              paste(paste0("\"", localDF[, i], "\""), collapse = " ")
             } else {
-              paste(localDF[,i], collapse = " ")
+              paste(localDF[, i], collapse = " ")
             }
 
             # Add the corresponding number of spaces for alignment
-            spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse="")
+            spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse = "")
 
             # Get the short type. For 'character', it would be 'chr';
             # 'for numeric', it's 'num', etc.
@@ -2393,7 +2413,7 @@ setMethod("str",
             # Concatenate the colnames, coltypes, and first
             # elements of each column
             line <- paste0(" $ ", names[i], spaces, ": ",
-                           dataType, " ",firstElements)
+                           dataType, " ", firstElements)
 
             # Chop off extra characters if this is too long
             cat(substr(line, 1, MAX_CHAR_PER_ROW))
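The `colnames<-` change above moves type checking out of the method signature and into explicit validation: the replacement value must be a character vector, match the number of columns, contain no NA, and contain no '.' characters. A minimal usage sketch, assuming a running SparkR session with an initialized sqlContext (the df object and names are illustrative only):

# Sketch only: assumes SparkR is attached and sqlContext has been created.
df <- createDataFrame(sqlContext, faithful)       # two-column example dataset
colnames(df) <- c("eruption_time", "wait_time")   # valid: character, correct length, no NA, no '.'
# colnames(df) <- c("only_one_name")              # error: length must equal ncol(df)
# colnames(df) <- c("a.b", "c")                   # error: names cannot contain '.'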

R/pkg/R/RDD.R
Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
 
 setMethod("show", "RDD",
           function(object) {
-              cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep=""))
+              cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = ""))
           })
 
 setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) {

R/pkg/R/context.R
Lines changed: 3 additions & 0 deletions

@@ -103,7 +103,10 @@ parallelize <- function(sc, coll, numSlices = 1) {
   # TODO: bound/safeguard numSlices
   # TODO: unit tests for if the split works for all primitives
   # TODO: support matrix, data frame, etc
+  # nolint start
+  # suppress lintr warning: Place a space before left parenthesis, except in a function call.
   if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) {
+    # nolint end
     if (is.data.frame(coll)) {
       message(paste("context.R: A data frame is parallelized by columns."))
     } else {

R/pkg/R/deserialize.R
Lines changed: 1 addition & 1 deletion

@@ -186,7 +186,7 @@ readMultipleObjects <- function(inputCon) {
   # of the objects, so the number of objects varies, we try to read
   # all objects in a loop until the end of the stream.
   data <- list()
-  while(TRUE) {
+  while (TRUE) {
     # If reaching the end of the stream, type returned should be "".
     type <- readType(inputCon)
     if (type == "") {

R/pkg/R/functions.R
Lines changed: 32 additions & 8 deletions

@@ -536,15 +536,27 @@ setMethod("factorial",
 #'
 #' Aggregate function: returns the first value in a group.
 #'
+#' The function by default returns the first values it sees. It will return the first non-missing
+#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
+#'
 #' @rdname first
 #' @name first
 #' @family agg_funcs
 #' @export
-#' @examples \dontrun{first(df$c)}
+#' @examples
+#' \dontrun{
+#' first(df$c)
+#' first(df$c, TRUE)
+#' }
 setMethod("first",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "first", x@jc)
+          signature(x = "characterOrColumn"),
+          function(x, na.rm = FALSE) {
+            col <- if (class(x) == "Column") {
+              x@jc
+            } else {
+              x
+            }
+            jc <- callJStatic("org.apache.spark.sql.functions", "first", col, na.rm)
             column(jc)
           })
 
@@ -663,15 +675,27 @@ setMethod("kurtosis",
 #'
 #' Aggregate function: returns the last value in a group.
 #'
+#' The function by default returns the last values it sees. It will return the last non-missing
+#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
+#'
 #' @rdname last
 #' @name last
 #' @family agg_funcs
 #' @export
-#' @examples \dontrun{last(df$c)}
+#' @examples
+#' \dontrun{
+#' last(df$c)
+#' last(df$c, TRUE)
+#' }
 setMethod("last",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "last", x@jc)
+          signature(x = "characterOrColumn"),
+          function(x, na.rm = FALSE) {
+            col <- if (class(x) == "Column") {
+              x@jc
+            } else {
+              x
+            }
+            jc <- callJStatic("org.apache.spark.sql.functions", "last", col, na.rm)
             column(jc)
           })
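With this change, first and last accept either a Column or a column name and gain an optional na.rm flag that is forwarded to the underlying Spark SQL functions. A hedged sketch of the new call forms, mirroring the roxygen examples above (df is a hypothetical SparkR DataFrame with a column named "c"; these calls build Column expressions for use in select or agg):

# Sketch only: df and its column "c" are illustrative.
first(df$c)          # first value seen in the group, NAs included
first(df$c, TRUE)    # first non-missing value (na.rm = TRUE)
last("c", TRUE)      # a character column name is accepted as well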

R/pkg/R/generics.R
Lines changed: 4 additions & 4 deletions

@@ -84,7 +84,7 @@ setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
 
 # @rdname first
 # @export
-setGeneric("first", function(x) { standardGeneric("first") })
+setGeneric("first", function(x, ...) { standardGeneric("first") })
 
 # @rdname flatMap
 # @export
@@ -607,15 +607,15 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr")
 
 #' @rdname showDF
 #' @export
-setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
+setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
 
 # @rdname subset
 # @export
 setGeneric("subset", function(x, ...) { standardGeneric("subset") })
 
 #' @rdname agg
 #' @export
-setGeneric("summarize", function(x,...) { standardGeneric("summarize") })
+setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
 
 #' @rdname summary
 #' @export
@@ -889,7 +889,7 @@ setGeneric("lag", function(x, ...) { standardGeneric("lag") })
 
 #' @rdname last
 #' @export
-setGeneric("last", function(x) { standardGeneric("last") })
+setGeneric("last", function(x, ...) { standardGeneric("last") })
 
 #' @rdname last_day
 #' @export

R/pkg/R/mllib.R
Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFram
           function(formula, family = c("gaussian", "binomial"), data, lambda = 0, alpha = 0,
                    standardize = TRUE, solver = "auto") {
             family <- match.arg(family)
-            formula <- paste(deparse(formula), collapse="")
+            formula <- paste(deparse(formula), collapse = "")
             model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
                                  "fitRModelFormula", formula, data@sdf, family, lambda,
                                  alpha, standardize, solver)

R/pkg/R/serialize.R
Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ writeJobj <- function(con, value) {
 writeString <- function(con, value) {
   utfVal <- enc2utf8(value)
   writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))
-  writeBin(utfVal, con, endian = "big", useBytes=TRUE)
+  writeBin(utfVal, con, endian = "big", useBytes = TRUE)
 }
 
 writeInt <- function(con, value) {
