[SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, rtrim/ltrim/trim with trimString

felixcheung · Felix Cheung · commit 66a7d6b30fe5 · 2017-12-29T10:51:43.000-08:00
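## What changes were proposed in this pull request? Add SQL functions to SparkR: current_date, current_timestamp, and rtrim/ltrim/trim with an optional trimString. ## How was this patch tested? Manual testing and unit tests. Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #20105 from felixcheung/rsqlfuncs.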
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
@@ -59,3 +59,4 @@ Collate:
     'window.R'
 RoxygenNote: 5.0.1
 VignetteBuilder: knitr
+NeedsCompilation: no
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
@@ -228,6 +228,8 @@ exportMethods("%<=>%",
               "crc32",
               "create_array",
               "create_map",
+              "current_date",
+              "current_timestamp",
               "hash",
               "cume_dist",
               "date_add",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
@@ -39,7 +39,8 @@ NULL
 #' Date time functions defined for \code{Column}.
 #'
 #' @param x Column to compute on. In \code{window}, it must be a time Column of
-#'          \code{TimestampType}.
+#'          \code{TimestampType}. This is not used with \code{current_date} and
+#'          \code{current_timestamp}.
 #' @param format The format for the given dates or timestamps in Column \code{x}. See the
 #'               format used in the following methods:
 #'               \itemize{
@@ -1109,10 +1110,11 @@ setMethod("lower",
           })
 
 #' @details
-#' \code{ltrim}: Trims the spaces from left end for the specified string value.
+#' \code{ltrim}: Trims the spaces from left end for the specified string value. Optionally a
+#' \code{trimString} can be specified to trim its characters instead of spaces.
 #'
 #' @rdname column_string_functions
-#' @aliases ltrim ltrim,Column-method
+#' @aliases ltrim ltrim,Column,missing-method
 #' @export
 #' @examples
 #'
@@ -1128,12 +1130,24 @@ setMethod("lower",
 #' head(tmp)}
 #' @note ltrim since 1.5.0
 setMethod("ltrim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc)
             column(jc)
           })
 
+#' @param trimString a character string to trim with
+#' @rdname column_string_functions
+#' @aliases ltrim,Column,character-method
+#' @export
+#' @note ltrim(Column, character) since 2.3.0
+setMethod("ltrim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc, trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{max}: Returns the maximum value of the expression in a group.
 #'
@@ -1348,19 +1362,31 @@ setMethod("bround",
           })
 
 #' @details
-#' \code{rtrim}: Trims the spaces from right end for the specified string value.
+#' \code{rtrim}: Trims the spaces from right end for the specified string value. Optionally a
+#' \code{trimString} can be specified to trim its characters instead of spaces.
 #'
 #' @rdname column_string_functions
-#' @aliases rtrim rtrim,Column-method
+#' @aliases rtrim rtrim,Column,missing-method
 #' @export
 #' @note rtrim since 1.5.0
 setMethod("rtrim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc)
             column(jc)
           })
 
+#' @rdname column_string_functions
+#' @aliases rtrim,Column,character-method
+#' @export
+#' @note rtrim(Column, character) since 2.3.0
+setMethod("rtrim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc, trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{sd}: Alias for \code{stddev_samp}.
 #'
@@ -1789,19 +1815,31 @@ setMethod("to_timestamp",
           })
 
 #' @details
-#' \code{trim}: Trims the spaces from both ends for the specified string column.
+#' \code{trim}: Trims the spaces from both ends for the specified string column. Optionally a
+#' \code{trimString} can be specified to trim its characters instead of spaces.
 #'
 #' @rdname column_string_functions
-#' @aliases trim trim,Column-method
+#' @aliases trim trim,Column,missing-method
 #' @export
 #' @note trim since 1.5.0
 setMethod("trim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc)
             column(jc)
           })
 
+#' @rdname column_string_functions
+#' @aliases trim,Column,character-method
+#' @export
+#' @note trim(Column, character) since 2.3.0
+setMethod("trim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc, trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a binary column.
 #' This is the reverse of base64.
@@ -2777,11 +2815,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
           })
 
 #' @details
-#' \code{substring_index}: Returns the substring from string str before count occurrences of
-#' the delimiter delim. If count is positive, everything the left of the final delimiter
-#' (counting from left) is returned. If count is negative, every to the right of the final
-#' delimiter (counting from the right) is returned. substring_index performs a case-sensitive
-#' match when searching for delim.
+#' \code{substring_index}: Returns the substring from string (\code{x}) before \code{count}
+#' occurrences of the delimiter (\code{delim}). If \code{count} is positive, everything to the
+#' left of the final delimiter (counting from the left) is returned. If \code{count} is negative,
+#' everything to the right of the final delimiter (counting from the right) is returned.
+#' \code{substring_index} performs a case-sensitive match when searching for the delimiter.
 #'
 #' @param delim a delimiter string.
 #' @param count number of occurrences of \code{delim} before the substring is returned.
@@ -3504,3 +3542,34 @@ setMethod("date_trunc",
             jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", format, x@jc)
             column(jc)
           })
+
+#' @details
+#' \code{current_date}: Returns the current date as a date column.
+#'
+#' @rdname column_datetime_functions
+#' @aliases current_date current_date,missing-method
+#' @export
+#' @examples
+#' \dontrun{
+#' head(select(df, current_date(), current_timestamp()))}
+#' @note current_date since 2.3.0
+setMethod("current_date",
+          signature("missing"),
+          function() {
+            jc <- callJStatic("org.apache.spark.sql.functions", "current_date")
+            column(jc)
+          })
+
+#' @details
+#' \code{current_timestamp}: Returns the current timestamp as a timestamp column.
+#'
+#' @rdname column_datetime_functions
+#' @aliases current_timestamp current_timestamp,missing-method
+#' @export
+#' @note current_timestamp since 2.3.0
+setMethod("current_timestamp",
+          signature("missing"),
+          function() {
+            jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp")
+            column(jc)
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
@@ -1027,6 +1027,17 @@ setGeneric("hash", function(x, ...) { standardGeneric("hash") })
 #' @name NULL
 setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") })
 
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
+setGeneric("current_date", function(x = "missing") { standardGeneric("current_date") })
+
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
+setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") })
+
+
 #' @rdname column_datetime_diff_functions
 #' @export
 #' @name NULL
@@ -1230,7 +1241,7 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
+setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
 
 #' @rdname column_collection_functions
 #' @export
@@ -1380,7 +1391,7 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
+setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
 
 #' @rdname column_aggregate_functions
 #' @export
@@ -1520,7 +1531,7 @@ setGeneric("translate", function(x, matchingString, replaceString) { standardGen
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("trim", function(x) { standardGeneric("trim") })
+setGeneric("trim", function(x, trimString) { standardGeneric("trim") })
 
 #' @rdname column_string_functions
 #' @export
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1427,7 +1427,7 @@ test_that("column functions", {
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
   c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
-  c12 <- variance(c)
+  c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
   c14 <- cume_dist() + ntile(1) + corr(c, c1)
   c15 <- dense_rank() + percent_rank() + rank() + row_number()
@@ -1441,7 +1441,7 @@ test_that("column functions", {
   c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
     trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
   c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) +
-    date_trunc("quarter", c)
+    date_trunc("quarter", c) + current_date() + current_timestamp()
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))