Skip to content

Commit 66a7d6b

Browse files
felixcheung (Felix Cheung)
authored and committed
[SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, rtrim/ltrim/trim with trimString
## What changes were proposed in this pull request?
Add sql functions

## How was this patch tested?
manual, unit tests

Author: Felix Cheung <[email protected]>

Closes #20105 from felixcheung/rsqlfuncs.
1 parent afc3641 commit 66a7d6b

File tree

5 files changed

+106
-23
lines changed

5 files changed

+106
-23
lines changed

R/pkg/DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,4 @@ Collate:
5959
'window.R'
6060
RoxygenNote: 5.0.1
6161
VignetteBuilder: knitr
62+
NeedsCompilation: no

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ exportMethods("%<=>%",
228228
"crc32",
229229
"create_array",
230230
"create_map",
231+
"current_date",
232+
"current_timestamp",
231233
"hash",
232234
"cume_dist",
233235
"date_add",

R/pkg/R/functions.R

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ NULL
3939
#' Date time functions defined for \code{Column}.
4040
#'
4141
#' @param x Column to compute on. In \code{window}, it must be a time Column of
42-
#' \code{TimestampType}.
42+
#' \code{TimestampType}. This is not used with \code{current_date} and
43+
#' \code{current_timestamp}.
4344
#' @param format The format for the given dates or timestamps in Column \code{x}. See the
4445
#' format used in the following methods:
4546
#' \itemize{
@@ -1109,10 +1110,11 @@ setMethod("lower",
11091110
})
11101111

11111112
#' @details
1112-
#' \code{ltrim}: Trims the spaces from left end for the specified string value.
1113+
#' \code{ltrim}: Trims the spaces from left end for the specified string value. Optionally a
1114+
#' \code{trimString} can be specified.
11131115
#'
11141116
#' @rdname column_string_functions
1115-
#' @aliases ltrim ltrim,Column-method
1117+
#' @aliases ltrim ltrim,Column,missing-method
11161118
#' @export
11171119
#' @examples
11181120
#'
@@ -1128,12 +1130,24 @@ setMethod("lower",
11281130
#' head(tmp)}
11291131
#' @note ltrim since 1.5.0
11301132
setMethod("ltrim",
1131-
signature(x = "Column"),
1132-
function(x) {
1133+
signature(x = "Column", trimString = "missing"),
1134+
function(x, trimString) {
11331135
jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc)
11341136
column(jc)
11351137
})
11361138

1139+
#' @param trimString a character string to trim with
1140+
#' @rdname column_string_functions
1141+
#' @aliases ltrim,Column,character-method
1142+
#' @export
1143+
#' @note ltrim(Column, character) since 2.3.0
1144+
setMethod("ltrim",
1145+
signature(x = "Column", trimString = "character"),
1146+
function(x, trimString) {
1147+
jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc, trimString)
1148+
column(jc)
1149+
})
1150+
11371151
#' @details
11381152
#' \code{max}: Returns the maximum value of the expression in a group.
11391153
#'
@@ -1348,19 +1362,31 @@ setMethod("bround",
13481362
})
13491363

13501364
#' @details
1351-
#' \code{rtrim}: Trims the spaces from right end for the specified string value.
1365+
#' \code{rtrim}: Trims the spaces from right end for the specified string value. Optionally a
1366+
#' \code{trimString} can be specified.
13521367
#'
13531368
#' @rdname column_string_functions
1354-
#' @aliases rtrim rtrim,Column-method
1369+
#' @aliases rtrim rtrim,Column,missing-method
13551370
#' @export
13561371
#' @note rtrim since 1.5.0
13571372
setMethod("rtrim",
1358-
signature(x = "Column"),
1359-
function(x) {
1373+
signature(x = "Column", trimString = "missing"),
1374+
function(x, trimString) {
13601375
jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc)
13611376
column(jc)
13621377
})
13631378

1379+
#' @rdname column_string_functions
1380+
#' @aliases rtrim,Column,character-method
1381+
#' @export
1382+
#' @note rtrim(Column, character) since 2.3.0
1383+
setMethod("rtrim",
1384+
signature(x = "Column", trimString = "character"),
1385+
function(x, trimString) {
1386+
jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc, trimString)
1387+
column(jc)
1388+
})
1389+
13641390
#' @details
13651391
#' \code{sd}: Alias for \code{stddev_samp}.
13661392
#'
@@ -1789,19 +1815,31 @@ setMethod("to_timestamp",
17891815
})
17901816

17911817
#' @details
1792-
#' \code{trim}: Trims the spaces from both ends for the specified string column.
1818+
#' \code{trim}: Trims the spaces from both ends for the specified string column. Optionally a
1819+
#' \code{trimString} can be specified.
17931820
#'
17941821
#' @rdname column_string_functions
1795-
#' @aliases trim trim,Column-method
1822+
#' @aliases trim trim,Column,missing-method
17961823
#' @export
17971824
#' @note trim since 1.5.0
17981825
setMethod("trim",
1799-
signature(x = "Column"),
1800-
function(x) {
1826+
signature(x = "Column", trimString = "missing"),
1827+
function(x, trimString) {
18011828
jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc)
18021829
column(jc)
18031830
})
18041831

1832+
#' @rdname column_string_functions
1833+
#' @aliases trim,Column,character-method
1834+
#' @export
1835+
#' @note trim(Column, character) since 2.3.0
1836+
setMethod("trim",
1837+
signature(x = "Column", trimString = "character"),
1838+
function(x, trimString) {
1839+
jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc, trimString)
1840+
column(jc)
1841+
})
1842+
18051843
#' @details
18061844
#' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a binary column.
18071845
#' This is the reverse of base64.
@@ -2777,11 +2815,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
27772815
})
27782816

27792817
#' @details
2780-
#' \code{substring_index}: Returns the substring from string str before count occurrences of
2781-
#' the delimiter delim. If count is positive, everything the left of the final delimiter
2782-
#' (counting from left) is returned. If count is negative, every to the right of the final
2783-
#' delimiter (counting from the right) is returned. substring_index performs a case-sensitive
2784-
#' match when searching for delim.
2818+
#' \code{substring_index}: Returns the substring from string (\code{x}) before \code{count}
2819+
#' occurrences of the delimiter (\code{delim}). If \code{count} is positive, everything to the
2820+
#' left of the final delimiter (counting from the left) is returned. If \code{count} is negative,
2821+
#' everything to the right of the final delimiter (counting from the right) is returned.
2822+
#' \code{substring_index} performs a case-sensitive match when searching for the delimiter.
27852823
#'
27862824
#' @param delim a delimiter string.
27872825
#' @param count number of occurrences of \code{delim} before the substring is returned.
@@ -3504,3 +3542,34 @@ setMethod("date_trunc",
35043542
jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", format, x@jc)
35053543
column(jc)
35063544
})
3545+
3546+
#' @details
3547+
#' \code{current_date}: Returns the current date as a date column.
3548+
#'
3549+
#' @rdname column_datetime_functions
3550+
#' @aliases current_date current_date,missing-method
3551+
#' @export
3552+
#' @examples
3553+
#' \dontrun{
3554+
#' head(select(df, current_date(), current_timestamp()))}
3555+
#' @note current_date since 2.3.0
3556+
setMethod("current_date",
3557+
signature("missing"),
3558+
function() {
3559+
jc <- callJStatic("org.apache.spark.sql.functions", "current_date")
3560+
column(jc)
3561+
})
3562+
3563+
#' @details
3564+
#' \code{current_timestamp}: Returns the current timestamp as a timestamp column.
3565+
#'
3566+
#' @rdname column_datetime_functions
3567+
#' @aliases current_timestamp current_timestamp,missing-method
3568+
#' @export
3569+
#' @note current_timestamp since 2.3.0
3570+
setMethod("current_timestamp",
3571+
signature("missing"),
3572+
function() {
3573+
jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp")
3574+
column(jc)
3575+
})

R/pkg/R/generics.R

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,17 @@ setGeneric("hash", function(x, ...) { standardGeneric("hash") })
10271027
#' @name NULL
10281028
setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") })
10291029

1030+
#' @rdname column_datetime_functions
1031+
#' @export
1032+
#' @name NULL
1033+
setGeneric("current_date", function(x = "missing") { standardGeneric("current_date") })
1034+
1035+
#' @rdname column_datetime_functions
1036+
#' @export
1037+
#' @name NULL
1038+
setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") })
1039+
1040+
10301041
#' @rdname column_datetime_diff_functions
10311042
#' @export
10321043
#' @name NULL
@@ -1230,7 +1241,7 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
12301241
#' @rdname column_string_functions
12311242
#' @export
12321243
#' @name NULL
1233-
setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
1244+
setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
12341245

12351246
#' @rdname column_collection_functions
12361247
#' @export
@@ -1380,7 +1391,7 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
13801391
#' @rdname column_string_functions
13811392
#' @export
13821393
#' @name NULL
1383-
setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
1394+
setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
13841395

13851396
#' @rdname column_aggregate_functions
13861397
#' @export
@@ -1520,7 +1531,7 @@ setGeneric("translate", function(x, matchingString, replaceString) { standardGen
15201531
#' @rdname column_string_functions
15211532
#' @export
15221533
#' @name NULL
1523-
setGeneric("trim", function(x) { standardGeneric("trim") })
1534+
setGeneric("trim", function(x, trimString) { standardGeneric("trim") })
15241535

15251536
#' @rdname column_string_functions
15261537
#' @export

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,7 +1427,7 @@ test_that("column functions", {
14271427
c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
14281428
c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
14291429
c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
1430-
c12 <- variance(c)
1430+
c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
14311431
c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
14321432
c14 <- cume_dist() + ntile(1) + corr(c, c1)
14331433
c15 <- dense_rank() + percent_rank() + rank() + row_number()
@@ -1441,7 +1441,7 @@ test_that("column functions", {
14411441
c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
14421442
trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
14431443
c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) +
1444-
date_trunc("quarter", c)
1444+
date_trunc("quarter", c) + current_date() + current_timestamp()
14451445

14461446
# Test if base::is.nan() is exposed
14471447
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

0 commit comments

Comments
 (0)