diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5a22c970965..2904abb30fe 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -168,3 +168,23 @@ register_bindings_datetime <- function() { build_expr("cast", x, options = list(to_type = date32())) }) } + +binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) { + if (usetz) { + format <- paste(format, "%Z") + } + + if (call_binding("is.POSIXct", x)) { + # the casting part might not be required once + # https://issues.apache.org/jira/browse/ARROW-14442 is solved + # TODO revisit the steps below once the PR for that issue is merged + if (tz == "" && x$type()$timezone() != "") { + tz <- x$type()$timezone() + } else if (tz == "") { + tz <- Sys.timezone() + } + x <- build_expr("cast", x, options = cast_options(to_type = timestamp(x$type()$unit(), tz))) + } + + build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) +} diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index fa839269abe..52a8eeabaf3 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -20,6 +20,7 @@ register_bindings_type <- function() { register_bindings_type_cast() register_bindings_type_inspect() register_bindings_type_elementwise() + register_bindings_type_format() } register_bindings_type_cast <- function() { @@ -292,3 +293,20 @@ register_bindings_type_elementwise <- function() { is_inf & !call_binding("is.na", is_inf) }) } + +register_bindings_type_format <- function() { + register_binding("format", function(x, ...) { + # We use R's format if we get a single R object here since we don't (yet) + # support all of the possible options for casting to string + if (!inherits(x, "Expression")) { + return(format(x, ...)) + } + + if (inherits(x, "Expression") && + x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) { + binding_format_datetime(x, ...) + } else { + build_expr("cast", x, options = cast_options(to_type = string())) + } + }) +} diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 043a36a951a..5cbe77763af 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -843,3 +843,105 @@ test_that("as.Date() converts successfully from date, timestamp, integer, char a test_df ) }) + +test_that("format date/time", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + + times <- tibble( + datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas"), NA), + date = c(as.Date("2021-01-01"), NA) + ) + formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" + formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u" + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(date, format = formats_date)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "Europe/Bucharest")) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "EST", usetz = TRUE)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(1), + y = format(13.7, nsmall = 3)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(start_date = format(as.POSIXct("2022-01-01 01:01:00"))) %>% + collect(), + times + ) + + withr::with_timezone( + "Pacific/Marquesas", + { + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats, tz = "EST"), + x_date = format(date, format = formats_date, tz = "EST") + ) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats), + x_date = format(date, format = formats_date) + ) %>% + collect(), + times + ) + } + ) +}) + +test_that("format() for unsupported types returns the input as string", { + expect_equal( + example_data %>% + record_batch() %>% + mutate(x = format(int)) %>% + collect(), + example_data %>% + record_batch() %>% + mutate(x = as.character(int)) %>% + collect() + ) + expect_equal( + example_data %>% + arrow_table() %>% + mutate(y = format(dbl)) %>% + collect(), + example_data %>% + arrow_table() %>% + mutate(y = as.character(dbl)) %>% + collect() + ) +})