Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
db29c42
first pass at implementing `format()` for date-time
dragosmg Jan 25, 2022
cb162d9
`arrow::format()` uses object timezone if available
dragosmg Feb 3, 2022
fb67975
lint
dragosmg Feb 3, 2022
a7707c2
`format()` dispatch for `Timestamp` and `Date32` types
dragosmg Feb 3, 2022
f5f199a
create helper function for date/time format
dragosmg Feb 3, 2022
3e09b5c
moved format date/time and helper (+tests) to dplyr-funcs-type
dragosmg Feb 3, 2022
def79dc
remove arg check
dragosmg Feb 3, 2022
46ee180
testing abandon ship for unsupported types
dragosmg Feb 3, 2022
976e172
improve `format()` abort message
dragosmg Feb 3, 2022
1bbcdac
improve test description
dragosmg Feb 3, 2022
cf514eb
use `"Etc/GMT+6"` as unlikely unit test
dragosmg Feb 4, 2022
f8c2ed2
test on windows too
dragosmg Feb 4, 2022
2ae6249
skip on win
dragosmg Feb 4, 2022
40e1914
moved `binding_format_datetime()` to dplyr-funcs-datetime.R
dragosmg Feb 17, 2022
f771682
cast as string for unsupported formats + update unit tests
dragosmg Feb 17, 2022
5411458
without casting
dragosmg Feb 17, 2022
8859ebb
with casting
dragosmg Feb 17, 2022
03eb111
added TODO to revisit the casting step once #12240 is merged
dragosmg Feb 23, 2022
09d99b4
used `build_expr()` and simplified the implementation
dragosmg Mar 3, 2022
8ecf37a
typo
dragosmg Mar 3, 2022
c0688e1
:)
dragosmg Mar 3, 2022
c118700
changed one test to use `arrow_table()`
dragosmg Mar 5, 2022
a6c0b19
updated the `format()` binding to use `build_expr()`
dragosmg Mar 5, 2022
7878b37
keep dispatch for regular R objects in and add a unit test for `forma…
dragosmg Mar 5, 2022
ce5a0c1
additional unit test with regular R object
dragosmg Mar 8, 2022
9668f01
removed unevaluated args and added comment on why we use `base::forma…
dragosmg Mar 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,23 @@ register_bindings_datetime <- function() {
build_expr("cast", x, options = list(to_type = date32()))
})
}

# Build an Arrow "strftime" expression that formats a date/time expression
# as a string, mirroring the `format`, `tz` and `usetz` arguments of
# `base::format()` for date-times.
#
# Arguments:
#   x      the expression to format; it must provide `$type()` when it is a
#          timestamp (presumably an arrow Expression -- confirm with callers).
#   format strftime-style format string, passed through to Arrow unchanged.
#   tz     time zone used for timestamps; "" means "use the time zone stored
#          in the type of `x`, or the session time zone if there is none".
#   usetz  if TRUE, " %Z" is appended to `format`.
#
# Returns the expression created by `build_expr("strftime", ...)`, using the
# current `LC_TIME` locale.
binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
  if (usetz) {
    format <- paste(format, "%Z")
  }

  if (call_binding("is.POSIXct", x)) {
    # the casting part might not be required once
    # https://issues.apache.org/jira/browse/ARROW-14442 is solved
    # TODO revisit the steps below once the PR for that issue is merged
    x_type <- x$type()

    # An explicit `tz` wins; otherwise prefer the time zone carried by the
    # timestamp type itself.
    if (tz == "") {
      tz <- x_type$timezone()
    }

    # Neither the caller nor the type supplied a time zone: use the session's.
    if (tz == "") {
      tz <- Sys.timezone()
      # Sys.timezone() is documented to return NA when the zone cannot be
      # determined; do not pass NA on to timestamp().
      # NOTE(review): "UTC" is assumed to be an acceptable fallback -- confirm.
      if (is.na(tz)) {
        tz <- "UTC"
      }
    }

    x <- build_expr(
      "cast",
      x,
      options = cast_options(to_type = timestamp(x_type$unit(), tz))
    )
  }

  build_expr(
    "strftime",
    x,
    options = list(format = format, locale = Sys.getlocale("LC_TIME"))
  )
}
18 changes: 18 additions & 0 deletions r/R/dplyr-funcs-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ register_bindings_type <- function() {
register_bindings_type_cast()
register_bindings_type_inspect()
register_bindings_type_elementwise()
register_bindings_type_format()
}

register_bindings_type_cast <- function() {
Expand Down Expand Up @@ -292,3 +293,20 @@ register_bindings_type_elementwise <- function() {
is_inf & !call_binding("is.na", is_inf)
})
}

# Register the dplyr binding for `format()`.
#
# Dispatch rules of the registered function:
#   * anything that is not an "Expression" is handed to R's own `format()`;
#   * TIMESTAMP / DATE32 / DATE64 expressions go to
#     `binding_format_datetime()`, with `...` forwarded;
#   * every other expression is cast to string (here `...` is not used).
register_bindings_type_format <- function() {
  register_binding("format", function(x, ...) {
    # We use R's format if we get a single R object here since we don't (yet)
    # support all of the possible options for casting to string
    if (!inherits(x, "Expression")) {
      return(format(x, ...))
    }

    # From here on `x` is known to be an Expression.
    datetime_type_ids <- Type[c("TIMESTAMP", "DATE32", "DATE64")]
    if (x$type_id() %in% datetime_type_ids) {
      return(binding_format_datetime(x, ...))
    }

    build_expr("cast", x, options = cast_options(to_type = string()))
  })
}
102 changes: 102 additions & 0 deletions r/tests/testthat/test-dplyr-funcs-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -843,3 +843,105 @@ test_that("as.Date() converts successfully from date, timestamp, integer, char a
test_df
)
})

test_that("format date/time", {
  skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

  # One timestamp column (with a time zone) and one date column, each with NA.
  datetime_df <- tibble(
    datetime = c(
      lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas"),
      NA
    ),
    date = c(as.Date("2021-01-01"), NA)
  )
  # The date format string is the timestamp one without the %z / %Z fields.
  datetime_fmt <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u"
  date_fmt <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u"

  # timestamp column, time zone taken from the data
  compare_dplyr_binding(
    .input %>%
      mutate(x = format(datetime, format = datetime_fmt)) %>%
      collect(),
    datetime_df
  )

  # date column
  compare_dplyr_binding(
    .input %>%
      mutate(x = format(date, format = date_fmt)) %>%
      collect(),
    datetime_df
  )

  # explicit time zone
  compare_dplyr_binding(
    .input %>%
      mutate(
        x = format(datetime, format = datetime_fmt, tz = "Europe/Bucharest")
      ) %>%
      collect(),
    datetime_df
  )

  # explicit time zone plus usetz
  compare_dplyr_binding(
    .input %>%
      mutate(
        x = format(datetime, format = datetime_fmt, tz = "EST", usetz = TRUE)
      ) %>%
      collect(),
    datetime_df
  )

  # plain R objects are formatted by R's own format()
  compare_dplyr_binding(
    .input %>%
      mutate(
        x = format(1),
        y = format(13.7, nsmall = 3)
      ) %>%
      collect(),
    datetime_df
  )

  compare_dplyr_binding(
    .input %>%
      mutate(start_date = format(as.POSIXct("2022-01-01 01:01:00"))) %>%
      collect(),
    datetime_df
  )

  # same checks with the session time zone changed
  withr::with_timezone("Pacific/Marquesas", {
    compare_dplyr_binding(
      .input %>%
        mutate(
          x = format(datetime, format = datetime_fmt, tz = "EST"),
          x_date = format(date, format = date_fmt, tz = "EST")
        ) %>%
        collect(),
      datetime_df
    )

    compare_dplyr_binding(
      .input %>%
        mutate(
          x = format(datetime, format = datetime_fmt),
          x_date = format(date, format = date_fmt)
        ) %>%
        collect(),
      datetime_df
    )
  })
})

test_that("format() for unsupported types returns the input as string", {
  # For non date/time columns, format() is expected to give the same result
  # as as.character().

  # integer column on a RecordBatch
  example_batch <- record_batch(example_data)
  expect_equal(
    example_batch %>%
      mutate(x = format(int)) %>%
      collect(),
    example_batch %>%
      mutate(x = as.character(int)) %>%
      collect()
  )

  # double column on a Table
  example_table <- arrow_table(example_data)
  expect_equal(
    example_table %>%
      mutate(y = format(dbl)) %>%
      collect(),
    example_table %>%
      mutate(y = as.character(dbl)) %>%
      collect()
  )
})