Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
713b2ed
added integerish test for `as.Date()`
dragosmg Mar 21, 2022
256e97a
add integerish support for `as.Date()`
dragosmg Mar 21, 2022
ec99da3
replace `compare_dplyr_binding()` with `expect_error()`
dragosmg Mar 21, 2022
7571fc0
added the `as_date()` binding
dragosmg Mar 21, 2022
11e99bd
if `format` is unspecified, assume ISO
dragosmg Mar 21, 2022
071d68d
unit tests for `as_date()`
dragosmg Mar 21, 2022
30ac13d
update NEWS
dragosmg Mar 21, 2022
c9fb23a
moved the `as_date()` binding to _dplyr-funcs-datetime_
dragosmg Mar 21, 2022
67c56b5
moved `as_date()` tests to _test-dplyr-funcs-datetime_
dragosmg Mar 21, 2022
517af04
moved `as.Date()` & tests to dplyr-funcs-datetime
dragosmg Mar 21, 2022
c53a8c8
gymnastics with the location of the bindings (in prep for rebase)
dragosmg Mar 22, 2022
08e629f
first pass at `as_datetime()`
dragosmg Mar 22, 2022
14bf8ea
lint + reorg
dragosmg Mar 22, 2022
7cd97c9
`as_datetime()` binding
dragosmg Mar 22, 2022
453199c
improvements to `as_datetime()`
dragosmg Mar 23, 2022
99cea4a
unit test first step
dragosmg Mar 23, 2022
76803ee
update
dragosmg Mar 23, 2022
b496c12
moved datetime-related tests to `test-dplyr-funcs-datetime.R`
dragosmg Mar 28, 2022
a219939
move datetime bindings to ...-funcs-datetime
dragosmg Mar 28, 2022
759c2fd
create a new set of bindings (`datetime_helpers`)
dragosmg Mar 28, 2022
4888ab9
casting `x` and `delta` to `int64()` + updated unit tests
dragosmg Mar 28, 2022
8d8b6ab
test with `Pacific/Marquesas` and skip on Windows
dragosmg Mar 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* `lubridate`:
* component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs.
* `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
* `as_date()` and `as_datetime()` to convert to date and date-time
* date-time functionality:
* `difftime` and `as.difftime()`
* `as.Date()` to convert to date
Expand Down
194 changes: 150 additions & 44 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,70 @@ register_bindings_datetime <- function() {
}

# Register the Arrow bindings for the base R duration helpers `difftime()` and
# `as.difftime()`. Both bindings only support `units = "secs"` and return an
# expression cast to `duration("s")`.
register_bindings_duration <- function() {
  # difftime(): `time1 - time2` as a duration in seconds.
  #   time1, time2: inputs to subtract; anything that is not an "instant"
  #     (per the `is.instant` binding) is first cast to timestamp().
  #   tz: accepted for signature compatibility only; a warning is raised and
  #     the value is ignored.
  #   units: must be "secs", otherwise the binding aborts.
  register_binding("difftime", function(time1,
                                        time2,
                                        tz,
                                        units = "secs") {
    if (units != "secs") {
      abort("`difftime()` with units other than `secs` not supported in Arrow")
    }

    if (!missing(tz)) {
      warn("`tz` argument is not supported in Arrow, so it will be ignored")
    }

    # cast to timestamp if time1 and time2 are not dates or timestamp expressions
    # (the subtraction of which would output a `duration`)
    if (!call_binding("is.instant", time1)) {
      time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp()))
    }

    if (!call_binding("is.instant", time2)) {
      time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp()))
    }

    # we need to go build the subtract expression instead of `time1 - time2` to
    # prevent complaints when we try to subtract an R object from an Expression
    subtract_output <- build_expr("-", time1, time2)
    build_expr("cast", subtract_output, options = cast_options(to_type = duration("s")))
  })

  # as.difftime(): convert character or numeric input to a duration in seconds.
  #   x: character input is parsed with `format`; numeric input is coerced
  #     through int64().
  #   format: strptime-style format, only used for character input.
  #   units: must be "secs", otherwise the binding aborts.
  register_binding("as.difftime", function(x,
                                           format = "%X",
                                           units = "secs") {
    # windows doesn't seem to like "%X"
    # (`&&` because this is a scalar condition inside `if`)
    if (format == "%X" && tolower(Sys.info()[["sysname"]]) == "windows") {
      format <- "%H:%M:%S"
    }

    if (units != "secs") {
      abort("`as.difftime()` with units other than 'secs' not supported in Arrow")
    }

    if (call_binding("is.character", x)) {
      x <- build_expr("strptime", x, options = list(format = format, unit = 0L))
      # complex casting only due to cast type restrictions: time64 -> int64 -> duration(us)
      # and then we cast to duration ("s") at the end
      x <- x$cast(time64("us"))$cast(int64())$cast(duration("us"))
    }

    # numeric -> duration not supported in Arrow yet so we use int64() as an
    # intermediate step
    # TODO revisit if https://issues.apache.org/jira/browse/ARROW-15862 results
    # in numeric -> duration support

    if (call_binding("is.numeric", x)) {
      # coerce x to be int64(). it should work for integer-like doubles and fail
      # for pure doubles
      # if we abort for all doubles, we risk erroring in cases in which
      # coercion to int64() would work
      x <- build_expr("cast", x, options = cast_options(to_type = int64()))
    }

    build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
  })
}

register_bindings_datetime_helpers <- function() {
register_binding("make_datetime", function(year = 1970L,
month = 1L,
day = 1L,
Expand Down Expand Up @@ -239,66 +303,108 @@ register_bindings_duration <- function() {
tz = "UTC") {
call_binding("make_datetime", year, month, day, hour, min, sec, tz)
})
register_binding("difftime", function(time1,
time2,
tz,
units = "secs") {
if (units != "secs") {
abort("`difftime()` with units other than `secs` not supported in Arrow")
}
register_binding("as.Date", function(x,
format = NULL,
tryFormats = "%Y-%m-%d",
origin = "1970-01-01",
tz = "UTC") {

if (!missing(tz)) {
warn("`tz` argument is not supported in Arrow, so it will be ignored")
# the origin argument will be better supported once we implement temporal
# arithmetic (https://issues.apache.org/jira/browse/ARROW-14947)
# TODO revisit once the above has been sorted
if (call_binding("is.numeric", x) & origin != "1970-01-01") {
abort("`as.Date()` with an `origin` different than '1970-01-01' is not supported in Arrow")
}

# cast to timestamp if time1 and time2 are not dates or timestamp expressions
# (the subtraction of which would output a `duration`)
if (!call_binding("is.instant", time1)) {
time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp(timezone = "UTC")))
# this could be improved with tryFormats once strptime returns NA and we
# can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659
# TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is done
if (is.null(format) && length(tryFormats) > 1) {
abort("`as.Date()` with multiple `tryFormats` is not supported in Arrow")
}

if (!call_binding("is.instant", time2)) {
time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp(timezone = "UTC")))
}
if (call_binding("is.Date", x)) {
return(x)

# we need to go build the subtract expression instead of `time1 - time2` to
# prevent complaints when we try to subtract an R object from an Expression
subtract_output <- build_expr("-", time1, time2)
build_expr("cast", subtract_output, options = cast_options(to_type = duration("s")))
# cast from POSIXct
} else if (call_binding("is.POSIXct", x)) {
# base::as.Date() first converts to the desired timezone and then extracts
# the date, which is why we need to go through timestamp() first
x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))

# cast from character
} else if (call_binding("is.character", x)) {
format <- format %||% tryFormats[[1]]
# unit = 0L is the identifier for seconds in valid_time32_units
x <- build_expr("strptime", x, options = list(format = format, unit = 0L))

# cast from numeric
} else if (call_binding("is.numeric", x) & !call_binding("is.integer", x)) {
# Arrow does not support direct casting from double to date32(), but for
# integer-like values we can go via int32()
# https://issues.apache.org/jira/browse/ARROW-15798
# TODO revisit if arrow decides to support double -> date casting
x <- build_expr("cast", x, options = cast_options(to_type = int32()))
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
register_binding("as.difftime", function(x,
format = "%X",
units = "secs") {
# windows doesn't seem to like "%X"
if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
format <- "%H:%M:%S"
register_binding("as_date", function(x,
format = NULL,
origin = "1970-01-01",
tz = "UTC") {
# the origin argument will be better supported once we implement temporal
# arithmetic (https://issues.apache.org/jira/browse/ARROW-14947)
# TODO revisit once the above has been sorted
if (call_binding("is.numeric", x) & origin != "1970-01-01") {
abort("`as.Date()` with an `origin` different than '1970-01-01' is not supported in Arrow")
}

if (units != "secs") {
abort("`as.difftime()` with units other than 'secs' not supported in Arrow")
# assume format is ISO if unspecified (to align with lubridate::as_date)
if (is.null(format)) {
format <- "%Y-%m-%d"
}

if (call_binding("is.character", x)) {
x <- build_expr("strptime", x, options = list(format = format, unit = 0L))
# complex casting only due to cast type restrictions: time64 -> int64 -> duration(us)
# and then we cast to duration ("s") at the end
x <- x$cast(time64("us"))$cast(int64())$cast(duration("us"))
}
if (call_binding("is.Date", x)) {
return(x)

# numeric -> duration not supported in Arrow yet so we use int64() as an
# intermediate step
# TODO revisit if https://issues.apache.org/jira/browse/ARROW-15862 results
# in numeric -> duration support
# cast from POSIXct
} else if (call_binding("is.POSIXct", x)) {
# this is where as_date() differs from as.Date()
if (!missing(tz)) {
x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
}
# POSIXct is of type double -> we need this to prevent going down the
# "double" branch
x <- x

# cast from character
} else if (call_binding("is.character", x)) {
# unit = 0L is the identifier for seconds in valid_time32_units
x <- build_expr("strptime", x, options = list(format = format, unit = 0L))

# cast from numeric
} else if (call_binding("is.numeric", x) & !call_binding("is.integer", x)) {
# Arrow does not support direct casting from double to date32(), but for
# integer-like values we can go via int32()
# https://issues.apache.org/jira/browse/ARROW-15798
# TODO revisit if arrow decides to support double -> date casting
x <- build_expr("cast", x, options = cast_options(to_type = int32()))
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
# as_datetime(): convert `x` to a timestamp and attach the timezone `tz`.
#   x: numeric input is treated as an offset from `origin`; any other input
#     is cast directly to timestamp().
#   origin: reference point for numeric input, compared against "1970-01-01".
#   tz: timezone passed to the `assume_timezone` kernel.
register_binding("as_datetime", function(x,
                                         origin = "1970-01-01",
                                         tz = "UTC") {
  if (call_binding("is.numeric", x)) {
    # `delta` is the offset of `origin` from 1970-01-01; the `difftime`
    # binding yields a duration in seconds, which we cast to int64() so it
    # can be added to `x`
    delta <- call_binding("difftime", origin, "1970-01-01")
    delta <- build_expr("cast", delta, options = cast_options(to_type = int64()))
    # coerce x to be int64(). it should work for integer-like doubles and fail
    # for pure doubles
    # if we abort for all doubles, we risk erroring in cases in which
    # coercion to int64() would work
    x <- build_expr("cast", x, options = cast_options(to_type = int64()))
    output <- build_expr("+", x, delta)
    output <- build_expr("cast", output, options = cast_options(to_type = timestamp()))
  } else {
    output <- build_expr("cast", x, options = cast_options(to_type = timestamp()))
  }

  build_expr("assume_timezone", output, options = list(timezone = tz))
})
}

Expand Down
45 changes: 0 additions & 45 deletions r/R/dplyr-funcs-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,51 +77,6 @@ register_bindings_type_cast <- function() {
# as.numeric(): cast the input expression to float64().
register_binding("as.numeric", function(x) {
  target <- cast_options(to_type = float64())
  build_expr("cast", x, options = target)
})
# as.Date(): convert `x` to date32().
#   x: Date input is returned unchanged; POSIXct input is first cast to a
#     timestamp in `tz`; character input is parsed with `format` (or the single
#     entry of `tryFormats`); integer input is cast directly; other numeric
#     input aborts.
#   format: strptime-style format for character input.
#   tryFormats: fallback format when `format` is NULL; only one is supported.
#   origin: only "1970-01-01" is supported for numeric input.
#   tz: timezone used when converting POSIXct input.
register_binding("as.Date", function(x,
                                     format = NULL,
                                     tryFormats = "%Y-%m-%d",
                                     origin = "1970-01-01",
                                     tz = "UTC") {

  # the origin argument will be better supported once we implement temporal
  # arithmetic (https://issues.apache.org/jira/browse/ARROW-14947)
  # TODO revisit once the above has been sorted
  # (`&&` because this is a scalar condition inside `if`)
  if (call_binding("is.numeric", x) && origin != "1970-01-01") {
    abort("`as.Date()` with an `origin` different than '1970-01-01' is not supported in Arrow")
  }

  # this could be improved with tryFormats once strptime returns NA and we
  # can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659
  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is done
  if (is.null(format) && length(tryFormats) > 1) {
    abort("`as.Date()` with multiple `tryFormats` is not supported in Arrow")
  }

  if (call_binding("is.Date", x)) {
    return(x)

    # cast from POSIXct
  } else if (call_binding("is.POSIXct", x)) {
    # base::as.Date() first converts to the desired timezone and then extracts
    # the date, which is why we need to go through timestamp() first
    x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))

    # cast from character
  } else if (call_binding("is.character", x)) {
    format <- format %||% tryFormats[[1]]
    # unit = 0L is the identifier for seconds in valid_time32_units
    x <- build_expr("strptime", x, options = list(format = format, unit = 0L))

    # cast from numeric
  } else if (call_binding("is.numeric", x) && !call_binding("is.integer", x)) {
    # Arrow does not support direct casting from double to date32()
    # https://issues.apache.org/jira/browse/ARROW-15798
    # TODO revisit if arrow decides to support double -> date casting
    abort("`as.Date()` with double/float is not supported in Arrow")
  }
  build_expr("cast", x, options = cast_options(to_type = date32()))
})

register_binding("is", function(object, class2) {
if (is.string(class2)) {
switch(class2,
Expand Down
1 change: 1 addition & 0 deletions r/R/dplyr-funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ create_binding_cache <- function() {
register_bindings_conditional()
register_bindings_datetime()
register_bindings_duration()
register_bindings_datetime_helpers()
register_bindings_math()
register_bindings_string()
register_bindings_type()
Expand Down
Loading