Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 85 additions & 114 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,35 @@ NULL
#' df <- createDataFrame(as.data.frame(Titanic, stringsAsFactors = FALSE))}
NULL

#' Collection functions for Column operations
#'
#' Collection functions defined for \code{Column}.
#'
#' @param x Column to compute on. Note the difference in the following methods:
#' \itemize{
#' \item \code{to_json}: it is the column containing the struct or array of the structs.
#' \item \code{from_json}: it is the column containing the JSON string.
#' }
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
#' additional named properties to control how it is converted, accepts the same
#' options as the JSON data source.
#' @name column_collection_functions
#' @rdname column_collection_functions
#' @family collection functions
#' @examples
#' \dontrun{
#' # Dataframe used throughout this doc
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit, let's improve this? I think in sort_array we could be more clear, eg. sort_array(tmp$v1, asc = FALSE)

NULL

#' lit
#'
#' A new \linkS4class{Column} is created to represent the literal value.
Expand Down Expand Up @@ -1657,30 +1686,23 @@ setMethod("to_date",
column(jc)
})

#' to_json
#'
#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' @param x Column containing the struct or array of the structs
#' @param ... additional named properties to control how it is converted, accepts the same options
#' as the JSON data source.
#' @details
#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' @family non-aggregate functions
#' @rdname to_json
#' @name to_json
#' @aliases to_json,Column-method
#' @rdname column_collection_functions
#' @aliases to_json to_json,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' # Converts a struct into a JSON object
#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#'
#' # Converts an array of structs into a JSON array
#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' select(df, to_json(df$people))
#'}
#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))}
#' @note to_json since 2.2.0
setMethod("to_json", signature(x = "Column"),
function(x, ...) {
Expand Down Expand Up @@ -2156,28 +2178,23 @@ setMethod("date_format", signature(y = "Column", x = "character"),
column(jc)
})

#' from_json
#'
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
#' If the string is unparseable, the Column will contains the value NA.
#' @details
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw, will contains the value NA. is very consistently documented. in this case this is right, but there are many others that say the value is null (note lower case), which isn't quite correct on the R side.

another project? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected the typo. Will consider updating null & NA in the future :)

#'
#' @param x Column containing the JSON string.
#' @rdname column_collection_functions
#' @param schema a structType object to use as the schema when parsing the JSON string.
#' @param as.json.array a logical flag indicating whether the input string is a JSON array of
#'                      objects or a single object.
#' @param ... additional named properties to control how the json is parsed, accepts the same
#' options as the JSON data source.
#'
#' @family non-aggregate functions
#' @rdname from_json
#' @name from_json
#' @aliases from_json,Column,structType-method
#' @aliases from_json from_json,Column,structType-method
#' @export
#' @examples
#'
#' \dontrun{
#' schema <- structType(structField("name", "string"),
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
#'}
#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))
#' schema <- structType(structField("name", "string"))
#' head(select(df2, from_json(df2$people_json, schema)))}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worthwhile to keep dateFormat = "dd/MM/yyyy") in the example

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this. Added an example.

#' @note from_json since 2.2.0
setMethod("from_json", signature(x = "Column", schema = "structType"),
function(x, schema, as.json.array = FALSE, ...) {
Expand Down Expand Up @@ -3156,18 +3173,14 @@ setMethod("row_number",

###################### Collection functions ######################

#' array_contains
#'
#' Returns null if the array is null, true if the array contains the value, and false otherwise.
#' @details
#' \code{array_contains}: Returns null if the array is null, true if the array contains
#' the value, and false otherwise.
#'
#' @param x A Column
#' @param value A value to be checked if contained in the column
#' @rdname array_contains
#' @aliases array_contains,Column-method
#' @name array_contains
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases array_contains array_contains,Column-method
#' @export
#' @examples \dontrun{array_contains(df$c, 1)}
#' @note array_contains since 1.6.0
setMethod("array_contains",
signature(x = "Column", value = "ANY"),
Expand All @@ -3176,18 +3189,12 @@ setMethod("array_contains",
column(jc)
})

#' explode
#'
#' Creates a new row for each element in the given array or map column.
#'
#' @param x Column to compute on
#' @details
#' \code{explode}: Creates a new row for each element in the given array or map column.
#'
#' @rdname explode
#' @name explode
#' @family collection functions
#' @aliases explode,Column-method
#' @rdname column_collection_functions
#' @aliases explode explode,Column-method
#' @export
#' @examples \dontrun{explode(df$c)}
#' @note explode since 1.5.0
setMethod("explode",
signature(x = "Column"),
Expand All @@ -3196,18 +3203,12 @@ setMethod("explode",
column(jc)
})

#' size
#'
#' Returns length of array or map.
#'
#' @param x Column to compute on
#' @details
#' \code{size}: Returns length of array or map.
#'
#' @rdname size
#' @name size
#' @aliases size,Column-method
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases size size,Column-method
#' @export
#' @examples \dontrun{size(df$c)}
#' @note size since 1.5.0
setMethod("size",
signature(x = "Column"),
Expand All @@ -3216,25 +3217,16 @@ setMethod("size",
column(jc)
})

#' sort_array
#'
#' Sorts the input array in ascending or descending order according
#' @details
#' \code{sort_array}: Sorts the input array in ascending or descending order according
#' to the natural ordering of the array elements.
#'
#' @param x A Column to sort
#' @rdname column_collection_functions
#' @param asc A logical flag indicating the sorting order.
#' TRUE, sorting is in ascending order.
#' FALSE, sorting is in descending order.
#' @rdname sort_array
#' @name sort_array
#' @aliases sort_array,Column-method
#' @family collection functions
#' @aliases sort_array sort_array,Column-method
#' @export
#' @examples
#' \dontrun{
#' sort_array(df$c)
#' sort_array(df$c, FALSE)
#' }
#' @note sort_array since 1.6.0
setMethod("sort_array",
signature(x = "Column"),
Expand All @@ -3243,18 +3235,13 @@ setMethod("sort_array",
column(jc)
})

#' posexplode
#'
#' Creates a new row for each element with position in the given array or map column.
#'
#' @param x Column to compute on
#' @details
#' \code{posexplode}: Creates a new row for each element with position in the given array
#' or map column.
#'
#' @rdname posexplode
#' @name posexplode
#' @family collection functions
#' @aliases posexplode,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode posexplode,Column-method
#' @export
#' @examples \dontrun{posexplode(df$c)}
#' @note posexplode since 2.1.0
setMethod("posexplode",
signature(x = "Column"),
Expand Down Expand Up @@ -3394,27 +3381,24 @@ setMethod("repeat_string",
column(jc)
})

#' explode_outer
#'
#' Creates a new row for each element in the given array or map column.
#' @details
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
#' Unlike \code{explode}, if the array/map is \code{null} or empty
#' then \code{null} is produced.
#'
#' @param x Column to compute on
#'
#' @rdname explode_outer
#' @name explode_outer
#' @family collection functions
#' @aliases explode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases explode_outer explode_outer,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' df2 <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
#' }
#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
#' @note explode_outer since 2.3.0
setMethod("explode_outer",
signature(x = "Column"),
Expand All @@ -3423,27 +3407,14 @@ setMethod("explode_outer",
column(jc)
})

#' posexplode_outer
#'
#' Creates a new row for each element with position in the given array or map column.
#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' @details
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' then the row (\code{null}, \code{null}) is produced.
#'
#' @param x Column to compute on
#'
#' @rdname posexplode_outer
#' @name posexplode_outer
#' @family collection functions
#' @aliases posexplode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode_outer posexplode_outer,Column-method
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
#' }
#' @note posexplode_outer since 2.3.0
setMethod("posexplode_outer",
signature(x = "Column"),
Expand Down
27 changes: 18 additions & 9 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -913,8 +913,9 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @name NULL
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })

#' @rdname array_contains
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })

#' @rdname column_string_functions
Expand Down Expand Up @@ -1057,12 +1058,14 @@ setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank")
#' @name NULL
setGeneric("encode", function(x, charset) { standardGeneric("encode") })

#' @rdname explode
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("explode", function(x) { standardGeneric("explode") })

#' @rdname explode_outer
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })

#' @rdname expr
Expand All @@ -1084,8 +1087,9 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
#' @name NULL
setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })

#' @rdname from_json
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })

#' @rdname column_datetime_functions
Expand Down Expand Up @@ -1262,12 +1266,14 @@ setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_ra
#' @name NULL
setGeneric("pmod", function(y, x) { standardGeneric("pmod") })

#' @rdname posexplode
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("posexplode", function(x) { standardGeneric("posexplode") })

#' @rdname posexplode_outer
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })

#' @rdname column_datetime_functions
Expand Down Expand Up @@ -1366,17 +1372,19 @@ setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUns
#' @name NULL
setGeneric("signum", function(x) { standardGeneric("signum") })

#' @rdname size
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("size", function(x) { standardGeneric("size") })

#' @rdname column_aggregate_functions
#' @export
#' @name NULL
setGeneric("skewness", function(x) { standardGeneric("skewness") })

#' @rdname sort_array
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })

#' @rdname column_string_functions
Expand Down Expand Up @@ -1438,8 +1446,9 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
#' @name NULL
setGeneric("to_date", function(x, format) { standardGeneric("to_date") })

#' @rdname to_json
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })

#' @rdname column_datetime_functions
Expand Down