Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 90 additions & 114 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,35 @@ NULL
#' }
NULL

#' Collection functions for Column operations
#'
#' Collection functions defined for \code{Column}.
#'
#' @param x Column to compute on. Note the difference in the following methods:
#' \itemize{
#' \item \code{to_json}: it is the column containing the struct or array of the structs.
#' \item \code{from_json}: it is the column containing the JSON string.
#' }
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
#' additional named properties to control how it is converted, accepts the same
#' options as the JSON data source.
#' @name column_collection_functions
#' @rdname column_collection_functions
#' @family collection functions
#' @examples
#' \dontrun{
#' # Dataframe used throughout this doc
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
NULL

#' @details
#' \code{lit}: A new Column is created to represent the literal value.
#' If the parameter is a Column, it is returned unchanged.
Expand Down Expand Up @@ -1642,30 +1671,23 @@ setMethod("to_date",
column(jc)
})

#' to_json
#'
#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' @param x Column containing the struct or array of the structs
#' @param ... additional named properties to control how it is converted, accepts the same options
#' as the JSON data source.
#' @details
#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' @family non-aggregate functions
#' @rdname to_json
#' @name to_json
#' @aliases to_json,Column-method
#' @rdname column_collection_functions
#' @aliases to_json to_json,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' # Converts a struct into a JSON object
#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#'
#' # Converts an array of structs into a JSON array
#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' select(df, to_json(df$people))
#'}
#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))}
#' @note to_json since 2.2.0
setMethod("to_json", signature(x = "Column"),
function(x, ...) {
Expand Down Expand Up @@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
column(jc)
})

#' from_json
#'
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
#' If the string is unparseable, the Column will contains the value NA.
#' @details
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
#'
#' @param x Column containing the JSON string.
#' @rdname column_collection_functions
#' @param schema a structType object to use as the schema when parsing the JSON string.
#' @param as.json.array a logical flag indicating whether the input string is a JSON array of
#'                      objects or a single object.
#' @param ... additional named properties to control how the json is parsed, accepts the same
#' options as the JSON data source.
#'
#' @family non-aggregate functions
#' @rdname from_json
#' @name from_json
#' @aliases from_json,Column,structType-method
#' @aliases from_json from_json,Column,structType-method
#' @export
#' @examples
#'
#' \dontrun{
#' schema <- structType(structField("name", "string"),
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
#'}
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#' schema <- structType(structField("date", "string"))
#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
#'
#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))
#' schema <- structType(structField("name", "string"))
#' head(select(df2, from_json(df2$people_json, schema)))}
#' @note from_json since 2.2.0
setMethod("from_json", signature(x = "Column", schema = "structType"),
function(x, schema, as.json.array = FALSE, ...) {
Expand Down Expand Up @@ -3101,18 +3123,14 @@ setMethod("row_number",

###################### Collection functions######################

#' array_contains
#'
#' Returns null if the array is null, true if the array contains the value, and false otherwise.
#' @details
#' \code{array_contains}: Returns null if the array is null, true if the array contains
#' the value, and false otherwise.
#'
#' @param x A Column
#' @param value A value to be checked if contained in the column
#' @rdname array_contains
#' @aliases array_contains,Column-method
#' @name array_contains
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases array_contains array_contains,Column-method
#' @export
#' @examples \dontrun{array_contains(df$c, 1)}
#' @note array_contains since 1.6.0
setMethod("array_contains",
signature(x = "Column", value = "ANY"),
Expand All @@ -3121,18 +3139,12 @@ setMethod("array_contains",
column(jc)
})

#' explode
#'
#' Creates a new row for each element in the given array or map column.
#'
#' @param x Column to compute on
#' @details
#' \code{explode}: Creates a new row for each element in the given array or map column.
#'
#' @rdname explode
#' @name explode
#' @family collection functions
#' @aliases explode,Column-method
#' @rdname column_collection_functions
#' @aliases explode explode,Column-method
#' @export
#' @examples \dontrun{explode(df$c)}
#' @note explode since 1.5.0
setMethod("explode",
signature(x = "Column"),
Expand All @@ -3141,18 +3153,12 @@ setMethod("explode",
column(jc)
})

#' size
#'
#' Returns length of array or map.
#'
#' @param x Column to compute on
#' @details
#' \code{size}: Returns length of array or map.
#'
#' @rdname size
#' @name size
#' @aliases size,Column-method
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases size size,Column-method
#' @export
#' @examples \dontrun{size(df$c)}
#' @note size since 1.5.0
setMethod("size",
signature(x = "Column"),
Expand All @@ -3161,25 +3167,16 @@ setMethod("size",
column(jc)
})

#' sort_array
#'
#' Sorts the input array in ascending or descending order according
#' @details
#' \code{sort_array}: Sorts the input array in ascending or descending order according
#' to the natural ordering of the array elements.
#'
#' @param x A Column to sort
#' @rdname column_collection_functions
#' @param asc A logical flag indicating the sorting order.
#' TRUE, sorting is in ascending order.
#' FALSE, sorting is in descending order.
#' @rdname sort_array
#' @name sort_array
#' @aliases sort_array,Column-method
#' @family collection functions
#' @aliases sort_array sort_array,Column-method
#' @export
#' @examples
#' \dontrun{
#' sort_array(df$c)
#' sort_array(df$c, FALSE)
#' }
#' @note sort_array since 1.6.0
setMethod("sort_array",
signature(x = "Column"),
Expand All @@ -3188,18 +3185,13 @@ setMethod("sort_array",
column(jc)
})

#' posexplode
#'
#' Creates a new row for each element with position in the given array or map column.
#'
#' @param x Column to compute on
#' @details
#' \code{posexplode}: Creates a new row for each element with position in the given array
#' or map column.
#'
#' @rdname posexplode
#' @name posexplode
#' @family collection functions
#' @aliases posexplode,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode posexplode,Column-method
#' @export
#' @examples \dontrun{posexplode(df$c)}
#' @note posexplode since 2.1.0
setMethod("posexplode",
signature(x = "Column"),
Expand Down Expand Up @@ -3325,27 +3317,24 @@ setMethod("repeat_string",
column(jc)
})

#' explode_outer
#'
#' Creates a new row for each element in the given array or map column.
#' @details
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
#' Unlike \code{explode}, if the array/map is \code{null} or empty
#' then \code{null} is produced.
#'
#' @param x Column to compute on
#'
#' @rdname explode_outer
#' @name explode_outer
#' @family collection functions
#' @aliases explode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases explode_outer explode_outer,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' df2 <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
#' }
#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
#' @note explode_outer since 2.3.0
setMethod("explode_outer",
signature(x = "Column"),
Expand All @@ -3354,27 +3343,14 @@ setMethod("explode_outer",
column(jc)
})

#' posexplode_outer
#'
#' Creates a new row for each element with position in the given array or map column.
#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' @details
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' then the row (\code{null}, \code{null}) is produced.
#'
#' @param x Column to compute on
#'
#' @rdname posexplode_outer
#' @name posexplode_outer
#' @family collection functions
#' @aliases posexplode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode_outer posexplode_outer,Column-method
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
#' }
#' @note posexplode_outer since 2.3.0
setMethod("posexplode_outer",
signature(x = "Column"),
Expand Down
Loading