-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20889][SparkR] Grouped documentation for COLLECTION column methods #18458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9bdc739
ad9de63
664629d
c47b3a2
6aa4c6f
8be3e49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,6 +132,35 @@ NULL | |
| #' df <- createDataFrame(as.data.frame(Titanic, stringsAsFactors = FALSE))} | ||
| NULL | ||
|
|
||
| #' Collection functions for Column operations | ||
| #' | ||
| #' Collection functions defined for \code{Column}. | ||
| #' | ||
| #' @param x Column to compute on. Note the difference in the following methods: | ||
| #' \itemize{ | ||
| #' \item \code{to_json}: it is the column containing the struct or array of the structs. | ||
| #' \item \code{from_json}: it is the column containing the JSON string. | ||
| #' } | ||
| #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains | ||
| #' additional named properties to control how it is converted, accepts the same | ||
| #' options as the JSON data source. | ||
| #' @name column_collection_functions | ||
| #' @rdname column_collection_functions | ||
| #' @family collection functions | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' # Dataframe used throughout this doc | ||
| #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) | ||
| #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) | ||
| #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp)) | ||
| #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1))) | ||
| #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1)) | ||
| #' head(tmp2) | ||
| #' head(select(tmp, posexplode(tmp$v1))) | ||
| #' head(select(tmp, sort_array(tmp$v1))) | ||
| #' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))} | ||
| NULL | ||
|
|
||
| #' lit | ||
| #' | ||
| #' A new \linkS4class{Column} is created to represent the literal value. | ||
|
|
@@ -1657,30 +1686,23 @@ setMethod("to_date", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' to_json | ||
| #' | ||
| #' Converts a column containing a \code{structType} or array of \code{structType} into a Column | ||
| #' of JSON string. Resolving the Column can fail if an unsupported type is encountered. | ||
| #' | ||
| #' @param x Column containing the struct or array of the structs | ||
| #' @param ... additional named properties to control how it is converted, accepts the same options | ||
| #' as the JSON data source. | ||
| #' @details | ||
| #' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType} | ||
| #' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered. | ||
| #' | ||
| #' @family non-aggregate functions | ||
| #' @rdname to_json | ||
| #' @name to_json | ||
| #' @aliases to_json,Column-method | ||
| #' @rdname column_collection_functions | ||
| #' @aliases to_json to_json,Column-method | ||
| #' @export | ||
| #' @examples | ||
| #' | ||
| #' \dontrun{ | ||
| #' # Converts a struct into a JSON object | ||
| #' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d") | ||
| #' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy')) | ||
| #' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d") | ||
| #' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy')) | ||
| #' | ||
| #' # Converts an array of structs into a JSON array | ||
| #' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") | ||
| #' select(df, to_json(df$people)) | ||
| #'} | ||
| #' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") | ||
| #' df2 <- mutate(df2, people_json = to_json(df2$people))} | ||
| #' @note to_json since 2.2.0 | ||
| setMethod("to_json", signature(x = "Column"), | ||
| function(x, ...) { | ||
|
|
@@ -2156,28 +2178,23 @@ setMethod("date_format", signature(y = "Column", x = "character"), | |
| column(jc) | ||
| }) | ||
|
|
||
| #' from_json | ||
| #' | ||
| #' Parses a column containing a JSON string into a Column of \code{structType} with the specified | ||
| #' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}. | ||
| #' If the string is unparseable, the Column will contain the value NA. | ||
| #' @details | ||
| #' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType} | ||
| #' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set | ||
| #' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA. | ||
|
||
| #' | ||
| #' @param x Column containing the JSON string. | ||
| #' @rdname column_collection_functions | ||
| #' @param schema a structType object to use as the schema to use when parsing the JSON string. | ||
| #' @param as.json.array indicating if input string is JSON array of objects or a single object. | ||
| #' @param ... additional named properties to control how the json is parsed, accepts the same | ||
| #' options as the JSON data source. | ||
| #' | ||
| #' @family non-aggregate functions | ||
| #' @rdname from_json | ||
| #' @name from_json | ||
| #' @aliases from_json,Column,structType-method | ||
| #' @aliases from_json from_json,Column,structType-method | ||
| #' @export | ||
| #' @examples | ||
| #' | ||
| #' \dontrun{ | ||
| #' schema <- structType(structField("name", "string"), | ||
| #' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy")) | ||
| #'} | ||
| #' df2 <- sql("SELECT named_struct('name', 'Bob') as people") | ||
| #' df2 <- mutate(df2, people_json = to_json(df2$people)) | ||
| #' schema <- structType(structField("name", "string")) | ||
| #' head(select(df2, from_json(df2$people_json, schema)))} | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's worthwhile to keep
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for catching this. Added an example. |
||
| #' @note from_json since 2.2.0 | ||
| setMethod("from_json", signature(x = "Column", schema = "structType"), | ||
| function(x, schema, as.json.array = FALSE, ...) { | ||
|
|
@@ -3156,18 +3173,14 @@ setMethod("row_number", | |
|
|
||
| ###################### Collection functions###################### | ||
|
|
||
| #' array_contains | ||
| #' | ||
| #' Returns null if the array is null, true if the array contains the value, and false otherwise. | ||
| #' @details | ||
| #' \code{array_contains}: Returns null if the array is null, true if the array contains | ||
| #' the value, and false otherwise. | ||
| #' | ||
| #' @param x A Column | ||
| #' @param value A value to be checked if contained in the column | ||
| #' @rdname array_contains | ||
| #' @aliases array_contains,Column-method | ||
| #' @name array_contains | ||
| #' @family collection functions | ||
| #' @rdname column_collection_functions | ||
| #' @aliases array_contains array_contains,Column-method | ||
| #' @export | ||
| #' @examples \dontrun{array_contains(df$c, 1)} | ||
| #' @note array_contains since 1.6.0 | ||
| setMethod("array_contains", | ||
| signature(x = "Column", value = "ANY"), | ||
|
|
@@ -3176,18 +3189,12 @@ setMethod("array_contains", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' explode | ||
| #' | ||
| #' Creates a new row for each element in the given array or map column. | ||
| #' | ||
| #' @param x Column to compute on | ||
| #' @details | ||
| #' \code{explode}: Creates a new row for each element in the given array or map column. | ||
| #' | ||
| #' @rdname explode | ||
| #' @name explode | ||
| #' @family collection functions | ||
| #' @aliases explode,Column-method | ||
| #' @rdname column_collection_functions | ||
| #' @aliases explode explode,Column-method | ||
| #' @export | ||
| #' @examples \dontrun{explode(df$c)} | ||
| #' @note explode since 1.5.0 | ||
| setMethod("explode", | ||
| signature(x = "Column"), | ||
|
|
@@ -3196,18 +3203,12 @@ setMethod("explode", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' size | ||
| #' | ||
| #' Returns length of array or map. | ||
| #' | ||
| #' @param x Column to compute on | ||
| #' @details | ||
| #' \code{size}: Returns length of array or map. | ||
| #' | ||
| #' @rdname size | ||
| #' @name size | ||
| #' @aliases size,Column-method | ||
| #' @family collection functions | ||
| #' @rdname column_collection_functions | ||
| #' @aliases size size,Column-method | ||
| #' @export | ||
| #' @examples \dontrun{size(df$c)} | ||
| #' @note size since 1.5.0 | ||
| setMethod("size", | ||
| signature(x = "Column"), | ||
|
|
@@ -3216,25 +3217,16 @@ setMethod("size", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' sort_array | ||
| #' | ||
| #' Sorts the input array in ascending or descending order according | ||
| #' @details | ||
| #' \code{sort_array}: Sorts the input array in ascending or descending order according | ||
| #' to the natural ordering of the array elements. | ||
| #' | ||
| #' @param x A Column to sort | ||
| #' @rdname column_collection_functions | ||
| #' @param asc A logical flag indicating the sorting order. | ||
| #' TRUE, sorting is in ascending order. | ||
| #' FALSE, sorting is in descending order. | ||
| #' @rdname sort_array | ||
| #' @name sort_array | ||
| #' @aliases sort_array,Column-method | ||
| #' @family collection functions | ||
| #' @aliases sort_array sort_array,Column-method | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' sort_array(df$c) | ||
| #' sort_array(df$c, FALSE) | ||
| #' } | ||
| #' @note sort_array since 1.6.0 | ||
| setMethod("sort_array", | ||
| signature(x = "Column"), | ||
|
|
@@ -3243,18 +3235,13 @@ setMethod("sort_array", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' posexplode | ||
| #' | ||
| #' Creates a new row for each element with position in the given array or map column. | ||
| #' | ||
| #' @param x Column to compute on | ||
| #' @details | ||
| #' \code{posexplode}: Creates a new row for each element with position in the given array | ||
| #' or map column. | ||
| #' | ||
| #' @rdname posexplode | ||
| #' @name posexplode | ||
| #' @family collection functions | ||
| #' @aliases posexplode,Column-method | ||
| #' @rdname column_collection_functions | ||
| #' @aliases posexplode posexplode,Column-method | ||
| #' @export | ||
| #' @examples \dontrun{posexplode(df$c)} | ||
| #' @note posexplode since 2.1.0 | ||
| setMethod("posexplode", | ||
| signature(x = "Column"), | ||
|
|
@@ -3394,27 +3381,24 @@ setMethod("repeat_string", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' explode_outer | ||
| #' | ||
| #' Creates a new row for each element in the given array or map column. | ||
| #' @details | ||
| #' \code{explode_outer}: Creates a new row for each element in the given array or map column. | ||
| #' Unlike \code{explode}, if the array/map is \code{null} or empty | ||
| #' then \code{null} is produced. | ||
| #' | ||
| #' @param x Column to compute on | ||
| #' | ||
| #' @rdname explode_outer | ||
| #' @name explode_outer | ||
| #' @family collection functions | ||
| #' @aliases explode_outer,Column-method | ||
| #' @rdname column_collection_functions | ||
| #' @aliases explode_outer explode_outer,Column-method | ||
| #' @export | ||
| #' @examples | ||
| #' | ||
| #' \dontrun{ | ||
| #' df <- createDataFrame(data.frame( | ||
| #' df2 <- createDataFrame(data.frame( | ||
| #' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e") | ||
| #' )) | ||
| #' | ||
| #' head(select(df, df$id, explode_outer(split_string(df$text, ",")))) | ||
| #' } | ||
| #' head(select(df2, df2$id, explode_outer(split_string(df2$text, ",")))) | ||
| #' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))} | ||
| #' @note explode_outer since 2.3.0 | ||
| setMethod("explode_outer", | ||
| signature(x = "Column"), | ||
|
|
@@ -3423,27 +3407,14 @@ setMethod("explode_outer", | |
| column(jc) | ||
| }) | ||
|
|
||
| #' posexplode_outer | ||
| #' | ||
| #' Creates a new row for each element with position in the given array or map column. | ||
| #' Unlike \code{posexplode}, if the array/map is \code{null} or empty | ||
| #' @details | ||
| #' \code{posexplode_outer}: Creates a new row for each element with position in the given | ||
| #' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty | ||
| #' then the row (\code{null}, \code{null}) is produced. | ||
| #' | ||
| #' @param x Column to compute on | ||
| #' | ||
| #' @rdname posexplode_outer | ||
| #' @name posexplode_outer | ||
| #' @family collection functions | ||
| #' @aliases posexplode_outer,Column-method | ||
| #' @rdname column_collection_functions | ||
| #' @aliases posexplode_outer posexplode_outer,Column-method | ||
| #' @export | ||
| #' @examples | ||
| #' \dontrun{ | ||
| #' df <- createDataFrame(data.frame( | ||
| #' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e") | ||
| #' )) | ||
| #' | ||
| #' head(select(df, df$id, posexplode_outer(split_string(df$text, ",")))) | ||
| #' } | ||
| #' @note posexplode_outer since 2.3.0 | ||
| setMethod("posexplode_outer", | ||
| signature(x = "Column"), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit, let's improve this? I think in sort_array we could be more clear, eg.
sort_array(tmp$v1, asc = FALSE)