R/pkg/R/functions.R (10 additions, 2 deletions)
@@ -2437,6 +2437,7 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'
 #' @param x Column containing the JSON string.
 #' @param schema a structType object to use as the schema when parsing the JSON string.
+#' @param asArray indicating whether the input string is a JSON array rather than a single JSON object.
 #' @param ... additional named properties to control how the JSON is parsed; accepts the same
 #'            options as the JSON data source.
 #'
@@ -2452,11 +2453,18 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'}
 #' @note from_json since 2.2.0
 setMethod("from_json", signature(x = "Column", schema = "structType"),
-          function(x, schema, ...) {
+          function(x, schema, asArray = FALSE, ...) {
+            if (asArray) {
+              jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
+                                     "createArrayType",
Member commented:

do we need a wrapper, actually? can't we call newJObject?

Member Author replied:

Ah, sure. Let me try.
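
For context, the newJObject route the reviewer suggests would build the JVM ArrayType directly from R, with no Scala wrapper at all. A rough sketch of that approach, not the merged code, assuming SparkR's internal newJObject helper and the ArrayType(elementType, containsNull) constructor:

    # Hypothetical sketch: construct org.apache.spark.sql.types.ArrayType
    # on the JVM directly, without a dedicated SQLUtils wrapper.
    jschema <- newJObject("org.apache.spark.sql.types.ArrayType",
                          schema$jobj,  # elementType: the user-supplied structType
                          TRUE)         # containsNull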

Member Author replied:

It seems we'd better use DataTypes.createArrayType, per:
* The data type for collections of multiple values.
* Internally these are represented as columns that contain a ``scala.collection.Seq``.
*
* Please use `DataTypes.createArrayType()` to create a specific instance.
*
* An [[ArrayType]] object comprises two fields, `elementType: [[DataType]]` and
* `containsNull: Boolean`. The field of `elementType` is used to specify the type of
* array elements. The field of `containsNull` is used to specify if the array has `null` values.

Let me remove that new wrapper and use the original one.
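
With that change, the R side would reuse the existing factory method instead. A minimal sketch, assuming DataTypes.createArrayType(DataType), which defaults containsNull to true:

    # Sketch: call the existing DataTypes factory rather than a new wrapper.
    jschema <- callJStatic("org.apache.spark.sql.types.DataTypes",
                           "createArrayType",
                           schema$jobj)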

+                                     schema$jobj)
+            } else {
+              jschema <- schema$jobj
+            }
             options <- varargsToStrEnv(...)
             jc <- callJStatic("org.apache.spark.sql.functions",
                               "from_json",
-                              x@jc, schema$jobj, options)
+                              x@jc, jschema, options)
             column(jc)
           })
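
For reference, a minimal usage sketch of the new asArray flag, mirroring the test added below (it assumes a SparkR session is already running):

    # Parse a string column holding a JSON array into an array of structs.
    df <- as.DataFrame(list(list("people" = "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]")))
    schema <- structType(structField("name", "string"))
    head(select(df, from_json(df$people, schema, asArray = TRUE)))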

R/pkg/inst/tests/testthat/test_sparkSQL.R (12 additions, 0 deletions)
@@ -1364,6 +1364,18 @@ test_that("column functions", {
   # check for unparseable
   df <- as.DataFrame(list(list("a" = "")))
   expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
 
+  # check if array type in string is correctly supported.
+  jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
+  df <- as.DataFrame(list(list("people" = jsonArr)))
+  schema <- structType(structField("name", "string"))
+  arr <- collect(select(df, alias(from_json(df$people, schema, asArray = TRUE), "arrcol")))
+  expect_equal(ncol(arr), 1)
+  expect_equal(nrow(arr), 1)
+  expect_is(arr[[1]][[1]], "list")
+  expect_equal(length(arr$arrcol[[1]]), 2)
+  expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
+  expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
 })

test_that("column binary mathfunctions", {
sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
@@ -81,10 +81,14 @@ private[sql] object SQLUtils extends Logging {
     new JavaSparkContext(spark.sparkContext)
   }
 
-  def createStructType(fields : Seq[StructField]): StructType = {
+  def createStructType(fields: Seq[StructField]): StructType = {
     StructType(fields)
   }
 
+  def createArrayType(dataType: DataType): ArrayType = {
+    ArrayType(elementType = dataType)
+  }
+
   // Support using regex in string interpolation
   private[this] implicit class RegexContext(sc: StringContext) {
     def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*)