R/pkg/R/functions.R (10 additions, 2 deletions)
@@ -2437,6 +2437,7 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'
 #' @param x Column containing the JSON string.
 #' @param schema a structType object to use as the schema when parsing the JSON string.
+#' @param asArray indicating whether the input string is a JSON array rather than a single JSON object.
 #' @param ... additional named properties to control how the JSON is parsed; accepts the same
 #'            options as the JSON data source.
 #'
@@ -2452,11 +2453,18 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'}
 #' @note from_json since 2.2.0
 setMethod("from_json", signature(x = "Column", schema = "structType"),
-          function(x, schema, ...) {
+          function(x, schema, asArray = FALSE, ...) {
+            if (asArray) {
+              jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
+                                     "createArrayType",
Member commented:

do we need a wrapper, actually? can't we call newJObject?

Member Author replied:

Ah, sure. Let me try.
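
For context, the newJObject route the reviewer suggests would build the JVM ArrayType directly from R, with no Scala wrapper at all. A rough sketch of that approach, not the merged code, assuming SparkR's internal newJObject helper and the ArrayType(elementType, containsNull) constructor:

    # Hypothetical sketch: construct org.apache.spark.sql.types.ArrayType
    # on the JVM directly, without a dedicated SQLUtils wrapper.
    jschema <- newJObject("org.apache.spark.sql.types.ArrayType",
                          schema$jobj,  # elementType: the user-supplied structType
                          TRUE)         # containsNull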

Member Author replied:

It seems we'd better use DataTypes.createArrayType, per:
* The data type for collections of multiple values.
* Internally these are represented as columns that contain a ``scala.collection.Seq``.
*
* Please use `DataTypes.createArrayType()` to create a specific instance.
*
* An [[ArrayType]] object comprises two fields, `elementType: [[DataType]]` and
* `containsNull: Boolean`. The field of `elementType` is used to specify the type of
* array elements. The field of `containsNull` is used to specify if the array has `null` values.

Let me remove that new wrapper and use the original one.
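
With that change, the R side would reuse the existing factory method instead. A minimal sketch, assuming DataTypes.createArrayType(DataType), which defaults containsNull to true:

    # Sketch: call the existing DataTypes factory rather than a new wrapper.
    jschema <- callJStatic("org.apache.spark.sql.types.DataTypes",
                           "createArrayType",
                           schema$jobj)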

+                                     schema$jobj)
+            } else {
+              jschema <- schema$jobj
+            }
             options <- varargsToStrEnv(...)
             jc <- callJStatic("org.apache.spark.sql.functions",
                               "from_json",
-                              x@jc, schema$jobj, options)
+                              x@jc, jschema, options)
             column(jc)
           })
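
For reference, a minimal usage sketch of the new asArray flag, mirroring the test added below (it assumes a SparkR session is already running):

    # Parse a string column holding a JSON array into an array of structs.
    df <- as.DataFrame(list(list("people" = "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]")))
    schema <- structType(structField("name", "string"))
    head(select(df, from_json(df$people, schema, asArray = TRUE)))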

R/pkg/inst/tests/testthat/test_sparkSQL.R (12 additions, 0 deletions)
@@ -1364,6 +1364,18 @@ test_that("column functions", {
   # check for unparseable
   df <- as.DataFrame(list(list("a" = "")))
   expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
 
+  # check if array type in string is correctly supported.
+  jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
+  df <- as.DataFrame(list(list("people" = jsonArr)))
+  schema <- structType(structField("name", "string"))
+  arr <- collect(select(df, alias(from_json(df$people, schema, asArray = TRUE), "arrcol")))
+  expect_equal(ncol(arr), 1)
+  expect_equal(nrow(arr), 1)
+  expect_is(arr[[1]][[1]], "list")
+  expect_equal(length(arr$arrcol[[1]]), 2)
+  expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
+  expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
 })

test_that("column binary mathfunctions", {
sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
@@ -81,10 +81,14 @@ private[sql] object SQLUtils extends Logging {
     new JavaSparkContext(spark.sparkContext)
   }
 
-  def createStructType(fields : Seq[StructField]): StructType = {
+  def createStructType(fields: Seq[StructField]): StructType = {
     StructType(fields)
   }
 
+  def createArrayType(dataType: DataType): ArrayType = {
+    ArrayType(elementType = dataType)
+  }
+
   // Support using regex in string interpolation
   private[this] implicit class RegexContext(sc: StringContext) {
     def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*)