From f09c477528d92dcdad87976de3e1e733ba2de6fb Mon Sep 17 00:00:00 2001 From: setjet Date: Wed, 24 May 2017 19:22:22 +0100 Subject: [PATCH 1/3] changed function to support scala instead of java, and added java wrapper --- .../scala/org/apache/spark/sql/functions.scala | 18 +++++++++++++++++- .../apache/spark/sql/JsonFunctionsSuite.scala | 9 ++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 563eae0b6483f..40e54520b6654 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3060,7 +3060,7 @@ object functions { from_json(e, schema, Map.empty[String, String]) /** - * Parses a column containing a JSON string into a `StructType` or `ArrayType` of `StructType`s + * (Java-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` of `StructType`s * with the specified schema. Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. @@ -3072,6 +3072,22 @@ object functions { * @since 2.1.0 */ def from_json(e: Column, schema: String, options: java.util.Map[String, String]): Column = { + from_json(e, schema, options.asScala.toMap) + } + + /** + * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` of `StructType`s + * with the specified schema. Returns `null`, in the case of an unparseable string. + * + * @param e a string column containing JSON data. + * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, + * the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL + * format is also supported for the schema. + * + * @group collection_funcs + * @since 2.1.0 + */ + def from_json(e: Column, schema: String, options: Map[String, String]): Column = { val dataType = try { DataType.fromJson(schema) } catch { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 69a500c845a7b..cf2d00fc94423 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -156,13 +156,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { Row(Seq(Row(1, "a"), Row(2, null), Row(null, null)))) } - test("from_json uses DDL strings for defining a schema") { + test("from_json uses DDL strings for defining a schema - java") { val df = Seq("""{"a": 1, "b": "haa"}""").toDS() checkAnswer( df.select(from_json($"value", "a INT, b STRING", new java.util.HashMap[String, String]())), Row(Row(1, "haa")) :: Nil) } + test("from_json uses DDL strings for defining a schema - scala") { + val df = Seq("""{"a": 1, "b": "haa"}""").toDS() + checkAnswer( + df.select(from_json($"value", "a INT, b STRING", Map[String, String]())), + Row(Row(1, "haa")) :: Nil) + } + test("to_json - struct") { val df = Seq(Tuple1(Tuple1(1))).toDF("a") From a2f99ec42d54dd37d9908590d85000376108fa7a Mon Sep 17 00:00:00 2001 From: setjet Date: Thu, 25 May 2017 18:42:04 +0100 Subject: [PATCH 2/3] updated with suggestions --- .../main/scala/org/apache/spark/sql/functions.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 40e54520b6654..9af422a01467e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3060,8 +3060,9 @@ object functions { from_json(e, schema, Map.empty[String, String]) /** - * (Java-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` of `StructType`s - * with the specified schema. Returns `null`, in the case of an unparseable string. + * (Java-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` + * of `StructType`s with the specified schema. Returns `null`, in the case of an unparseable + * string. * * @param e a string column containing JSON data. * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, @@ -3076,8 +3077,9 @@ object functions { } /** - * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` of `StructType`s - * with the specified schema. Returns `null`, in the case of an unparseable string. + * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` + * of `StructType`s with the specified schema. Returns `null`, in the case of an unparseable + * string. * * @param e a string column containing JSON data. * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, @@ -3085,7 +3087,7 @@ object functions { * format is also supported for the schema. * * @group collection_funcs - * @since 2.1.0 + * @since 2.3.0 */ def from_json(e: Column, schema: String, options: Map[String, String]): Column = { val dataType = try { From 27a8c26dac354a326a76381097a6669ff973501a Mon Sep 17 00:00:00 2001 From: setjet Date: Thu, 25 May 2017 19:01:28 +0100 Subject: [PATCH 3/3] resolved comment alignment --- .../org/apache/spark/sql/functions.scala | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 9af422a01467e..388a9213c69b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3077,18 +3077,18 @@ object functions { } /** - * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` - * of `StructType`s with the specified schema. Returns `null`, in the case of an unparseable - * string. - * - * @param e a string column containing JSON data. - * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, - * the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL - * format is also supported for the schema. - * - * @group collection_funcs - * @since 2.3.0 - */ + * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType` + * of `StructType`s with the specified schema. Returns `null`, in the case of an unparseable + * string. + * + * @param e a string column containing JSON data. + * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, + * the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL + * format is also supported for the schema. + * + * @group collection_funcs + * @since 2.3.0 + */ def from_json(e: Column, schema: String, options: Map[String, String]): Column = { val dataType = try { DataType.fromJson(schema)