From 79cc91268ad2f6957d62ad6983458ace9d4e21a5 Mon Sep 17 00:00:00 2001 From: iRakson Date: Mon, 2 Mar 2020 16:30:38 +0530 Subject: [PATCH 01/14] [SPARK-31008]Support json_array_length function --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/jsonExpressions.scala | 57 ++++++++++++++++ .../expressions/JsonExpressionsSuite.scala | 26 +++++++ .../org/apache/spark/sql/functions.scala | 11 +++ .../sql-tests/inputs/json-functions.sql | 11 +++ .../sql-tests/results/json-functions.sql.out | 67 ++++++++++++++++++- 6 files changed, 172 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index c11186ebc0745..fb51fa743975c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -538,6 +538,7 @@ object FunctionRegistry { expression[StructsToJson]("to_json"), expression[JsonToStructs]("from_json"), expression[SchemaOfJson]("schema_of_json"), + expression[LengthOfJsonArray]("json_array_length"), // cast expression[Cast]("cast"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 4c2a511c11166..69f86e90b26bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -24,6 +24,7 @@ import scala.util.parsing.combinator.RegexParsers import com.fasterxml.jackson.core._ import com.fasterxml.jackson.core.json.JsonReadFeature +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -796,3 +797,59 @@ case class SchemaOfJson( override def prettyName: String = "schema_of_json" } + +/** + * A function that returns number of elements in outer Json Array. + */ +@ExpressionDescription( + usage = "_FUNC_(jsonArray) - Returns length of the jsonArray", + examples = """ + Examples: + > SELECT _FUNC_('[1,2,3,4]'); + 4 + > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]'); + 5 + """, + since = "3.0.0" +) +case class LengthOfJsonArray(child: Expression) + extends UnaryExpression with CodegenFallback { + override def dataType: DataType = IntegerType + override def nullable: Boolean = true + override def prettyName: String = "json_array_length" + + override def eval(input: InternalRow): Any = { + @transient + val json = child.eval(input).asInstanceOf[UTF8String] + try { + Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) { + parser => { + // return null if null array is encountered. + if (parser.nextToken() == null) { + return null + } + // Parse the array to compute its length. + parseCounter(parser, input) + } + } + } catch { + case _: JsonProcessingException => null + } + } + + private def parseCounter(parser: JsonParser, input: InternalRow): Int = { + // Counter for length of array + var array_length: Int = 0; + // Only json array are supported for this function. + if (parser.getCurrentToken != JsonToken.START_ARRAY) { + throw new AnalysisException(s"$prettyName can only be called on Json Array.") + } + // Keep traversing until the end of Json Array + while(parser.nextToken() != JsonToken.END_ARRAY) { + array_length += 1 + // skip all the child of inner object or array + parser.skipChildren() + } + array_length + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 032e0ac61884b..7a00549588244 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -790,4 +790,30 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with checkDecimalInfer(_, """struct""") } } + + test("Length of Json Array") { + val null_json_array = """""" + val simple_json_array = """[1,2,3]""" + val empty_json_array = """[]""" + val json_array_of_array = """[[1],[2,3],[]]""" + val json_array_of_objects = """[{"a":123},{"b":"hello"}]""" + val complex_json_array = """[1,2,3,[33,44],{"key":[2,3,4]}]""" + val not_a_json_array = """{"key":"not a json array"}""" + val invalid_json_array = """[1,2,3,4,5""" + + checkEvaluation(LengthOfJsonArray(Literal(null_json_array)), null) + checkEvaluation(LengthOfJsonArray(Literal(simple_json_array)), 3) + checkEvaluation(LengthOfJsonArray(Literal(empty_json_array)), 0) + checkEvaluation(LengthOfJsonArray(Literal(json_array_of_array)), 3) + checkEvaluation(LengthOfJsonArray(Literal(json_array_of_objects)), 2) + checkEvaluation(LengthOfJsonArray(Literal(complex_json_array)), 5) + checkEvaluation(LengthOfJsonArray(Literal(invalid_json_array)), null) + + val exception = intercept[TestFailedException]{ + checkEvaluation(LengthOfJsonArray(Literal(not_a_json_array)), null) + }.getCause + + assert(exception.isInstanceOf[AnalysisException]) + assert(exception.getMessage.contains("can only be called on Json Array")) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 653e1a739aaf1..3ee328f71d272 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4074,6 +4074,17 @@ object functions { def to_json(e: Column): Column = to_json(e, Map.empty[String, String]) + /** + * Returns length of json array if a valid json array is passed. + * + * @param e a string column containing a json array + * @return Length of json array + * @since 3.0.0 + */ + def json_array_legth(e: Column): Column = withExpr { + LengthOfJsonArray(e.expr) + } + /** * Returns length of array or map. * diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql index 6c14eee2e4e61..b9fba39a9fa9e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql @@ -58,5 +58,16 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr select schema_of_json(null); CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'); SELECT schema_of_json(jsonField) FROM jsonTable; + +-- json_array_length +select json_array_length(''); +select json_array_length('[]'); +select json_array_length('[1,2,3]'); +select json_array_length('[[1,2],[5,6,7]]'); +select json_array_length('[{"a":123},{"b":"hello"}]'); +select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]'); +select json_array_length('{"key":"not a json array"}'); +select json_array_length('[1,2,3,4,5'); + -- Clean up DROP VIEW IF EXISTS jsonTable; diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 920b45a8fa77c..922afe40250bc 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 42 +-- Number of queries: 50 -- !query @@ -346,6 +346,71 @@ org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_json(jsontable.`jsonField`)' due to data type mismatch: The input json should be a foldable string expression and not null; however, got jsontable.`jsonField`.; line 1 pos 7 +-- !query +select json_array_length('') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select json_array_length('[]') +-- !query schema +struct +-- !query output +0 + + +-- !query +select json_array_length('[1,2,3]') +-- !query schema +struct +-- !query output +3 + + +-- !query +select json_array_length('[[1,2],[5,6,7]]') +-- !query schema +struct +-- !query output +2 + + +-- !query +select json_array_length('[{"a":123},{"b":"hello"}]') +-- !query schema +struct +-- !query output +2 + + +-- !query +select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]') +-- !query schema +struct +-- !query output +5 + + +-- !query +select json_array_length('{"key":"not a json array"}') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +json_array_length can only be called on Json Array.; + + +-- !query +select json_array_length('[1,2,3,4,5') +-- !query schema +struct +-- !query output +NULL + + -- !query DROP VIEW IF EXISTS jsonTable -- !query schema From 243fead78adc98816b5c8b1545449777f27ba47d Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 4 Mar 2020 11:05:32 +0530 Subject: [PATCH 02/14] review comments fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 8 +++++++- .../main/scala/org/apache/spark/sql/functions.scala | 11 ----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 69f86e90b26bf..9fa8f8d99c914 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -803,14 +803,20 @@ case class SchemaOfJson( */ @ExpressionDescription( usage = "_FUNC_(jsonArray) - Returns length of the jsonArray", + arguments = """ + jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any other + valid JSON expression is passed. `NULL` is returned in case of invalid JSON. + """, examples = """ Examples: > SELECT _FUNC_('[1,2,3,4]'); 4 > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]'); 5 + > SELECT _FUNC_('[1,2'); + NULL """, - since = "3.0.0" + since = "3.1.0" ) case class LengthOfJsonArray(child: Expression) extends UnaryExpression with CodegenFallback { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 3ee328f71d272..653e1a739aaf1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4074,17 +4074,6 @@ object functions { def to_json(e: Column): Column = to_json(e, Map.empty[String, String]) - /** - * Returns length of json array if a valid json array is passed. - * - * @param e a string column containing a json array - * @return Length of json array - * @since 3.0.0 - */ - def json_array_legth(e: Column): Column = withExpr { - LengthOfJsonArray(e.expr) - } - /** * Returns length of array or map. * From 7d6d2250d5e146a97c39d7a66cb73cf2ee4abe0b Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 4 Mar 2020 18:12:55 +0530 Subject: [PATCH 03/14] review comments fix --- .../catalyst/expressions/jsonExpressions.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 9fa8f8d99c914..b8fd12448f125 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -825,7 +825,6 @@ case class LengthOfJsonArray(child: Expression) override def prettyName: String = "json_array_length" override def eval(input: InternalRow): Any = { - @transient val json = child.eval(input).asInstanceOf[UTF8String] try { Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) { @@ -839,23 +838,26 @@ case class LengthOfJsonArray(child: Expression) } } } catch { - case _: JsonProcessingException => null + case _: JsonProcessingException | _: IOException => null } } private def parseCounter(parser: JsonParser, input: InternalRow): Int = { - // Counter for length of array - var array_length: Int = 0; + var length: Int = 0; // Only json array are supported for this function. - if (parser.getCurrentToken != JsonToken.START_ARRAY) { + if (parser.currentToken != JsonToken.START_ARRAY) { throw new AnalysisException(s"$prettyName can only be called on Json Array.") } // Keep traversing until the end of Json Array while(parser.nextToken() != JsonToken.END_ARRAY) { - array_length += 1 + // Null indicates end of input. + if (parser.currentToken == null) { + throw new AnalysisException("Please provide a valid Json Array.") + } + length += 1 // skip all the child of inner object or array parser.skipChildren() } - array_length + length } } From 5d782ce44bd5488bec20ecd51da9c2b57f6473bc Mon Sep 17 00:00:00 2001 From: iRakson Date: Thu, 5 Mar 2020 11:03:45 +0530 Subject: [PATCH 04/14] review comments fix --- .../expressions/jsonExpressions.scala | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index b8fd12448f125..98800cb45d15e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -799,22 +799,23 @@ case class SchemaOfJson( } /** - * A function that returns number of elements in outer Json Array. + * A function that returns the number of elements in outer Json Array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns length of the jsonArray", + usage = "_FUNC_(jsonArray) - Returns the number of elements in outer Json Array.", arguments = """ - jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any other - valid JSON expression is passed. `NULL` is returned in case of invalid JSON. + Arguments: + * jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any other + valid JSON expression is passed. `NULL` is returned in case of invalid JSON. """, examples = """ Examples: - > SELECT _FUNC_('[1,2,3,4]'); - 4 - > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]'); - 5 - > SELECT _FUNC_('[1,2'); - NULL + > SELECT _FUNC_('[1,2,3,4]'); + 4 + > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]'); + 5 + > SELECT _FUNC_('[1,2'); + NULL """, since = "3.1.0" ) From 5f0d010efff8ebb5e40babba3f877f25cba1da73 Mon Sep 17 00:00:00 2001 From: iRakson Date: Thu, 5 Mar 2020 11:41:30 +0530 Subject: [PATCH 05/14] scala style fix --- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 98800cb45d15e..2e9d7c0a510d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -805,8 +805,8 @@ case class SchemaOfJson( usage = "_FUNC_(jsonArray) - Returns the number of elements in outer Json Array.", arguments = """ Arguments: - * jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any other - valid JSON expression is passed. `NULL` is returned in case of invalid JSON. + * jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any + other valid JSON expression is passed. `NULL` is returned in case of invalid JSON. """, examples = """ Examples: From 4e06ff2bf3da7fa8bf4dcee3393d65e7d2346747 Mon Sep 17 00:00:00 2001 From: iRakson Date: Tue, 10 Mar 2020 14:32:27 +0530 Subject: [PATCH 06/14] review comment fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 2e9d7c0a510d5..d737512ae2c2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -799,14 +799,14 @@ case class SchemaOfJson( } /** - * A function that returns the number of elements in outer Json Array. + * A function that returns the number of elements in outer JSON Array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns the number of elements in outer Json Array.", + usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON Array.", arguments = """ Arguments: - * jsonArray - A JSON array is required as argument. `Analysis Exception` is thrown if any - other valid JSON expression is passed. `NULL` is returned in case of invalid JSON. + * jsonArray - A JSON array is required as argument. An Exception is thrown if any + other valid JSON strings are passed. `NULL` is returned in case of invalid JSON. """, examples = """ Examples: @@ -847,13 +847,13 @@ case class LengthOfJsonArray(child: Expression) var length: Int = 0; // Only json array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { - throw new AnalysisException(s"$prettyName can only be called on Json Array.") + throw new AnalysisException(s"$prettyName can only be called on JSON Array.") } // Keep traversing until the end of Json Array while(parser.nextToken() != JsonToken.END_ARRAY) { // Null indicates end of input. if (parser.currentToken == null) { - throw new AnalysisException("Please provide a valid Json Array.") + throw new AnalysisException("Please provide a valid JSON Array.") } length += 1 // skip all the child of inner object or array From 27a5f95c7506f0c82036021492651bc9470147f5 Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 11 Mar 2020 07:17:57 +0530 Subject: [PATCH 07/14] review comment fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 12 ++++++------ .../catalyst/expressions/JsonExpressionsSuite.scala | 4 ++-- .../sql-tests/results/json-functions.sql.out | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index d737512ae2c2a..74a5fc09c23a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -799,10 +799,10 @@ case class SchemaOfJson( } /** - * A function that returns the number of elements in outer JSON Array. + * A function that returns the number of elements in outer JSON array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON Array.", + usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON array.", arguments = """ Arguments: * jsonArray - A JSON array is required as argument. An Exception is thrown if any @@ -845,15 +845,15 @@ case class LengthOfJsonArray(child: Expression) private def parseCounter(parser: JsonParser, input: InternalRow): Int = { var length: Int = 0; - // Only json array are supported for this function. + // Only JSON array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { - throw new AnalysisException(s"$prettyName can only be called on JSON Array.") + throw new AnalysisException(s"$prettyName can only be called on JSON array.") } - // Keep traversing until the end of Json Array + // Keep traversing until the end of JSON array while(parser.nextToken() != JsonToken.END_ARRAY) { // Null indicates end of input. if (parser.currentToken == null) { - throw new AnalysisException("Please provide a valid JSON Array.") + throw new AnalysisException("Please provide a valid JSON array.") } length += 1 // skip all the child of inner object or array diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 7a00549588244..b560a62c38bcf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -791,7 +791,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } } - test("Length of Json Array") { + test("Length of JSON array") { val null_json_array = """""" val simple_json_array = """[1,2,3]""" val empty_json_array = """[]""" @@ -814,6 +814,6 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with }.getCause assert(exception.isInstanceOf[AnalysisException]) - assert(exception.getMessage.contains("can only be called on Json Array")) + assert(exception.getMessage.contains("can only be called on JSON array")) } } diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 922afe40250bc..8602c108cd1ae 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -400,7 +400,7 @@ select json_array_length('{"key":"not a json array"}') struct<> -- !query output org.apache.spark.sql.AnalysisException -json_array_length can only be called on Json Array.; +json_array_length can only be called on JSON array.; -- !query From 3bcc1cd2078493aa79a746762442a152bec24b60 Mon Sep 17 00:00:00 2001 From: iRakson Date: Mon, 23 Mar 2020 23:08:23 +0530 Subject: [PATCH 08/14] review comment fix --- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 9 ++++----- .../sql/catalyst/expressions/JsonExpressionsSuite.scala | 2 +- .../resources/sql-tests/results/json-functions.sql.out | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 74a5fc09c23a6..fab6a61ebbe3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -24,7 +24,6 @@ import scala.util.parsing.combinator.RegexParsers import com.fasterxml.jackson.core._ import com.fasterxml.jackson.core.json.JsonReadFeature -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -805,8 +804,8 @@ case class SchemaOfJson( usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON array.", arguments = """ Arguments: - * jsonArray - A JSON array is required as argument. An Exception is thrown if any - other valid JSON strings are passed. `NULL` is returned in case of invalid JSON. + * jsonArray - A JSON array. An Exception is thrown if any other valid JSON strings are passed. + `NULL` is returned in case of invalid JSON. """, examples = """ Examples: @@ -847,13 +846,13 @@ case class LengthOfJsonArray(child: Expression) var length: Int = 0; // Only JSON array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { - throw new AnalysisException(s"$prettyName can only be called on JSON array.") + throw new IllegalArgumentException(s"$prettyName can only be called on JSON array.") } // Keep traversing until the end of JSON array while(parser.nextToken() != JsonToken.END_ARRAY) { // Null indicates end of input. if (parser.currentToken == null) { - throw new AnalysisException("Please provide a valid JSON array.") + throw new IllegalArgumentException("Please provide a valid JSON array.") } length += 1 // skip all the child of inner object or array diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index b560a62c38bcf..5099c02fe96f0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -813,7 +813,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with checkEvaluation(LengthOfJsonArray(Literal(not_a_json_array)), null) }.getCause - assert(exception.isInstanceOf[AnalysisException]) + assert(exception.isInstanceOf[IllegalArgumentException]) assert(exception.getMessage.contains("can only be called on JSON array")) } } diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 8602c108cd1ae..17898ec3e127a 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -399,7 +399,7 @@ select json_array_length('{"key":"not a json array"}') -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException +java.lang.IllegalArgumentException json_array_length can only be called on JSON array.; From 593dcbbd26f9d5914dcee386d01e968b62dcf783 Mon Sep 17 00:00:00 2001 From: iRakson Date: Tue, 24 Mar 2020 16:30:32 +0530 Subject: [PATCH 09/14] fix --- .../src/test/resources/sql-tests/results/json-functions.sql.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 17898ec3e127a..64b9c3ec892c1 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -400,7 +400,7 @@ select json_array_length('{"key":"not a json array"}') struct<> -- !query output java.lang.IllegalArgumentException -json_array_length can only be called on JSON array.; +json_array_length can only be called on JSON array. -- !query From b2c43490729a92278c491fd1bbf004bbf9ff3338 Mon Sep 17 00:00:00 2001 From: iRakson Date: Sat, 4 Apr 2020 19:51:43 +0530 Subject: [PATCH 10/14] review comment fix --- .../expressions/jsonExpressions.scala | 21 ++++++---- .../expressions/JsonExpressionsSuite.scala | 39 +++++++++---------- .../sql-tests/inputs/json-functions.sql | 3 ++ .../sql-tests/results/json-functions.sql.out | 28 ++++++++++++- .../apache/spark/sql/JsonFunctionsSuite.scala | 7 ++++ 5 files changed, 69 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index fab6a61ebbe3f..c72f7af7b4f9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -20,10 +20,8 @@ package org.apache.spark.sql.catalyst.expressions import java.io._ import scala.util.parsing.combinator.RegexParsers - import com.fasterxml.jackson.core._ import com.fasterxml.jackson.core.json.JsonReadFeature - import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -34,6 +32,8 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils +import scala.util.control.NonFatal + private[this] sealed trait PathInstruction private[this] object PathInstruction { private[expressions] case object Subscript extends PathInstruction @@ -804,8 +804,8 @@ case class SchemaOfJson( usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON array.", arguments = """ Arguments: - * jsonArray - A JSON array. An Exception is thrown if any other valid JSON strings are passed. - `NULL` is returned in case of invalid JSON. + * jsonArray - A JSON array. An exception is thrown if any other valid JSON strings are passed. + `NULL` is returned in case of an invalid JSON. """, examples = """ Examples: @@ -818,14 +818,21 @@ case class SchemaOfJson( """, since = "3.1.0" ) -case class LengthOfJsonArray(child: Expression) - extends UnaryExpression with CodegenFallback { +case class LengthOfJsonArray(child: Expression) extends UnaryExpression + with CodegenFallback with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) override def dataType: DataType = IntegerType override def nullable: Boolean = true override def prettyName: String = "json_array_length" override def eval(input: InternalRow): Any = { val json = child.eval(input).asInstanceOf[UTF8String] + // return null for null input + if (json == null) { + return null + } + try { Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) { parser => { @@ -843,7 +850,7 @@ case class LengthOfJsonArray(child: Expression) } private def parseCounter(parser: JsonParser, input: InternalRow): Int = { - var length: Int = 0; + var length = 0; // Only JSON array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { throw new IllegalArgumentException(s"$prettyName can only be called on JSON array.") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 5099c02fe96f0..26ea9a28275a1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -792,28 +792,25 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } test("Length of JSON array") { - val null_json_array = """""" - val simple_json_array = """[1,2,3]""" - val empty_json_array = """[]""" - val json_array_of_array = """[[1],[2,3],[]]""" - val json_array_of_objects = """[{"a":123},{"b":"hello"}]""" - val complex_json_array = """[1,2,3,[33,44],{"key":[2,3,4]}]""" + Seq( + ("""""", null), + ("""[1,2,3]""", 3), + ("""[]""", 0), + ("""[[1],[2,3],[]]""", 3), + ("""[{"a":123},{"b":"hello"}]""", 2), + ("""[1,2,3,[33,44],{"key":[2,3,4]}]""", 5), + ("""[1,2,3,4,5""", null), + ("""Random String""", null) + ).foreach{ + case(literal, expectedValue) => + checkEvaluation(LengthOfJsonArray(Literal(literal)), expectedValue) + } + val not_a_json_array = """{"key":"not a json array"}""" - val invalid_json_array = """[1,2,3,4,5""" - - checkEvaluation(LengthOfJsonArray(Literal(null_json_array)), null) - checkEvaluation(LengthOfJsonArray(Literal(simple_json_array)), 3) - checkEvaluation(LengthOfJsonArray(Literal(empty_json_array)), 0) - checkEvaluation(LengthOfJsonArray(Literal(json_array_of_array)), 3) - checkEvaluation(LengthOfJsonArray(Literal(json_array_of_objects)), 2) - checkEvaluation(LengthOfJsonArray(Literal(complex_json_array)), 5) - checkEvaluation(LengthOfJsonArray(Literal(invalid_json_array)), null) - - val exception = intercept[TestFailedException]{ - checkEvaluation(LengthOfJsonArray(Literal(not_a_json_array)), null) - }.getCause - assert(exception.isInstanceOf[IllegalArgumentException]) - assert(exception.getMessage.contains("can only be called on JSON array")) + checkExceptionInExpression[IllegalArgumentException]( + LengthOfJsonArray(Literal(not_a_json_array)), + expectedErrMsg = "json_array_length can only be called on JSON array" + ) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql index b9fba39a9fa9e..06de7982efce8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql @@ -60,6 +60,9 @@ CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, SELECT schema_of_json(jsonField) FROM jsonTable; -- json_array_length +select json_array_length(null); +select json_array_length(2); +select json_array_length(); select json_array_length(''); select json_array_length('[]'); select json_array_length('[1,2,3]'); diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 64b9c3ec892c1..5bb034d3273b2 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 50 +-- Number of queries: 53 -- !query @@ -346,6 +346,32 @@ org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_json(jsontable.`jsonField`)' due to data type mismatch: The input json should be a foldable string expression and not null; however, got jsontable.`jsonField`.; line 1 pos 7 +-- !query +select json_array_length(null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select json_array_length(2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'json_array_length(2)' due to data type mismatch: argument 1 requires string type, however, '2' is of int type.; line 1 pos 7 + + +-- !query +select json_array_length() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function json_array_length. Expected: 1; Found: 0; line 1 pos 7 + + -- !query select json_array_length('') -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 65e1dde8ae141..0028f69fa8046 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -710,4 +710,11 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row("string"))) } + test("json_array_length") { + val df = Seq(1).toDF("json") + val errMsg = intercept[AnalysisException] { + df.selectExpr("json_array_length(json)") + }.getMessage + assert(errMsg.contains("due to data type mismatch")) + } } From b0c51dcadcf85a11d634f04c730d4546b565c5ca Mon Sep 17 00:00:00 2001 From: iRakson Date: Sat, 4 Apr 2020 19:59:52 +0530 Subject: [PATCH 11/14] fix --- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index c72f7af7b4f9e..f396605dd6fac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -20,8 +20,10 @@ package org.apache.spark.sql.catalyst.expressions import java.io._ import scala.util.parsing.combinator.RegexParsers + import com.fasterxml.jackson.core._ import com.fasterxml.jackson.core.json.JsonReadFeature + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -32,8 +34,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils -import scala.util.control.NonFatal - private[this] sealed trait PathInstruction private[this] object PathInstruction { private[expressions] case object Subscript extends PathInstruction From 391f33d003998f2f5672405dc26ee96e0a1e4333 Mon Sep 17 00:00:00 2001 From: iRakson Date: Sun, 5 Apr 2020 02:25:10 +0530 Subject: [PATCH 12/14] review comment fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 10 +++++----- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 8 -------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index f396605dd6fac..4a0fd4b23ff82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -798,14 +798,14 @@ case class SchemaOfJson( } /** - * A function that returns the number of elements in outer JSON array. + * A function that returns the number of elements in the outmost JSON array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns the number of elements in outer JSON array.", + usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.", arguments = """ Arguments: * jsonArray - A JSON array. An exception is thrown if any other valid JSON strings are passed. - `NULL` is returned in case of an invalid JSON. + `NULL` is returned in case of `NULL` or an invalid JSON. """, examples = """ Examples: @@ -821,7 +821,7 @@ case class SchemaOfJson( case class LengthOfJsonArray(child: Expression) extends UnaryExpression with CodegenFallback with ExpectsInputTypes { - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def inputTypes: Seq[DataType] = Seq(StringType) override def dataType: DataType = IntegerType override def nullable: Boolean = true override def prettyName: String = "json_array_length" @@ -850,7 +850,7 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression } private def parseCounter(parser: JsonParser, input: InternalRow): Int = { - var length = 0; + var length = 0 // Only JSON array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { throw new IllegalArgumentException(s"$prettyName can only be called on JSON array.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 0028f69fa8046..4ceaf03849735 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -709,12 +709,4 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { options.asJava)), Seq(Row("string"))) } - - test("json_array_length") { - val df = Seq(1).toDF("json") - val errMsg = intercept[AnalysisException] { - df.selectExpr("json_array_length(json)") - }.getMessage - assert(errMsg.contains("due to data type mismatch")) - } } From 313151f5964ed111b34d3c0dd03305150b2ef0b1 Mon Sep 17 00:00:00 2001 From: iRakson Date: Mon, 6 Apr 2020 11:49:05 +0530 Subject: [PATCH 13/14] review comment fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 4 ---- .../catalyst/expressions/JsonExpressionsSuite.scala | 12 ++++++------ .../org/apache/spark/sql/JsonFunctionsSuite.scala | 1 + 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 4a0fd4b23ff82..82dc1e8414cb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -857,10 +857,6 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression } // Keep traversing until the end of JSON array while(parser.nextToken() != JsonToken.END_ARRAY) { - // Null indicates end of input. - if (parser.currentToken == null) { - throw new IllegalArgumentException("Please provide a valid JSON array.") - } length += 1 // skip all the child of inner object or array parser.skipChildren() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 26ea9a28275a1..d6139bc8bf312 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -793,15 +793,15 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("Length of JSON array") { Seq( - ("""""", null), - ("""[1,2,3]""", 3), - ("""[]""", 0), - ("""[[1],[2,3],[]]""", 3), + ("", null), + ("[1,2,3]", 3), + ("[]", 0), + ("[[1],[2,3],[]]", 3), ("""[{"a":123},{"b":"hello"}]""", 2), ("""[1,2,3,[33,44],{"key":[2,3,4]}]""", 5), ("""[1,2,3,4,5""", null), - ("""Random String""", null) - ).foreach{ + ("Random String", null) + ).foreach { case(literal, expectedValue) => checkEvaluation(LengthOfJsonArray(Literal(literal)), expectedValue) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 4ceaf03849735..65e1dde8ae141 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -709,4 +709,5 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { options.asJava)), Seq(Row("string"))) } + } From f44e24ec66e11fdb71c1a9a813f04f6e37244b61 Mon Sep 17 00:00:00 2001 From: iRakson Date: Mon, 6 Apr 2020 17:55:00 +0530 Subject: [PATCH 14/14] review comment fix --- .../sql/catalyst/expressions/jsonExpressions.scala | 8 ++++---- .../catalyst/expressions/JsonExpressionsSuite.scala | 11 +++-------- .../sql-tests/results/json-functions.sql.out | 5 ++--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 82dc1e8414cb8..fbb11de6a310e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -804,8 +804,8 @@ case class SchemaOfJson( usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.", arguments = """ Arguments: - * jsonArray - A JSON array. An exception is thrown if any other valid JSON strings are passed. - `NULL` is returned in case of `NULL` or an invalid JSON. + * jsonArray - A JSON array. `NULL` is returned in case of any other valid JSON string, + `NULL` or an invalid JSON. """, examples = """ Examples: @@ -849,11 +849,11 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression } } - private def parseCounter(parser: JsonParser, input: InternalRow): Int = { + private def parseCounter(parser: JsonParser, input: InternalRow): Any = { var length = 0 // Only JSON array are supported for this function. if (parser.currentToken != JsonToken.START_ARRAY) { - throw new IllegalArgumentException(s"$prettyName can only be called on JSON array.") + return null } // Keep traversing until the end of JSON array while(parser.nextToken() != JsonToken.END_ARRAY) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index d6139bc8bf312..7c64e74d1abe9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -800,17 +800,12 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with ("""[{"a":123},{"b":"hello"}]""", 2), ("""[1,2,3,[33,44],{"key":[2,3,4]}]""", 5), ("""[1,2,3,4,5""", null), - ("Random String", null) + ("Random String", null), + ("""{"key":"not a json array"}""", null), + ("""{"key": 25}""", null) ).foreach { case(literal, expectedValue) => checkEvaluation(LengthOfJsonArray(Literal(literal)), expectedValue) } - - val not_a_json_array = """{"key":"not a json array"}""" - - checkExceptionInExpression[IllegalArgumentException]( - LengthOfJsonArray(Literal(not_a_json_array)), - expectedErrMsg = "json_array_length can only be called on JSON array" - ) } } diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 5bb034d3273b2..135b18cd29801 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -423,10 +423,9 @@ struct -- !query select json_array_length('{"key":"not a json array"}') -- !query schema -struct<> +struct -- !query output -java.lang.IllegalArgumentException -json_array_length can only be called on JSON array. +NULL -- !query