diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index c11186ebc0745..fb51fa743975c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -538,6 +538,7 @@ object FunctionRegistry {
     expression[StructsToJson]("to_json"),
     expression[JsonToStructs]("from_json"),
     expression[SchemaOfJson]("schema_of_json"),
+    expression[LengthOfJsonArray]("json_array_length"),
 
     // cast
     expression[Cast]("cast"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 4c2a511c11166..fbb11de6a310e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -796,3 +796,71 @@ case class SchemaOfJson(
 
   override def prettyName: String = "schema_of_json"
 }
+
+/**
+ * A function that returns the number of elements in the outermost JSON array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(jsonArray) - Returns the number of elements in the outermost JSON array.",
+  arguments = """
+    Arguments:
+      * jsonArray - A JSON array. `NULL` is returned for any other valid JSON string,
+          for `NULL`, or for an invalid JSON string.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('[1,2,3,4]');
+        4
+      > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]');
+        5
+      > SELECT _FUNC_('[1,2');
+        NULL
+  """,
+  since = "3.1.0"
+)
+case class LengthOfJsonArray(child: Expression) extends UnaryExpression
+  with CodegenFallback with ExpectsInputTypes {
+
+  override def inputTypes: Seq[DataType] = Seq(StringType)
+  override def dataType: DataType = IntegerType
+  override def nullable: Boolean = true
+  override def prettyName: String = "json_array_length"
+
+  override def eval(input: InternalRow): Any = {
+    val json = child.eval(input).asInstanceOf[UTF8String]
+    // Return null for null input.
+    if (json == null) {
+      return null
+    }
+
+    try {
+      Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) {
+        parser => {
+          // Return null if the input contains no JSON token (e.g. an empty string).
+          if (parser.nextToken() == null) {
+            return null
+          }
+          // Parse the array to compute its length.
+          parseCounter(parser, input)
+        }
+      }
+    } catch {
+      case _: JsonProcessingException | _: IOException => null
+    }
+  }
+
+  private def parseCounter(parser: JsonParser, input: InternalRow): Any = {
+    var length = 0
+    // Only JSON arrays are supported by this function.
+    if (parser.currentToken != JsonToken.START_ARRAY) {
+      return null
+    }
+    // Keep traversing until the end of the JSON array.
+    while (parser.nextToken() != JsonToken.END_ARRAY) {
+      length += 1
+      // Skip all the children of inner objects or arrays.
+      parser.skipChildren()
+    }
+    length
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index 032e0ac61884b..7c64e74d1abe9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -790,4 +790,22 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
       checkDecimalInfer(_, """struct""")
     }
   }
+
+  test("Length of JSON array") {
+    Seq(
+      ("", null),
+      ("[1,2,3]", 3),
+      ("[]", 0),
+      ("[[1],[2,3],[]]", 3),
+      ("""[{"a":123},{"b":"hello"}]""", 2),
+      ("""[1,2,3,[33,44],{"key":[2,3,4]}]""", 5),
+      ("""[1,2,3,4,5""", null),
+      ("Random String", null),
+      ("""{"key":"not a json array"}""", null),
+      ("""{"key": 25}""", null)
+    ).foreach {
+      case (literal, expectedValue) =>
+        checkEvaluation(LengthOfJsonArray(Literal(literal)), expectedValue)
+    }
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
index 6c14eee2e4e61..06de7982efce8 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
@@ -58,5 +58,19 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr
 select schema_of_json(null);
 CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a');
 SELECT schema_of_json(jsonField) FROM jsonTable;
+
+-- json_array_length
+select json_array_length(null);
+select json_array_length(2);
+select json_array_length();
+select json_array_length('');
+select json_array_length('[]');
+select json_array_length('[1,2,3]');
+select json_array_length('[[1,2],[5,6,7]]');
+select json_array_length('[{"a":123},{"b":"hello"}]');
+select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]');
+select json_array_length('{"key":"not a json array"}');
+select json_array_length('[1,2,3,4,5');
+
 -- Clean up
 DROP VIEW IF EXISTS jsonTable;
diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index 920b45a8fa77c..135b18cd29801 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 42
+-- Number of queries: 53
 
 
 -- !query
@@ -346,6 +346,96 @@ org.apache.spark.sql.AnalysisException
 cannot resolve 'schema_of_json(jsontable.`jsonField`)' due to data type mismatch: The input json should be a foldable string expression and not null; however, got jsontable.`jsonField`.; line 1 pos 7
 
 
+-- !query
+select json_array_length(null)
+-- !query schema
+struct
+-- !query output
+NULL
+
+
+-- !query
+select json_array_length(2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'json_array_length(2)' due to data type mismatch: argument 1 requires string type, however, '2' is of int type.; line 1 pos 7
+
+
+-- !query
+select json_array_length()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Invalid number of arguments for function json_array_length. Expected: 1; Found: 0; line 1 pos 7
+
+
+-- !query
+select json_array_length('')
+-- !query schema
+struct
+-- !query output
+NULL
+
+
+-- !query
+select json_array_length('[]')
+-- !query schema
+struct
+-- !query output
+0
+
+
+-- !query
+select json_array_length('[1,2,3]')
+-- !query schema
+struct
+-- !query output
+3
+
+
+-- !query
+select json_array_length('[[1,2],[5,6,7]]')
+-- !query schema
+struct
+-- !query output
+2
+
+
+-- !query
+select json_array_length('[{"a":123},{"b":"hello"}]')
+-- !query schema
+struct
+-- !query output
+2
+
+
+-- !query
+select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]')
+-- !query schema
+struct
+-- !query output
+5
+
+
+-- !query
+select json_array_length('{"key":"not a json array"}')
+-- !query schema
+struct
+-- !query output
+NULL
+
+
+-- !query
+select json_array_length('[1,2,3,4,5')
+-- !query schema
+struct
+-- !query output
+NULL
+
+
 -- !query
 DROP VIEW IF EXISTS jsonTable
 -- !query schema
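
For reviewers, a minimal usage sketch (not part of the patch) that exercises the new function end to end. It assumes a locally built Spark that includes this change and a standard SparkSession; the object name, app name, and column alias below are made up for illustration.

// Hypothetical example, not part of the patch: requires a build with LengthOfJsonArray registered.
import org.apache.spark.sql.SparkSession

object JsonArrayLengthDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("json_array_length demo")
      .getOrCreate()

    // Counts only the elements of the outermost array; nested arrays and objects count as one element each.
    spark.sql("""SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') AS len""").show()  // expected: 5

    // Non-array JSON, malformed JSON, and NULL input all return NULL.
    spark.sql("""SELECT json_array_length('{"key": 25}'), json_array_length('[1,2'), json_array_length(NULL)""").show()

    spark.stop()
  }
}

Because the expression extends CodegenFallback, it is always evaluated in interpreted mode, so the same results are expected whether or not whole-stage code generation is enabled.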