Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ object FunctionRegistry {
expression[StructsToJson]("to_json"),
expression[JsonToStructs]("from_json"),
expression[SchemaOfJson]("schema_of_json"),
expression[LengthOfJsonArray]("json_array_length"),

// cast
expression[Cast]("cast"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,71 @@ case class SchemaOfJson(

override def prettyName: String = "schema_of_json"
}

/**
* A function that returns the number of elements in the outermost JSON array.
*/
@ExpressionDescription(
usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.",
arguments = """
Arguments:
* jsonArray - A JSON array. `NULL` is returned in case of any other valid JSON string,
`NULL` or an invalid JSON.
""",
examples = """
Examples:
> SELECT _FUNC_('[1,2,3,4]');
4
> SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]');
5
> SELECT _FUNC_('[1,2');
NULL
""",
since = "3.1.0"
)
case class LengthOfJsonArray(child: Expression) extends UnaryExpression
with CodegenFallback with ExpectsInputTypes {

override def inputTypes: Seq[DataType] = Seq(StringType)
override def dataType: DataType = IntegerType
override def nullable: Boolean = true
override def prettyName: String = "json_array_length"

/**
 * Evaluates the child expression to a JSON string and counts the elements of its
 * top-level array.
 *
 * @param input the row the child expression is evaluated against
 * @return the value produced by `parseCounter` (the element count, or null when the first
 *         token is not the start of an array); null when the child evaluates to null, when
 *         the parser yields no token at all, or when parsing throws
 *         `JsonProcessingException` or `IOException`
 */
override def eval(input: InternalRow): Any = {
  val json = child.eval(input).asInstanceOf[UTF8String]
  if (json == null) {
    // Null input yields a null result.
    null
  } else {
    try {
      Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) {
        parser =>
          // The branches are plain expressions rather than `return` statements: a `return`
          // inside this closure would be a non-local return, which Scala implements by
          // throwing a control exception through the surrounding try block.
          if (parser.nextToken() == null) {
            // No token at all (e.g. an empty string): there is no array to measure.
            null
          } else {
            // Parse the array to compute its length.
            parseCounter(parser, input)
          }
      }
    } catch {
      // Malformed JSON is reported as null instead of failing the evaluation.
      case _: JsonProcessingException | _: IOException => null
    }
  }
}

private def parseCounter(parser: JsonParser, input: InternalRow): Any = {
var length = 0
// Only JSON array are supported for this function.
if (parser.currentToken != JsonToken.START_ARRAY) {
return null
}
// Keep traversing until the end of JSON array
while(parser.nextToken() != JsonToken.END_ARRAY) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can nextToken return null? Looks like it can:

def nextUntil(parser: JsonParser, stopOn: JsonToken): Boolean = {
parser.nextToken() match {
case null => false
case x => x != stopOn
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It returns null when the end of input is reached.
If it returns null before returning END_ARRAY, then our JSON is invalid, and invalid input is already handled.
Anyway, I will now add one more check for null.

length += 1
// skip all the child of inner object or array
parser.skipChildren()
}
length
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -790,4 +790,22 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
checkDecimalInfer(_, """struct<d:decimal(7,3)>""")
}
}

test("Length of JSON array") {
  // Each pair is (JSON text, expected result). The cases that expect null are the empty
  // string, a truncated array, free text, and JSON objects.
  val jsonToExpectedLength = Seq(
    ("", null),
    ("[1,2,3]", 3),
    ("[]", 0),
    ("[[1],[2,3],[]]", 3),
    ("""[{"a":123},{"b":"hello"}]""", 2),
    ("""[1,2,3,[33,44],{"key":[2,3,4]}]""", 5),
    ("""[1,2,3,4,5""", null),
    ("Random String", null),
    ("""{"key":"not a json array"}""", null),
    ("""{"key": 25}""", null)
  )

  jsonToExpectedLength.foreach { case (jsonText, expectedLength) =>
    val expression = LengthOfJsonArray(Literal(jsonText))
    checkEvaluation(expression, expectedLength)
  }
}
}
14 changes: 14 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,19 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr
select schema_of_json(null);
CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a');
SELECT schema_of_json(jsonField) FROM jsonTable;

-- json_array_length
select json_array_length(null);
select json_array_length(2);
select json_array_length();
select json_array_length('');
select json_array_length('[]');
select json_array_length('[1,2,3]');
select json_array_length('[[1,2],[5,6,7]]');
select json_array_length('[{"a":123},{"b":"hello"}]');
select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]');
select json_array_length('{"key":"not a json array"}');
select json_array_length('[1,2,3,4,5');

-- Clean up
DROP VIEW IF EXISTS jsonTable;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 42
-- Number of queries: 53


-- !query
Expand Down Expand Up @@ -346,6 +346,96 @@ org.apache.spark.sql.AnalysisException
cannot resolve 'schema_of_json(jsontable.`jsonField`)' due to data type mismatch: The input json should be a foldable string expression and not null; however, got jsontable.`jsonField`.; line 1 pos 7


-- !query
select json_array_length(null)
-- !query schema
struct<json_array_length(CAST(NULL AS STRING)):int>
-- !query output
NULL


-- !query
select json_array_length(2)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'json_array_length(2)' due to data type mismatch: argument 1 requires string type, however, '2' is of int type.; line 1 pos 7


-- !query
select json_array_length()
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function json_array_length. Expected: 1; Found: 0; line 1 pos 7


-- !query
select json_array_length('')
-- !query schema
struct<json_array_length():int>
-- !query output
NULL


-- !query
select json_array_length('[]')
-- !query schema
struct<json_array_length([]):int>
-- !query output
0


-- !query
select json_array_length('[1,2,3]')
-- !query schema
struct<json_array_length([1,2,3]):int>
-- !query output
3


-- !query
select json_array_length('[[1,2],[5,6,7]]')
-- !query schema
struct<json_array_length([[1,2],[5,6,7]]):int>
-- !query output
2


-- !query
select json_array_length('[{"a":123},{"b":"hello"}]')
-- !query schema
struct<json_array_length([{"a":123},{"b":"hello"}]):int>
-- !query output
2


-- !query
select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]')
-- !query schema
struct<json_array_length([1,2,3,[33,44],{"key":[2,3,4]}]):int>
-- !query output
5


-- !query
select json_array_length('{"key":"not a json array"}')
-- !query schema
struct<json_array_length({"key":"not a json array"}):int>
-- !query output
NULL


-- !query
select json_array_length('[1,2,3,4,5')
-- !query schema
struct<json_array_length([1,2,3,4,5):int>
-- !query output
NULL


-- !query
DROP VIEW IF EXISTS jsonTable
-- !query schema
Expand Down