-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-31008][SQL] Support json_array_length function #27759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
79cc912
243fead
7d6d225
5d782ce
5f0d010
4e06ff2
27a5f95
3bcc1cd
593dcbb
b2c4349
b0c51dc
391f33d
313151f
f44e24e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -796,3 +796,71 @@ case class SchemaOfJson( | |||||||||||||
|
|
||||||||||||||
| override def prettyName: String = "schema_of_json" | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
/**
 * A function that returns the number of elements in the outermost JSON array.
 *
 * The expression evaluates to `NULL` when the input is `NULL`, contains no JSON token at all,
 * cannot be parsed as JSON, or is valid JSON whose top-level value is not an array.
 */
@ExpressionDescription(
  usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.",
  arguments = """
    Arguments:
      * jsonArray - A JSON array. `NULL` is returned in case of any other valid JSON string,
          `NULL` or an invalid JSON.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('[1,2,3,4]');
        4
      > SELECT _FUNC_('[1,2,3,{"f1":1,"f2":[5,6]},4]');
        5
      > SELECT _FUNC_('[1,2');
        NULL
  """,
  since = "3.1.0"
)
case class LengthOfJsonArray(child: Expression) extends UnaryExpression
  with CodegenFallback with ExpectsInputTypes {

  override def inputTypes: Seq[DataType] = Seq(StringType)
  override def dataType: DataType = IntegerType
  override def nullable: Boolean = true
  override def prettyName: String = "json_array_length"

  /**
   * Evaluates the child expression to a JSON string and counts the elements of its
   * top-level array.
   *
   * @param input the row the child expression is evaluated against
   * @return the element count, or `null` for null input, input without any token,
   *         a non-array top-level value, or JSON that fails to parse
   */
  override def eval(input: InternalRow): Any = {
    val json = child.eval(input).asInstanceOf[UTF8String]
    if (json == null) {
      // Null in, null out.
      null
    } else {
      try {
        Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) {
          parser =>
            // A null first token means the input held no JSON token at all.
            if (parser.nextToken() == null) null else parseCounter(parser)
        }
      } catch {
        // Malformed JSON is reported as null rather than failing the query.
        case _: JsonProcessingException | _: IOException => null
      }
    }
  }

  /**
   * Counts the direct elements of the array the parser is currently positioned on.
   *
   * @param parser a parser whose current token is the first token of the document
   * @return the number of top-level elements, or `null` when the current token is not the
   *         start of an array or the input ends before the array is closed
   */
  private def parseCounter(parser: JsonParser): Any = {
    // Only a JSON array is supported by this function.
    if (parser.currentToken != JsonToken.START_ARRAY) {
      null
    } else {
      var length = 0
      var token = parser.nextToken()
      // nextToken() returns null once the end of input is reached, so test for it explicitly:
      // comparing against END_ARRAY alone would never terminate in that case.
      while (token != null && token != JsonToken.END_ARRAY) {
        length += 1
        // Skip all the children of a nested object or array so that only
        // top-level elements are counted.
        parser.skipChildren()
        token = parser.nextToken()
      }
      // A null token here means the input ended before the closing bracket.
      if (token == null) null else length
    }
  }
}
Uh oh!
There was an error while loading. Please reload this page.