Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ object FunctionRegistry {
expression[JsonToStructs]("from_json"),
expression[SchemaOfJson]("schema_of_json"),
expression[LengthOfJsonArray]("json_array_length"),
expression[JsonObjectKeys]("json_object_keys"),

// cast
expression[Cast]("cast"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

import java.io._

import scala.collection.mutable.ArrayBuffer
import scala.util.parsing.combinator.RegexParsers

import com.fasterxml.jackson.core._
Expand Down Expand Up @@ -864,3 +865,71 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression
length
}
}

/**
* A function which returns all the keys of the outmost JSON object.
*/
@ExpressionDescription(
usage = "_FUNC_(json_object) - Returns all the keys of the outmost JSON object as an array.",
arguments = """
Arguments:
* json_object - A JSON object. If a valid JSON object is given, all the keys of the outmost
object will be returned as an array. If it is any other valid JSON string, an invalid JSON
string or an empty string, the function returns null.
""",
examples = """
Examples:
> Select _FUNC_('{}');
[]
> Select _FUNC_('{"key": "value"}');
["key"]
> Select _FUNC_('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}');
["f1","f2"]
""",
since = "3.1.0"
)
case class JsonObjectKeys(child: Expression) extends UnaryExpression with CodegenFallback
with ExpectsInputTypes {

override def inputTypes: Seq[DataType] = Seq(StringType)
override def dataType: DataType = ArrayType(StringType)
override def nullable: Boolean = true
override def prettyName: String = "json_object_keys"

override def eval(input: InternalRow): Any = {
val json = child.eval(input).asInstanceOf[UTF8String]
// return null for `NULL` input
if(json == null) {
return null
}

try {
Utils.tryWithResource(CreateJacksonParser.utf8String(SharedFactory.jsonFactory, json)) {
parser => {
// return null if an empty string or any other valid JSON string is encountered
if (parser.nextToken() == null || parser.currentToken() != JsonToken.START_OBJECT) {
return null
}
// Parse the JSON string to get all the keys of outmost JSON object
getJsonKeys(parser, input)
}
}
} catch {
case _: JsonProcessingException | _: IOException => null
}
}

private def getJsonKeys(parser: JsonParser, input: InternalRow): GenericArrayData = {
var arrayBufferOfKeys = ArrayBuffer.empty[UTF8String]

// traverse until the end of input and ensure it returns valid key
while(parser.nextValue() != null && parser.currentName() != null) {
// add current fieldName to the ArrayBuffer
arrayBufferOfKeys += UTF8String.fromString(parser.getCurrentName)

// skip all the children of inner object or array
parser.skipChildren()
}
new GenericArrayData(arrayBufferOfKeys.toArray)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -831,4 +831,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
checkEvaluation(LengthOfJsonArray(Literal(literal)), expectedValue)
}
}

test("json_object_keys") {
Seq(
// Invalid inputs
("", null),
("[]", null),
("""[{"key": "JSON"}]""", null),
("""{"key": 45, "random_string"}""", null),
("""{[1, 2, {"Key": "Invalid JSON"}]}""", null),
// JSON objects
("{}", Seq.empty[UTF8String]),
("""{"key": 1}""", Seq("key")),
("""{"key": "value", "key2": 2}""", Seq("key", "key2")),
("""{"arrayKey": [1, 2, 3]}""", Seq("arrayKey")),
("""{"key":[1,2,3,{"key":"value"},[1,2,3]]}""", Seq("key")),
("""{"f1":"abc","f2":{"f3":"a", "f4":"b"}}""", Seq("f1", "f2")),
("""{"k1": [1, 2, {"key": 5}], "k2": {"key2": [1, 2]}}""", Seq("k1", "k2"))
).foreach {
case (input, expected) =>
checkEvaluation(JsonObjectKeys(Literal(input)), expected)
}
}
}
16 changes: 16 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,21 @@ select json_array_length('[1,2,3,[33,44],{"key":[2,3,4]}]');
select json_array_length('{"key":"not a json array"}');
select json_array_length('[1,2,3,4,5');

-- json_object_keys
select json_object_keys();
select json_object_keys(null);
select json_object_keys(200);
select json_object_keys('');
select json_object_keys('{}');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we switch line 67 and 68?

select json_object_keys('{"key": 1}');
select json_object_keys('{"key": "value", "key2": 2}');
select json_object_keys('{"arrayKey": [1, 2, 3]}');
select json_object_keys('{"key":[1,2,3,{"key":"value"},[1,2,3]]}');
select json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}');
select json_object_keys('{"k1": [1, 2, {"key": 5}], "k2": {"key2": [1, 2]}}');
select json_object_keys('{[1,2]}');
select json_object_keys('{"key": 45, "random_string"}');
select json_object_keys('[1, 2, 3]');

-- Clean up
DROP VIEW IF EXISTS jsonTable;
116 changes: 115 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 53
-- Number of queries: 67


-- !query
Expand Down Expand Up @@ -436,6 +436,120 @@ struct<json_array_length([1,2,3,4,5):int>
NULL


-- !query
select json_object_keys()
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function json_object_keys. Expected: 1; Found: 0; line 1 pos 7


-- !query
select json_object_keys(null)
-- !query schema
struct<json_object_keys(CAST(NULL AS STRING)):array<string>>
-- !query output
NULL


-- !query
select json_object_keys(200)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'json_object_keys(200)' due to data type mismatch: argument 1 requires string type, however, '200' is of int type.; line 1 pos 7


-- !query
select json_object_keys('')
-- !query schema
struct<json_object_keys():array<string>>
-- !query output
NULL


-- !query
select json_object_keys('{}')
-- !query schema
struct<json_object_keys({}):array<string>>
-- !query output
[]


-- !query
select json_object_keys('{"key": 1}')
-- !query schema
struct<json_object_keys({"key": 1}):array<string>>
-- !query output
["key"]


-- !query
select json_object_keys('{"key": "value", "key2": 2}')
-- !query schema
struct<json_object_keys({"key": "value", "key2": 2}):array<string>>
-- !query output
["key","key2"]


-- !query
select json_object_keys('{"arrayKey": [1, 2, 3]}')
-- !query schema
struct<json_object_keys({"arrayKey": [1, 2, 3]}):array<string>>
-- !query output
["arrayKey"]


-- !query
select json_object_keys('{"key":[1,2,3,{"key":"value"},[1,2,3]]}')
-- !query schema
struct<json_object_keys({"key":[1,2,3,{"key":"value"},[1,2,3]]}):array<string>>
-- !query output
["key"]


-- !query
select json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}')
-- !query schema
struct<json_object_keys({"f1":"abc","f2":{"f3":"a", "f4":"b"}}):array<string>>
-- !query output
["f1","f2"]


-- !query
select json_object_keys('{"k1": [1, 2, {"key": 5}], "k2": {"key2": [1, 2]}}')
-- !query schema
struct<json_object_keys({"k1": [1, 2, {"key": 5}], "k2": {"key2": [1, 2]}}):array<string>>
-- !query output
["k1","k2"]


-- !query
select json_object_keys('{[1,2]}')
-- !query schema
struct<json_object_keys({[1,2]}):array<string>>
-- !query output
NULL


-- !query
select json_object_keys('{"key": 45, "random_string"}')
-- !query schema
struct<json_object_keys({"key": 45, "random_string"}):array<string>>
-- !query output
NULL


-- !query
select json_object_keys('[1, 2, 3]')
-- !query schema
struct<json_object_keys([1, 2, 3]):array<string>>
-- !query output
NULL


-- !query
DROP VIEW IF EXISTS jsonTable
-- !query schema
Expand Down