diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index de7e1160185d..a1e3a84bd045 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -22,6 +22,7 @@ import java.io._ import scala.util.parsing.combinator.RegexParsers import com.fasterxml.jackson.core._ +import com.fasterxml.jackson.core.json.JsonReadFeature import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult @@ -99,10 +100,10 @@ private[this] object JsonPathParser extends RegexParsers { } private[this] object SharedFactory { - val jsonFactory = new JsonFactory() - - // Enabled for Hive compatibility - jsonFactory.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS) + val jsonFactory = new JsonFactoryBuilder() + // Enabled for Hive compatibility + .enable(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS) + .build() } /** @@ -756,11 +757,7 @@ case class SchemaOfJson( private lazy val jsonOptions = new JSONOptions(options, "UTC") @transient - private lazy val jsonFactory = { - val factory = new JsonFactory() - jsonOptions.setJacksonOptions(factory) - factory - } + private lazy val jsonFactory = jsonOptions.buildJsonFactory() @transient private lazy val jsonInferSchema = new JsonInferSchema(jsonOptions) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 4952540f1132..cdf4b4689e82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -21,7 +21,8 @@ import java.nio.charset.{Charset, StandardCharsets} import java.time.ZoneId import java.util.Locale -import com.fasterxml.jackson.core.{JsonFactory, JsonParser} +import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder} +import com.fasterxml.jackson.core.json.JsonReadFeature import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util._ @@ -30,7 +31,7 @@ import org.apache.spark.sql.internal.SQLConf /** * Options for parsing JSON data into Spark SQL rows. * - * Most of these map directly to Jackson's internal options, specified in [[JsonParser.Feature]]. + * Most of these map directly to Jackson's internal options, specified in [[JsonReadFeature]]. */ private[sql] class JSONOptions( @transient val parameters: CaseInsensitiveMap[String], @@ -129,16 +130,19 @@ private[sql] class JSONOptions( */ val inferTimestamp: Boolean = parameters.get("inferTimestamp").map(_.toBoolean).getOrElse(true) - /** Sets config options on a Jackson [[JsonFactory]]. */ - def setJacksonOptions(factory: JsonFactory): Unit = { - factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) - factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames) - factory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, allowSingleQuotes) - factory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, allowNumericLeadingZeros) - factory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers) - factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, - allowBackslashEscapingAnyCharacter) - factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, allowUnquotedControlChars) + /** Build a Jackson [[JsonFactory]] using JSON options. */ + def buildJsonFactory(): JsonFactory = { + new JsonFactoryBuilder() + .configure(JsonReadFeature.ALLOW_JAVA_COMMENTS, allowComments) + .configure(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames) + .configure(JsonReadFeature.ALLOW_SINGLE_QUOTES, allowSingleQuotes) + .configure(JsonReadFeature.ALLOW_LEADING_ZEROS_FOR_NUMBERS, allowNumericLeadingZeros) + .configure(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers) + .configure( + JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, + allowBackslashEscapingAnyCharacter) + .configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS, allowUnquotedControlChars) + .build() } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index b534b5a3d2d6..ead26665bd6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -52,8 +52,7 @@ class JacksonParser( // `ValueConverter`s for the root schema for all fields in the schema private val rootConverter = makeRootConverter(schema) - private val factory = new JsonFactory() - options.setJacksonOptions(factory) + private val factory = options.buildJsonFactory() private val timestampFormatter = TimestampFormatter( options.timestampFormat, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index c5a97c7b8835..f030955ee6e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -57,8 +57,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { // In each RDD partition, perform schema inference on each row and merge afterwards. val typeMerger = JsonInferSchema.compatibleRootType(columnNameOfCorruptRecord, parseMode) val mergedTypesFromPartitions = json.mapPartitions { iter => - val factory = new JsonFactory() - options.setJacksonOptions(factory) + val factory = options.buildJsonFactory() iter.flatMap { row => try { Utils.tryWithResource(createParser(factory, row)) { parser => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala index 8ce45f06ba65..a48e61861c15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala @@ -28,8 +28,7 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper { def checkType(options: Map[String, String], json: String, dt: DataType): Unit = { val jsonOptions = new JSONOptions(options, "UTC", "") val inferSchema = new JsonInferSchema(jsonOptions) - val factory = new JsonFactory() - jsonOptions.setJacksonOptions(factory) + val factory = jsonOptions.buildJsonFactory() val parser = CreateJacksonParser.string(factory, json) parser.nextToken() val expectedType = StructType(Seq(StructField("a", dt, true))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index bafb6769af69..7592809d7c85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -103,7 +103,7 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { } // The following two tests are not really working - need to look into Jackson's - // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS. + // JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS. ignore("allowNonNumericNumbers off") { val str = """{"age": NaN}""" val df = spark.read.json(Seq(str).toDS())