From cf2d583353bb28e314172b087dc5f221a8dffa1d Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Thu, 24 Nov 2022 18:08:56 +1300
Subject: [PATCH 1/3] Add spark.sql.json.enablePartialResults config flag

---
 .../sql/catalyst/json/JacksonParser.scala     | 10 +--
 .../apache/spark/sql/internal/SQLConf.scala   | 11 +++
 .../apache/spark/sql/JsonFunctionsSuite.scala | 67 +++++++++++++++----
 .../datasources/json/JsonSuite.scala          | 25 +++++--
 4 files changed, 88 insertions(+), 25 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 609fe9bc903a6..ee21a1e2b7602 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -96,6 +96,8 @@ class JacksonParser(
           options.dateFormatInRead.isEmpty
       }
 
+  private val enablePartialResults = SQLConf.get.jsonEnablePartialResults
+
   /**
    * Create a converter which converts the JSON documents held by the `JsonParser`
    * to a value according to a desired schema. This is a wrapper for the method
@@ -456,7 +458,7 @@ class JacksonParser(
             schema.existenceDefaultsBitmask(index) = false
           } catch {
             case e: SparkUpgradeException => throw e
-            case NonFatal(e) =>
+            case NonFatal(e) if isRoot || enablePartialResults =>
               badRecordException = badRecordException.orElse(Some(e))
               parser.skipChildren()
           }
@@ -489,10 +491,10 @@ class JacksonParser(
       try {
         values += fieldConverter.apply(parser)
       } catch {
-        case PartialResultException(row, cause) =>
+        case PartialResultException(row, cause) if enablePartialResults =>
           badRecordException = badRecordException.orElse(Some(cause))
           values += row
-        case NonFatal(e) =>
+        case NonFatal(e) if enablePartialResults =>
           badRecordException = badRecordException.orElse(Some(e))
           parser.skipChildren()
       }
@@ -525,7 +527,7 @@ class JacksonParser(
         if (isRoot && v == null) throw QueryExecutionErrors.rootConverterReturnNullError()
         values += v
       } catch {
-        case PartialResultException(row, cause) =>
+        case PartialResultException(row, cause) if enablePartialResults =>
           badRecordException = badRecordException.orElse(Some(cause))
           values += row
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 84d78f365acbc..792090f7261cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3629,6 +3629,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val JSON_ENABLE_PARTIAL_RESULTS =
+    buildConf("spark.sql.json.enablePartialResults")
+      .internal()
+      .doc("When set to true, enables partial results for structs, maps, and arrays in JSON " +
+        "when one or more fields do not match the schema")
+      .version("3.4.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK =
     buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback")
       .internal()
@@ -4772,6 +4781,8 @@ class SQLConf extends Serializable with Logging {
 
   def avroFilterPushDown: Boolean = getConf(AVRO_FILTER_PUSHDOWN_ENABLED)
 
+  def jsonEnablePartialResults: Boolean = getConf(JSON_ENABLE_PARTIAL_RESULTS)
+
   def jsonEnableDateTimeParsingFallback: Option[Boolean] =
     getConf(LEGACY_JSON_ENABLE_DATE_TIME_PARSING_FALLBACK)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 99d5fc60cad7e..399665c0de696 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -920,8 +920,17 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
     checkAnswer(df1.select(from_json($"c0", st)), Row(Row(123456, null)))
     val df2 = Seq("""{"data": {"c2": [19], "c1": 123456}}""").toDF("c0")
     checkAnswer(df2.select(from_json($"c0", new StructType().add("data", st))), Row(Row(null)))
-    val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0")
-    checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(Array(Row(123456, null))))
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") {
+      val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0")
+      checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(Array(Row(123456, null))))
+    }
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") {
+      val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0")
+      checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(null))
+    }
+
     val df4 = Seq("""{"c2": [19]}""").toDF("c0")
     checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null))
   }
@@ -933,10 +942,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
 
     // "c2" is expected to be an array of structs but it is a struct in the data.
     val df = Seq("""[{"c2": {"a": 1}, "c1": "abc"}]""").toDF("c0")
-    checkAnswer(
-      df.select(from_json($"c0", ArrayType(st))),
-      Row(Array(Row("abc", null)))
-    )
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") {
+      checkAnswer(
+        df.select(from_json($"c0", ArrayType(st))),
+        Row(Array(Row("abc", null)))
+      )
+    }
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") {
+      checkAnswer(
+        df.select(from_json($"c0", ArrayType(st))),
+        Row(null)
+      )
+    }
   }
 
   test("SPARK-40646: return partial results for JSON maps") {
@@ -946,10 +965,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
 
     // Map "c2" has "k2" key that is a string, not an integer.
     val df = Seq("""{"c1": {"k1": 1, "k2": "A", "k3": 3}, "c2": "abc"}""").toDF("c0")
-    checkAnswer(
-      df.select(from_json($"c0", st)),
-      Row(Row(null, "abc"))
-    )
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") {
+      checkAnswer(
+        df.select(from_json($"c0", st)),
+        Row(Row(null, "abc"))
+      )
+    }
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") {
+      checkAnswer(
+        df.select(from_json($"c0", st)),
+        Row(Row(null, null))
+      )
+    }
   }
 
   test("SPARK-40646: return partial results for JSON arrays") {
@@ -990,10 +1019,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
     // Value "a" cannot be parsed as an integer,
     // the error cascades to "c2", thus making its value null.
     val df = Seq("""[{"c1": [{"c2": ["a"]}]}]""").toDF("c0")
-    checkAnswer(
-      df.select(from_json($"c0", ArrayType(st))),
-      Row(Array(Row(null)))
-    )
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") {
+      checkAnswer(
+        df.select(from_json($"c0", ArrayType(st))),
+        Row(Array(Row(null)))
+      )
+    }
+
+    withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") {
+      checkAnswer(
+        df.select(from_json($"c0", ArrayType(st))),
+        Row(null)
+      )
+    }
   }
 
   test("SPARK-33270: infers schema for JSON field with spaces and pass them to from_json") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 6cf5ec74ab0e9..0d2c98316e779 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -3393,14 +3393,25 @@ abstract class JsonSuite
         .repartition(1)
         .write.text(path.getAbsolutePath)
 
-      val df = spark.read
-        .schema("a struct<x: int, y: struct<x: int>>, b struct<x: int>")
-        .json(path.getAbsolutePath)
+      for (enablePartialResults <- Seq(true, false)) {
+        withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> s"$enablePartialResults") {
+          val df = spark.read
+            .schema("a struct<x: int, y: struct<x: int>>, b struct<x: int>")
+            .json(path.getAbsolutePath)
 
-      checkAnswer(
-        df,
-        Seq(Row(null, Row(1)), Row(Row(2, null), Row(2)))
-      )
+          if (enablePartialResults) {
+            checkAnswer(
+              df,
+              Seq(Row(null, Row(1)), Row(Row(2, null), Row(2)))
+            )
+          } else {
+            checkAnswer(
+              df,
+              Seq(Row(null, null), Row(Row(2, null), Row(2)))
+            )
+          }
+        }
+      }
     }
   }
 

From 46a952b47d519cba7d46574feb546ffeaf43c643 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Tue, 13 Dec 2022 11:33:26 +1300
Subject: [PATCH 2/3] Update JsonBenchmark results

---
 sql/core/benchmarks/JsonBenchmark-results.txt | 155 ++++++++----------
 1 file changed, 70 insertions(+), 85 deletions(-)

diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt
index 36c87ac1fac75..517aca7fa837f 100644
--- a/sql/core/benchmarks/JsonBenchmark-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-results.txt
@@ -1,120 +1,105 @@
-================================================================================================
-Benchmark for performance of JSON parsing
-================================================================================================
-
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                        3363           3446          79          1.5         672.7       1.0X
-UTF-8 is set                                       4894           4976          72          1.0         978.7       0.7X
+No encoding                                        2545           2616          65          2.0         509.0       1.0X
+UTF-8 is set                                       3845           3854           8          1.3         768.9       0.7X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                        3088           3123          32          1.6         617.6       1.0X
-UTF-8 is set                                       4854           4938          87          1.0         970.9       0.6X
+No encoding                                        2130           2176          41          2.3         426.0       1.0X
+UTF-8 is set                                       3907           3911           4          1.3         781.3       0.5X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                        6411           7338        1497          0.2        6411.2       1.0X
-UTF-8 is set                                      10589          10644          58          0.1       10589.1       0.6X
+No encoding                                        5032           5068          50          0.2        5032.3       1.0X
+UTF-8 is set                                       8304           8349          40          0.1        8304.3       0.6X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       12862          13165         263          0.0      257239.1       1.0X
-UTF-8 is set                                      14792          15110         371          0.0      295834.1       0.9X
+No encoding                                       10782          10872          78          0.0      215647.2       1.0X
+UTF-8 is set                                      12514          12560          41          0.0      250277.3       0.9X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                  2352           2369          17          0.4        2351.8       1.0X
-Select 1 column                                    2680           2683           5          0.4        2680.0       0.9X
+Select 10 columns                                  1901           1903           2          0.5        1901.0       1.0X
+Select 1 column                                    1493           1501           8          0.7        1493.3       1.3X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                       884            887           2          1.1         884.1       1.0X
-Short column with UTF-8                            1193           1202           8          0.8        1192.6       0.7X
-Wide column without encoding                      12289          12448         170          0.1       12289.3       0.1X
-Wide column with UTF-8                            16609          16663          79          0.1       16608.6       0.1X
+Short column without encoding                       697            700           3          1.4         697.2       1.0X
+Short column with UTF-8                             979            979           0          1.0         978.7       0.7X
+Wide column without encoding                      10365          10403          51          0.1       10364.5       0.1X
+Wide column with UTF-8                            15209          15226          15          0.1       15208.7       0.0X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                           147            148           0          6.8         147.2       1.0X
-from_json                                          2201           2202           1          0.5        2200.7       0.1X
-json_tuple                                         2452           2473          20          0.4        2452.5       0.1X
-get_json_object                                    2248           2263          22          0.4        2248.2       0.1X
+Text read                                           120            123           4          8.3         120.2       1.0X
+from_json                                          1944           1957          21          0.5        1944.4       0.1X
+json_tuple                                         2142           2146           4          0.5        2141.6       0.1X
+get_json_object                                    1967           1969           2          0.5        1966.7       0.1X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                           647            654           7          7.7         129.4       1.0X
-schema inferring                                   2842           2862          25          1.8         568.4       0.2X
-parsing                                            3213           3239          33          1.6         642.6       0.2X
+Text read                                           537            542           4          9.3         107.5       1.0X
+schema inferring                                   2319           2323           4          2.2         463.7       0.2X
+parsing                                            2828           2854          29          1.8         565.6       0.2X
 
-Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                          1046           1058          12          4.8         209.3       1.0X
-Schema inferring                                   3321           3378          58          1.5         664.2       0.3X
-Parsing without charset                            3751           3791          36          1.3         750.2       0.3X
-Parsing with UTF-8                                 5361           5403          37          0.9        1072.1       0.2X
+Text read                                           798            811          16          6.3         159.6       1.0X
+Schema inferring                                   2774           2781          10          1.8         554.9       0.3X
+Parsing without charset                            3213           3218           7          1.6         642.7       0.2X
+Parsing with UTF-8                                 4574           4588          13          1.1         914.7       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                      171            173           2          5.8         171.3       1.0X
-to_json(timestamp)                                 1414           1427          12          0.7        1414.0       0.1X
-write timestamps to files                          1183           1211          40          0.8        1183.2       0.1X
-Create a dataset of dates                           191            198           7          5.2         191.5       0.9X
-to_json(date)                                       934            945          16          1.1         934.1       0.2X
-write dates to files                                727            748          22          1.4         726.9       0.2X
+Create a dataset of timestamps                      143            144           2          7.0         142.7       1.0X
+to_json(timestamp)                                 1075           1079           7          0.9        1074.9       0.1X
+write timestamps to files                           928            932           4          1.1         928.1       0.2X
+Create a dataset of dates                           165            170           4          6.1         165.2       0.9X
+to_json(date)                                       739            742           3          1.4         739.0       0.2X
+write dates to files                                573            576           4          1.7         573.4       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                      263            264           1          3.8         262.8       1.0X
-read timestamps from files                         2743           2807          59          0.4        2742.9       0.1X
-infer timestamps from files                       14799          15093         383          0.1       14799.3       0.0X
-read date text from files                           245            253           8          4.1         245.5       1.1X
-read date from files                                998           1008           9          1.0         998.4       0.3X
-timestamp strings                                   383            403          17          2.6         382.8       0.7X
-parse timestamps from Dataset[String]              3165           3185          17          0.3        3165.4       0.1X
-infer timestamps from Dataset[String]             15717          15830         147          0.1       15717.2       0.0X
-date strings                                        434            450          19          2.3         433.5       0.6X
-parse dates from Dataset[String]                   1466           1472           7          0.7        1465.6       0.2X
-from_json(timestamp)                               4682           4736          50          0.2        4681.9       0.1X
-from_json(date)                                    2823           2848          22          0.4        2822.6       0.1X
+read timestamp text from files                      215            220           5          4.6         215.2       1.0X
+read timestamps from files                         2389           2424          31          0.4        2388.8       0.1X
+infer timestamps from files                        6115           6122          11          0.2        6115.4       0.0X
+read date text from files                           191            193           2          5.2         191.4       1.1X
+read date from files                                840            841           2          1.2         839.7       0.3X
+timestamp strings                                   301            306           4          3.3         300.8       0.7X
+parse timestamps from Dataset[String]              2706           2713           6          0.4        2706.1       0.1X
+infer timestamps from Dataset[String]              6476           6482           5          0.2        6475.9       0.0X
+date strings                                        343            343           0          2.9         342.5       0.6X
+parse dates from Dataset[String]                   1169           1172           5          0.9        1168.6       0.2X
+from_json(timestamp)                               4067           4074           7          0.2        4066.5       0.1X
+from_json(date)                                    2470           2472           3          0.4        2469.9       0.1X
 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws
+Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       21058          21148         143          0.0      210582.1       1.0X
-pushdown disabled                                 20208          20464         226          0.0      202080.3       1.0X
-w/ filters                                          750            756           6          0.1        7499.1      28.1X
-
-
+w/o filters                                       18219          18230          18          0.0      182188.8       1.0X
+pushdown disabled                                 17180          17183           4          0.0      171798.7       1.1X
+w/ filters                                         1197           1219          22          0.1       11974.0      15.2X

From d85134c86dac590b3fdd04cac5b7789aeeed5ed1 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Wed, 14 Dec 2022 10:47:03 +1300
Subject: [PATCH 3/3] Enable spark.sql.json.enablePartialResults by default

---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 792090f7261cf..3c8c0d34e8d7c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3636,7 +3636,7 @@ object SQLConf {
         "when one or more fields do not match the schema")
       .version("3.4.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK =
     buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback")