From cf2d583353bb28e314172b087dc5f221a8dffa1d Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Thu, 24 Nov 2022 18:08:56 +1300 Subject: [PATCH 1/3] add config flag --- .../sql/catalyst/json/JacksonParser.scala | 10 +-- .../apache/spark/sql/internal/SQLConf.scala | 11 +++ .../apache/spark/sql/JsonFunctionsSuite.scala | 67 +++++++++++++++---- .../datasources/json/JsonSuite.scala | 25 +++++-- 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 609fe9bc903a6..ee21a1e2b7602 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -96,6 +96,8 @@ class JacksonParser( options.dateFormatInRead.isEmpty } + private val enablePartialResults = SQLConf.get.jsonEnablePartialResults + /** * Create a converter which converts the JSON documents held by the `JsonParser` * to a value according to a desired schema. 
This is a wrapper for the method @@ -456,7 +458,7 @@ class JacksonParser( schema.existenceDefaultsBitmask(index) = false } catch { case e: SparkUpgradeException => throw e - case NonFatal(e) => + case NonFatal(e) if isRoot || enablePartialResults => badRecordException = badRecordException.orElse(Some(e)) parser.skipChildren() } @@ -489,10 +491,10 @@ class JacksonParser( try { values += fieldConverter.apply(parser) } catch { - case PartialResultException(row, cause) => + case PartialResultException(row, cause) if enablePartialResults => badRecordException = badRecordException.orElse(Some(cause)) values += row - case NonFatal(e) => + case NonFatal(e) if enablePartialResults => badRecordException = badRecordException.orElse(Some(e)) parser.skipChildren() } @@ -525,7 +527,7 @@ class JacksonParser( if (isRoot && v == null) throw QueryExecutionErrors.rootConverterReturnNullError() values += v } catch { - case PartialResultException(row, cause) => + case PartialResultException(row, cause) if enablePartialResults => badRecordException = badRecordException.orElse(Some(cause)) values += row } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 84d78f365acbc..792090f7261cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3629,6 +3629,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val JSON_ENABLE_PARTIAL_RESULTS = + buildConf("spark.sql.json.enablePartialResults") + .internal() + .doc("When set to true, enables partial results for structs, maps, and arrays in JSON " + + "when one or more fields do not match the schema") + .version("3.4.0") + .booleanConf + .createWithDefault(false) + val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK = buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback") .internal() @@ -4772,6 +4781,8 @@ 
class SQLConf extends Serializable with Logging { def avroFilterPushDown: Boolean = getConf(AVRO_FILTER_PUSHDOWN_ENABLED) + def jsonEnablePartialResults: Boolean = getConf(JSON_ENABLE_PARTIAL_RESULTS) + def jsonEnableDateTimeParsingFallback: Option[Boolean] = getConf(LEGACY_JSON_ENABLE_DATE_TIME_PARSING_FALLBACK) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 99d5fc60cad7e..399665c0de696 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -920,8 +920,17 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df1.select(from_json($"c0", st)), Row(Row(123456, null))) val df2 = Seq("""{"data": {"c2": [19], "c1": 123456}}""").toDF("c0") checkAnswer(df2.select(from_json($"c0", new StructType().add("data", st))), Row(Row(null))) - val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0") - checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(Array(Row(123456, null)))) + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") { + val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0") + checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(Array(Row(123456, null)))) + } + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") { + val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0") + checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(null)) + } + val df4 = Seq("""{"c2": [19]}""").toDF("c0") checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null)) } @@ -933,10 +942,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { // "c2" is expected to be an array of structs but it is a struct in the data. 
val df = Seq("""[{"c2": {"a": 1}, "c1": "abc"}]""").toDF("c0") - checkAnswer( - df.select(from_json($"c0", ArrayType(st))), - Row(Array(Row("abc", null))) - ) + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") { + checkAnswer( + df.select(from_json($"c0", ArrayType(st))), + Row(Array(Row("abc", null))) + ) + } + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") { + checkAnswer( + df.select(from_json($"c0", ArrayType(st))), + Row(null) + ) + } } test("SPARK-40646: return partial results for JSON maps") { @@ -946,10 +965,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { // Map "c2" has "k2" key that is a string, not an integer. val df = Seq("""{"c1": {"k1": 1, "k2": "A", "k3": 3}, "c2": "abc"}""").toDF("c0") - checkAnswer( - df.select(from_json($"c0", st)), - Row(Row(null, "abc")) - ) + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") { + checkAnswer( + df.select(from_json($"c0", st)), + Row(Row(null, "abc")) + ) + } + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") { + checkAnswer( + df.select(from_json($"c0", st)), + Row(Row(null, null)) + ) + } } test("SPARK-40646: return partial results for JSON arrays") { @@ -990,10 +1019,20 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { // Value "a" cannot be parsed as an integer, // the error cascades to "c2", thus making its value null. 
val df = Seq("""[{"c1": [{"c2": ["a"]}]}]""").toDF("c0") - checkAnswer( - df.select(from_json($"c0", ArrayType(st))), - Row(Array(Row(null))) - ) + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "true") { + checkAnswer( + df.select(from_json($"c0", ArrayType(st))), + Row(Array(Row(null))) + ) + } + + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> "false") { + checkAnswer( + df.select(from_json($"c0", ArrayType(st))), + Row(null) + ) + } } test("SPARK-33270: infers schema for JSON field with spaces and pass them to from_json") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 6cf5ec74ab0e9..0d2c98316e779 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -3393,14 +3393,25 @@ abstract class JsonSuite .repartition(1) .write.text(path.getAbsolutePath) - val df = spark.read - .schema("a struct<x: int, y: struct<x: int>>, b struct<x: int>") - .json(path.getAbsolutePath) + for (enablePartialResults <- Seq(true, false)) { + withSQLConf(SQLConf.JSON_ENABLE_PARTIAL_RESULTS.key -> s"$enablePartialResults") { + val df = spark.read + .schema("a struct<x: int, y: struct<x: int>>, b struct<x: int>") + .json(path.getAbsolutePath) - checkAnswer( - df, - Seq(Row(null, Row(1)), Row(Row(2, null), Row(2))) - ) + if (enablePartialResults) { + checkAnswer( + df, + Seq(Row(null, Row(1)), Row(Row(2, null), Row(2))) + ) + } else { + checkAnswer( + df, + Seq(Row(null, null), Row(Row(2, null), Row(2))) + ) + } + } + } } } From 46a952b47d519cba7d46574feb546ffeaf43c643 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Tue, 13 Dec 2022 11:33:26 +1300 Subject: [PATCH 2/3] update benchmark results --- sql/core/benchmarks/JsonBenchmark-results.txt | 155 ++++++++---------- 1 file changed, 70 insertions(+), 85 deletions(-) diff --git
a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt index 36c87ac1fac75..517aca7fa837f 100644 --- a/sql/core/benchmarks/JsonBenchmark-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -1,120 +1,105 @@ -================================================================================================ -Benchmark for performance of JSON parsing -================================================================================================ - -Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 3363 3446 79 1.5 672.7 1.0X -UTF-8 is set 4894 4976 72 1.0 978.7 0.7X +No encoding 2545 2616 65 2.0 509.0 1.0X +UTF-8 is set 3845 3854 8 1.3 768.9 0.7X -Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 3088 3123 32 1.6 617.6 1.0X -UTF-8 is set 4854 4938 87 1.0 970.9 0.6X +No encoding 2130 2176 41 2.3 426.0 1.0X +UTF-8 is set 3907 3911 4 1.3 781.3 0.5X -Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 6411 7338 1497 0.2 6411.2 1.0X -UTF-8 is set 10589 10644 58 0.1 10589.1 0.6X +No encoding 5032 5068 50 0.2 5032.3 1.0X +UTF-8 is set 8304 8349 40 0.1 8304.3 0.6X -Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 12862 13165 263 0.0 257239.1 1.0X -UTF-8 is set 14792 15110 371 0.0 295834.1 0.9X +No encoding 10782 10872 78 0.0 215647.2 1.0X +UTF-8 is set 12514 12560 41 0.0 250277.3 0.9X -Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 2352 2369 17 0.4 2351.8 1.0X -Select 1 column 2680 2683 5 0.4 2680.0 0.9X +Select 10 columns 1901 1903 2 0.5 1901.0 1.0X +Select 1 column 1493 1501 8 0.7 1493.3 1.3X -Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 884 887 2 1.1 884.1 1.0X -Short column with UTF-8 1193 1202 8 0.8 1192.6 0.7X -Wide column without encoding 12289 12448 170 0.1 12289.3 0.1X -Wide column with UTF-8 16609 16663 79 0.1 16608.6 0.1X +Short column without encoding 697 700 3 1.4 697.2 1.0X +Short column with UTF-8 979 979 0 1.0 978.7 0.7X +Wide column without encoding 10365 10403 51 0.1 10364.5 0.1X +Wide column with UTF-8 15209 15226 15 0.1 15208.7 0.0X -Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 147 148 0 6.8 147.2 1.0X -from_json 2201 2202 1 0.5 2200.7 0.1X -json_tuple 2452 2473 20 0.4 2452.5 0.1X -get_json_object 2248 2263 22 0.4 2248.2 0.1X +Text read 120 123 4 8.3 120.2 1.0X +from_json 1944 1957 21 0.5 1944.4 0.1X +json_tuple 2142 2146 4 0.5 2141.6 0.1X +get_json_object 1967 1969 2 0.5 1966.7 0.1X -Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 647 654 7 7.7 129.4 1.0X -schema inferring 2842 2862 25 1.8 568.4 0.2X -parsing 3213 3239 33 1.6 642.6 0.2X +Text read 537 542 4 9.3 107.5 1.0X +schema inferring 2319 2323 4 2.2 463.7 0.2X +parsing 2828 2854 29 1.8 565.6 0.2X -Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 1046 1058 12 4.8 209.3 1.0X -Schema inferring 3321 3378 58 1.5 664.2 0.3X -Parsing without charset 3751 3791 36 1.3 750.2 0.3X -Parsing with UTF-8 5361 5403 37 0.9 1072.1 0.2X +Text read 798 811 16 6.3 159.6 1.0X +Schema inferring 2774 2781 10 1.8 554.9 0.3X +Parsing without charset 3213 3218 7 1.6 642.7 0.2X +Parsing with UTF-8 4574 4588 13 1.1 914.7 0.2X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 171 173 2 5.8 171.3 1.0X -to_json(timestamp) 1414 1427 12 0.7 1414.0 0.1X -write timestamps to files 1183 1211 40 0.8 1183.2 0.1X -Create a dataset of dates 191 198 7 5.2 191.5 0.9X -to_json(date) 934 945 16 1.1 934.1 0.2X -write dates to files 727 748 22 1.4 726.9 0.2X +Create a dataset of timestamps 143 144 2 7.0 142.7 1.0X +to_json(timestamp) 1075 1079 7 0.9 1074.9 0.1X +write timestamps to files 928 932 4 1.1 928.1 0.2X +Create a dataset of dates 165 170 4 6.1 165.2 0.9X +to_json(date) 739 742 3 1.4 739.0 0.2X +write dates to files 573 576 4 1.7 573.4 0.2X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) 
Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -read timestamp text from files 263 264 1 3.8 262.8 1.0X -read timestamps from files 2743 2807 59 0.4 2742.9 0.1X -infer timestamps from files 14799 15093 383 0.1 14799.3 0.0X -read date text from files 245 253 8 4.1 245.5 1.1X -read date from files 998 1008 9 1.0 998.4 0.3X -timestamp strings 383 403 17 2.6 382.8 0.7X -parse timestamps from Dataset[String] 3165 3185 17 0.3 3165.4 0.1X -infer timestamps from Dataset[String] 15717 15830 147 0.1 15717.2 0.0X -date strings 434 450 19 2.3 433.5 0.6X -parse dates from Dataset[String] 1466 1472 7 0.7 1465.6 0.2X -from_json(timestamp) 4682 4736 50 0.2 4681.9 0.1X -from_json(date) 2823 2848 22 0.4 2822.6 0.1X +read timestamp text from files 215 220 5 4.6 215.2 1.0X +read timestamps from files 2389 2424 31 0.4 2388.8 0.1X +infer timestamps from files 6115 6122 11 0.2 6115.4 0.0X +read date text from files 191 193 2 5.2 191.4 1.1X +read date from files 840 841 2 1.2 839.7 0.3X +timestamp strings 301 306 4 3.3 300.8 0.7X +parse timestamps from Dataset[String] 2706 2713 6 0.4 2706.1 0.1X +infer timestamps from Dataset[String] 6476 6482 5 0.2 6475.9 0.0X +date strings 343 343 0 2.9 342.5 0.6X +parse dates from Dataset[String] 1169 1172 5 0.9 1168.6 0.2X +from_json(timestamp) 4067 4074 7 0.2 4066.5 0.1X +from_json(date) 2470 2472 3 0.4 2469.9 0.1X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws +Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 21058 21148 143 0.0 210582.1 1.0X -pushdown disabled 20208 20464 226 0.0 202080.3 1.0X -w/ filters 750 756 6 0.1 7499.1 28.1X - - +w/o filters 18219 18230 18 0.0 182188.8 1.0X +pushdown disabled 17180 17183 4 0.0 171798.7 1.1X +w/ filters 1197 1219 22 0.1 11974.0 15.2X From d85134c86dac590b3fdd04cac5b7789aeeed5ed1 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 14 Dec 2022 10:47:03 +1300 Subject: [PATCH 3/3] enable by default --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 792090f7261cf..3c8c0d34e8d7c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3636,7 +3636,7 @@ object SQLConf { "when one or more fields do not match the schema") .version("3.4.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK = buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback")