From cb4147c3586a2df0bd803f6afae824c6132895c9 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Thu, 28 Jul 2022 17:26:32 +1200
Subject: [PATCH 1/7] rename CSV option inferDate to preferDate; require inferSchema

---
 .../src/main/resources/error/error-classes.json |  6 ++++++
 docs/sql-data-sources-csv.md                    |  2 +-
 .../spark/sql/catalyst/csv/CSVInferSchema.scala |  6 +++---
 .../spark/sql/catalyst/csv/CSVOptions.scala     | 17 +++++++++++------
 .../sql/catalyst/csv/UnivocityParser.scala      |  4 ++--
 .../spark/sql/errors/QueryExecutionErrors.scala |  6 ++++++
 .../sql/catalyst/csv/CSVInferSchemaSuite.scala  | 10 +++++-----
 .../sql/catalyst/csv/UnivocityParserSuite.scala |  4 ++--
 .../execution/datasources/csv/CSVSuite.scala    | 17 ++++++++++++++---
 9 files changed, 50 insertions(+), 22 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index c4b59799f88d..15084b2a0b50 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -29,6 +29,12 @@
     ],
     "sqlState" : "22007"
   },
+  "CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA" : {
+    "message" : [
+      "Cannot infer date when schema inference is disabled."
+    ],
+    "sqlState" : "22007"
+  },
   "CANNOT_PARSE_DECIMAL" : {
     "message" : [
       "Cannot parse decimal"
diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index 7b538528219a..ebdaa6864610 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -109,7 +109,7 @@ Data source options of CSV can be set via:
     <td>read</td>
   </tr>
   <tr>
-    <td><code>inferDate</code></td> 
+    <td><code>preferDate</code></td>
     <td>false</td>
     <td>Whether or not to infer columns that satisfy the <code>dateFormat</code> option as <code>Date</code>. Requires <code>inferSchema</code> to be <code>true</code>. When <code>false</code>, columns with dates will be inferred as <code>String</code> (or as <code>Timestamp</code> if it fits the <code>timestampFormat</code>).</td>
     <td>read</td>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index 3132fea8700b..1c3f7fe2b92f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -124,9 +124,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
         case _: DecimalType => tryParseDecimal(field)
         case DoubleType => tryParseDouble(field)
         case DateType => tryParseDateTime(field)
-        case TimestampNTZType if options.inferDate => tryParseDateTime(field)
+        case TimestampNTZType if options.preferDate => tryParseDateTime(field)
         case TimestampNTZType => tryParseTimestampNTZ(field)
-        case TimestampType if options.inferDate => tryParseDateTime(field)
+        case TimestampType if options.preferDate => tryParseDateTime(field)
         case TimestampType => tryParseTimestamp(field)
         case BooleanType => tryParseBoolean(field)
         case StringType => StringType
@@ -178,7 +178,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
   private def tryParseDouble(field: String): DataType = {
     if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) {
       DoubleType
-    } else if (options.inferDate) {
+    } else if (options.preferDate) {
       tryParseDateTime(field)
     } else {
       tryParseTimestampNTZ(field)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 27806ea1c403..4a7012a1767f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -153,19 +153,24 @@ class CSVOptions(
    * Disabled by default for backwards compatibility and performance. When enabled, date entries in
    * timestamp columns will be cast to timestamp upon parsing. Not compatible with
    * legacyTimeParserPolicy == LEGACY since legacy date parser will accept extra trailing characters
+   *
+   * The flag is only enabled if inferSchema is set to true.
    */
-  val inferDate = {
-    val inferDateFlag = getBool("inferDate")
-    if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY && inferDateFlag) {
+  val preferDate = {
+    val preferDateFlag = getBool("preferDate")
+    if (preferDateFlag && SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       throw QueryExecutionErrors.inferDateWithLegacyTimeParserError()
     }
-    inferDateFlag
+    if (preferDateFlag && !inferSchemaFlag) {
+      throw QueryExecutionErrors.inferDateWithoutInferSchemaError()
+    }
+    preferDateFlag
   }
 
-  // Provide a default value for dateFormatInRead when inferDate. This ensures that the
+  // Provide a default value for dateFormatInRead when preferDate. This ensures that the
   // Iso8601DateFormatter (with strict date parsing) is used for date inference
   val dateFormatInRead: Option[String] =
-    if (inferDate) {
+    if (preferDate) {
       Option(parameters.getOrElse("dateFormat", DateFormatter.defaultPattern))
     } else {
       parameters.get("dateFormat")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index a6b4d7ea6679..cc44a1b71751 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -235,7 +235,7 @@ class UnivocityParser(
         } catch {
           case NonFatal(e) =>
             // There may be date type entries in timestamp column due to schema inference
-            if (options.inferDate) {
+            if (options.preferDate) {
               daysToMicros(dateFormatter.parse(datum), options.zoneId)
             } else {
               // If fails to parse, then tries the way used in 2.0 and 1.x for backwards
@@ -254,7 +254,7 @@ class UnivocityParser(
         try {
           timestampNTZFormatter.parseWithoutTimeZone(datum, false)
         } catch {
-          case NonFatal(e) if (options.inferDate) =>
+          case NonFatal(e) if options.preferDate =>
             daysToMicros(dateFormatter.parse(datum), TimeZoneUTC.toZoneId)
         }
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 35a40ce684f3..e05679e2ceef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -575,6 +575,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
     )
   }
 
+  def inferDateWithoutInferSchemaError(): Throwable with SparkThrowable = {
+    new SparkIllegalArgumentException(errorClass = "CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA",
+      messageParameters = Array()
+    )
+  }
+
   def streamedOperatorUnsupportedByDataSourceError(
       className: String, operator: String): Throwable = {
     new UnsupportedOperationException(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
index 8790223a680f..2269c4331bcd 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
@@ -201,19 +201,19 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
 
   test("SPARK-39469: inferring date type") {
     // "yyyy/MM/dd" format
-    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
+    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "preferDate" -> "true"),
       false, "UTC")
     var inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
     // "MMM yyyy" format
-    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "inferDate" -> "true"),
+    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "preferDate" -> "true"),
       false, "GMT")
     inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
     // Field should strictly match date format to infer as date
     options = new CSVOptions(
       Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
-        "inferDate" -> "true"),
+        "preferDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "GMT")
     inferSchema = new CSVInferSchema(options)
@@ -221,10 +221,10 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
     assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
   }
 
-  test("SPARK-39469: inferring date and timestamp types in a mixed column with inferDate=true") {
+  test("SPARK-39469: inferring date and timestamp types in a mixed column with preferDate=true") {
     var options = new CSVOptions(
       Map("dateFormat" -> "yyyy_MM_dd", "timestampFormat" -> "yyyy|MM|dd",
-        "timestampNTZFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
+        "timestampNTZFormat" -> "yyyy/MM/dd", "preferDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "UTC")
     var inferSchema = new CSVInferSchema(options)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 381ec57fcd13..7dbe89f9b513 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -373,10 +373,10 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
     assert(err.getMessage.contains("Illegal pattern character: n"))
   }
 
-  test("SPARK-39469: dates should be parsed correctly in a timestamp column when inferDate=true") {
+  test("SPARK-39469: dates should be parsed correctly in a timestamp column when preferDate=true") {
     def checkDate(dataType: DataType): Unit = {
       val timestampsOptions =
-        new CSVOptions(Map("inferDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
+        new CSVOptions(Map("preferDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
           "timestampNTZFormat" -> "dd-MM-yyyy HH:mm", "dateFormat" -> "dd_MM_yyyy"),
           false, DateTimeUtils.getZoneId("-08:00").toString)
       // Use CSVOption ZoneId="-08:00" (PST) to test that Dates in TimestampNTZ column are always
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 0e5718103902..d95a47ff6563 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2797,13 +2797,13 @@ abstract class CSVSuite
       "inferSchema" -> "true",
       "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
       "dateFormat" -> "yyyy-MM-dd",
-      "inferDate" -> "true")
+      "preferDate" -> "true")
     val options2 = Map(
       "header" -> "true",
       "inferSchema" -> "true",
-      "inferDate" -> "true")
+      "preferDate" -> "true")
 
-    // Error should be thrown when attempting to inferDate with Legacy parser
+    // Error should be thrown when attempting to preferDate with Legacy parser
     if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       val msg = intercept[IllegalArgumentException] {
         spark.read
@@ -2840,6 +2840,17 @@ abstract class CSVSuite
     }
   }
 
+  test("SPARK-39904: Fail to prefer dates if inferSchema=false") {
+    val msg = intercept[IllegalArgumentException] {
+      spark.read
+        .format("csv")
+        .option("inferSchema", "false")
+        .option("preferDate", "true")
+        .load(testFile(dateInferSchemaFile))
+    }.getMessage
+    assert(msg.contains("CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA"))
+  }
+
   test("SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern") {
     withTempPath { path =>
       Seq(

From a865018227644ce994c3b20721aa5bcf6444de8e Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Mon, 1 Aug 2022 11:42:45 +1200
Subject: [PATCH 2/7] revert rename of preferDate back to inferDate; drop inferSchema check

---
 core/src/main/resources/error/error-classes.json  |  6 ------
 docs/sql-data-sources-csv.md                      |  2 +-
 .../spark/sql/catalyst/csv/CSVInferSchema.scala   |  6 +++---
 .../spark/sql/catalyst/csv/CSVOptions.scala       | 15 ++++++---------
 .../spark/sql/catalyst/csv/UnivocityParser.scala  |  4 ++--
 .../spark/sql/errors/QueryExecutionErrors.scala   |  6 ------
 .../sql/catalyst/csv/CSVInferSchemaSuite.scala    | 10 +++++-----
 .../sql/catalyst/csv/UnivocityParserSuite.scala   |  4 ++--
 .../sql/execution/datasources/csv/CSVSuite.scala  |  8 ++++----
 9 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 15084b2a0b50..c4b59799f88d 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -29,12 +29,6 @@
     ],
     "sqlState" : "22007"
   },
-  "CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA" : {
-    "message" : [
-      "Cannot infer date when schema inference is disabled."
-    ],
-    "sqlState" : "22007"
-  },
   "CANNOT_PARSE_DECIMAL" : {
     "message" : [
       "Cannot parse decimal"
diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index ebdaa6864610..57c8f67839bd 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -109,7 +109,7 @@ Data source options of CSV can be set via:
     <td>read</td>
   </tr>
   <tr>
-    <td><code>preferDate</code></td>
+    <td><code>inferDate</code></td>
     <td>false</td>
     <td>Whether or not to infer columns that satisfy the <code>dateFormat</code> option as <code>Date</code>. Requires <code>inferSchema</code> to be <code>true</code>. When <code>false</code>, columns with dates will be inferred as <code>String</code> (or as <code>Timestamp</code> if it fits the <code>timestampFormat</code>).</td>
     <td>read</td>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index 1c3f7fe2b92f..3132fea8700b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -124,9 +124,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
         case _: DecimalType => tryParseDecimal(field)
         case DoubleType => tryParseDouble(field)
         case DateType => tryParseDateTime(field)
-        case TimestampNTZType if options.preferDate => tryParseDateTime(field)
+        case TimestampNTZType if options.inferDate => tryParseDateTime(field)
         case TimestampNTZType => tryParseTimestampNTZ(field)
-        case TimestampType if options.preferDate => tryParseDateTime(field)
+        case TimestampType if options.inferDate => tryParseDateTime(field)
         case TimestampType => tryParseTimestamp(field)
         case BooleanType => tryParseBoolean(field)
         case StringType => StringType
@@ -178,7 +178,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
   private def tryParseDouble(field: String): DataType = {
     if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) {
       DoubleType
-    } else if (options.preferDate) {
+    } else if (options.inferDate) {
       tryParseDateTime(field)
     } else {
       tryParseTimestampNTZ(field)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 4a7012a1767f..dfc3abe823bb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -156,21 +156,18 @@ class CSVOptions(
    *
    * The flag is only enabled if inferSchema is set to true.
    */
-  val preferDate = {
-    val preferDateFlag = getBool("preferDate")
-    if (preferDateFlag && SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+  val inferDate = {
+    val inferDateFlag = getBool("inferDate")
+    if (inferDateFlag && SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       throw QueryExecutionErrors.inferDateWithLegacyTimeParserError()
     }
-    if (preferDateFlag && !inferSchemaFlag) {
-      throw QueryExecutionErrors.inferDateWithoutInferSchemaError()
-    }
-    preferDateFlag
+    inferDateFlag
   }
 
-  // Provide a default value for dateFormatInRead when preferDate. This ensures that the
+  // Provide a default value for dateFormatInRead when inferDate. This ensures that the
   // Iso8601DateFormatter (with strict date parsing) is used for date inference
   val dateFormatInRead: Option[String] =
-    if (preferDate) {
+    if (inferDate) {
       Option(parameters.getOrElse("dateFormat", DateFormatter.defaultPattern))
     } else {
       parameters.get("dateFormat")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index cc44a1b71751..aea8cb49e7b7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -235,7 +235,7 @@ class UnivocityParser(
         } catch {
           case NonFatal(e) =>
             // There may be date type entries in timestamp column due to schema inference
-            if (options.preferDate) {
+            if (options.inferDate) {
               daysToMicros(dateFormatter.parse(datum), options.zoneId)
             } else {
               // If fails to parse, then tries the way used in 2.0 and 1.x for backwards
@@ -254,7 +254,7 @@ class UnivocityParser(
         try {
           timestampNTZFormatter.parseWithoutTimeZone(datum, false)
         } catch {
-          case NonFatal(e) if options.preferDate =>
+          case NonFatal(e) if options.inferDate =>
             daysToMicros(dateFormatter.parse(datum), TimeZoneUTC.toZoneId)
         }
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index e05679e2ceef..35a40ce684f3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -575,12 +575,6 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
     )
   }
 
-  def inferDateWithoutInferSchemaError(): Throwable with SparkThrowable = {
-    new SparkIllegalArgumentException(errorClass = "CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA",
-      messageParameters = Array()
-    )
-  }
-
   def streamedOperatorUnsupportedByDataSourceError(
       className: String, operator: String): Throwable = {
     new UnsupportedOperationException(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
index 2269c4331bcd..8790223a680f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
@@ -201,19 +201,19 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
 
   test("SPARK-39469: inferring date type") {
     // "yyyy/MM/dd" format
-    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "preferDate" -> "true"),
+    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
       false, "UTC")
     var inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
     // "MMM yyyy" format
-    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "preferDate" -> "true"),
+    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "inferDate" -> "true"),
       false, "GMT")
     inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
     // Field should strictly match date format to infer as date
     options = new CSVOptions(
       Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
-        "preferDate" -> "true"),
+        "inferDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "GMT")
     inferSchema = new CSVInferSchema(options)
@@ -221,10 +221,10 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
     assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
   }
 
-  test("SPARK-39469: inferring date and timestamp types in a mixed column with preferDate=true") {
+  test("SPARK-39469: inferring date and timestamp types in a mixed column with inferDate=true") {
     var options = new CSVOptions(
       Map("dateFormat" -> "yyyy_MM_dd", "timestampFormat" -> "yyyy|MM|dd",
-        "timestampNTZFormat" -> "yyyy/MM/dd", "preferDate" -> "true"),
+        "timestampNTZFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "UTC")
     var inferSchema = new CSVInferSchema(options)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 7dbe89f9b513..381ec57fcd13 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -373,10 +373,10 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
     assert(err.getMessage.contains("Illegal pattern character: n"))
   }
 
-  test("SPARK-39469: dates should be parsed correctly in a timestamp column when preferDate=true") {
+  test("SPARK-39469: dates should be parsed correctly in a timestamp column when inferDate=true") {
     def checkDate(dataType: DataType): Unit = {
       val timestampsOptions =
-        new CSVOptions(Map("preferDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
+        new CSVOptions(Map("inferDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
           "timestampNTZFormat" -> "dd-MM-yyyy HH:mm", "dateFormat" -> "dd_MM_yyyy"),
           false, DateTimeUtils.getZoneId("-08:00").toString)
       // Use CSVOption ZoneId="-08:00" (PST) to test that Dates in TimestampNTZ column are always
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index d95a47ff6563..8341d4f1b339 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2797,13 +2797,13 @@ abstract class CSVSuite
       "inferSchema" -> "true",
       "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
       "dateFormat" -> "yyyy-MM-dd",
-      "preferDate" -> "true")
+      "inferDate" -> "true")
     val options2 = Map(
       "header" -> "true",
       "inferSchema" -> "true",
-      "preferDate" -> "true")
+      "inferDate" -> "true")
 
-    // Error should be thrown when attempting to preferDate with Legacy parser
+    // Error should be thrown when attempting to inferDate with Legacy parser
     if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       val msg = intercept[IllegalArgumentException] {
         spark.read
@@ -2845,7 +2845,7 @@ abstract class CSVSuite
       spark.read
         .format("csv")
         .option("inferSchema", "false")
-        .option("preferDate", "true")
+        .option("inferDate", "true")
         .load(testFile(dateInferSchemaFile))
     }.getMessage
     assert(msg.contains("CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA"))

From b475d514c75ad67af6aa3945418b9aa29c4d7b8b Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Mon, 1 Aug 2022 12:31:52 +1200
Subject: [PATCH 3/7] update documentation and test

---
 docs/sql-data-sources-csv.md                  |  6 +--
 docs/sql-data-sources-json.md                 |  4 +-
 .../spark/sql/catalyst/csv/CSVOptions.scala   | 14 ++++---
 .../execution/datasources/csv/CSVSuite.scala  | 41 +++++++++++++++----
 4 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index 57c8f67839bd..162acb04845e 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -111,7 +111,7 @@ Data source options of CSV can be set via:
   <tr>
     <td><code>inferDate</code></td>
     <td>false</td>
-    <td>Whether or not to infer columns that satisfy the <code>dateFormat</code> option as <code>Date</code>. Requires <code>inferSchema</code> to be <code>true</code>. When <code>false</code>, columns with dates will be inferred as <code>String</code> (or as <code>Timestamp</code> if it fits the <code>timestampFormat</code>).</td>
+    <td>Attempts to infer string columns that contain dates or timestamps as <code>Date</code> if the values satisfy <code>dateFormat</code> option and failed to be parsed by the respective formatter during schema inference (<code>inferSchema</code>). When used in conjunction with a user-provided schema, attempts parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>, the parsed values will be cast to timestamp type afterwards.</td>
     <td>read</td>
   </tr>
   <tr>
@@ -176,8 +176,8 @@ Data source options of CSV can be set via:
   </tr>
   <tr>
     <td><code>enableDateTimeParsingFallback</code></td>
-    <td>Enabled if the time parser policy is legacy or no custom date or timestamp pattern was provided</td>
-    <td>Allows to fall back to the backward compatible (Spark 1.x and 2.0) behavior of parsing dates and timestamps if values do not match the set patterns.</td>
+    <td>Enabled if the time parser policy has legacy settings or if no custom date or timestamp pattern was provided.</td>
+    <td>Allows falling back to the backward compatible (Spark 1.x and 2.0) behavior of parsing dates and timestamps if values do not match the set patterns.</td>
     <td>read</td>
   </tr>
   <tr>
diff --git a/docs/sql-data-sources-json.md b/docs/sql-data-sources-json.md
index 500cd65b58b8..a0772dd3656f 100644
--- a/docs/sql-data-sources-json.md
+++ b/docs/sql-data-sources-json.md
@@ -204,8 +204,8 @@ Data source options of JSON can be set via:
   </tr>
   <tr>
     <td><code>enableDateTimeParsingFallback</code></td>
-    <td>Enabled if the time parser policy is legacy or no custom date or timestamp pattern was provided</td>
-    <td>Allows to fall back to the backward compatible (Spark 1.x and 2.0) behavior of parsing dates and timestamps if values do not match the set patterns.</td>
+    <td>Enabled if the time parser policy has legacy settings or if no custom date or timestamp pattern was provided.</td>
+    <td>Allows falling back to the backward compatible (Spark 1.x and 2.0) behavior of parsing dates and timestamps if values do not match the set patterns.</td>
     <td>read</td>
   </tr>
   <tr>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index dfc3abe823bb..e34d730f1747 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -149,12 +149,16 @@ class CSVOptions(
   val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)
 
   /**
-   * Infer columns with all valid date entries as date type (otherwise inferred as timestamp type).
-   * Disabled by default for backwards compatibility and performance. When enabled, date entries in
-   * timestamp columns will be cast to timestamp upon parsing. Not compatible with
-   * legacyTimeParserPolicy == LEGACY since legacy date parser will accept extra trailing characters
+   * Infer columns with all valid date entries as date type (otherwise inferred as timestamp type)
+   * if schema inference is enabled. When being used with user-provided schema, tries to parse
+   * timestamp values as dates if the values do not conform to the timestamp formatter before
+   * falling back to the backward compatible parsing - the parsed values will be cast to timestamp
+   * afterwards.
    *
-   * The flag is only enabled if inferSchema is set to true.
+   * Disabled by default for backwards compatibility and performance.
+   *
+   * Not compatible with legacyTimeParserPolicy == LEGACY since legacy date parser will accept
+   * extra trailing characters.
    */
   val inferDate = {
     val inferDateFlag = getBool("inferDate")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 8341d4f1b339..934fa02e748d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2840,15 +2840,40 @@ abstract class CSVSuite
     }
   }
 
-  test("SPARK-39904: Fail to prefer dates if inferSchema=false") {
-    val msg = intercept[IllegalArgumentException] {
-      spark.read
-        .format("csv")
-        .option("inferSchema", "false")
+  test("SPARK-39904: Parse incorrect timestamp values with inferDate=true") {
+    withTempPath { path =>
+      Seq(
+        "2020-02-01 12:34:56",
+        "2020-02-02",
+        "invalid"
+      ).toDF()
+        .repartition(1)
+        .write.text(path.getAbsolutePath)
+
+      val schema = new StructType()
+        .add("ts", TimestampType)
+
+      val output = spark.read
+        .schema(schema)
         .option("inferDate", "true")
-        .load(testFile(dateInferSchemaFile))
-    }.getMessage
-    assert(msg.contains("CANNOT_INFER_DATE_WITHOUT_INFER_SCHEMA"))
+        .csv(path.getAbsolutePath)
+
+      if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+        val msg = intercept[IllegalArgumentException] {
+          output.collect()
+        }.getMessage
+        assert(msg.contains("CANNOT_INFER_DATE"))
+      } else {
+        checkAnswer(
+          output,
+          Seq(
+            Row(Timestamp.valueOf("2020-02-01 12:34:56")),
+            Row(Timestamp.valueOf("2020-02-02 00:00:00")),
+            Row(null)
+          )
+        )
+      }
+    }
   }
 
   test("SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern") {

From 6e75a735c1d2708fe25db0d347ce766193d0d8f8 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Mon, 1 Aug 2022 14:12:44 +1200
Subject: [PATCH 4/7] rename inferDate to prefersDate

---
 docs/sql-data-sources-csv.md                           |  2 +-
 .../apache/spark/sql/catalyst/csv/CSVInferSchema.scala |  6 +++---
 .../org/apache/spark/sql/catalyst/csv/CSVOptions.scala |  8 ++++----
 .../spark/sql/catalyst/csv/UnivocityParser.scala       |  4 ++--
 .../spark/sql/catalyst/csv/CSVInferSchemaSuite.scala   | 10 +++++-----
 .../spark/sql/catalyst/csv/UnivocityParserSuite.scala  |  4 ++--
 .../spark/sql/execution/datasources/csv/CSVSuite.scala | 10 +++++-----
 7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index 162acb04845e..ebb6d0d1c01b 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -109,7 +109,7 @@ Data source options of CSV can be set via:
     <td>read</td>
   </tr>
   <tr>
-    <td><code>inferDate</code></td>
+    <td><code>prefersDate</code></td>
     <td>false</td>
     <td>Attempts to infer string columns that contain dates or timestamps as <code>Date</code> if the values satisfy <code>dateFormat</code> option and failed to be parsed by the respective formatter during schema inference (<code>inferSchema</code>). When used in conjunction with a user-provided schema, attempts parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>, the parsed values will be cast to timestamp type afterwards.</td>
     <td>read</td>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index 3132fea8700b..53d748989204 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -124,9 +124,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
         case _: DecimalType => tryParseDecimal(field)
         case DoubleType => tryParseDouble(field)
         case DateType => tryParseDateTime(field)
-        case TimestampNTZType if options.inferDate => tryParseDateTime(field)
+        case TimestampNTZType if options.prefersDate => tryParseDateTime(field)
         case TimestampNTZType => tryParseTimestampNTZ(field)
-        case TimestampType if options.inferDate => tryParseDateTime(field)
+        case TimestampType if options.prefersDate => tryParseDateTime(field)
         case TimestampType => tryParseTimestamp(field)
         case BooleanType => tryParseBoolean(field)
         case StringType => StringType
@@ -178,7 +178,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
   private def tryParseDouble(field: String): DataType = {
     if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) {
       DoubleType
-    } else if (options.inferDate) {
+    } else if (options.prefersDate) {
       tryParseDateTime(field)
     } else {
       tryParseTimestampNTZ(field)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index e34d730f1747..1162c2882dd7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -160,18 +160,18 @@ class CSVOptions(
    * Not compatible with legacyTimeParserPolicy == LEGACY since legacy date parser will accept
    * extra trailing characters.
    */
-  val inferDate = {
-    val inferDateFlag = getBool("inferDate")
+  val prefersDate = {
+    val inferDateFlag = getBool("prefersDate")
     if (inferDateFlag && SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       throw QueryExecutionErrors.inferDateWithLegacyTimeParserError()
     }
     inferDateFlag
   }
 
-  // Provide a default value for dateFormatInRead when inferDate. This ensures that the
+  // Provide a default value for dateFormatInRead when prefersDate. This ensures that the
   // Iso8601DateFormatter (with strict date parsing) is used for date inference
   val dateFormatInRead: Option[String] =
-    if (inferDate) {
+    if (prefersDate) {
       Option(parameters.getOrElse("dateFormat", DateFormatter.defaultPattern))
     } else {
       parameters.get("dateFormat")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index aea8cb49e7b7..c9955d72524c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -235,7 +235,7 @@ class UnivocityParser(
         } catch {
           case NonFatal(e) =>
             // There may be date type entries in timestamp column due to schema inference
-            if (options.inferDate) {
+            if (options.prefersDate) {
               daysToMicros(dateFormatter.parse(datum), options.zoneId)
             } else {
               // If fails to parse, then tries the way used in 2.0 and 1.x for backwards
@@ -254,7 +254,7 @@ class UnivocityParser(
         try {
           timestampNTZFormatter.parseWithoutTimeZone(datum, false)
         } catch {
-          case NonFatal(e) if options.inferDate =>
+          case NonFatal(e) if options.prefersDate =>
             daysToMicros(dateFormatter.parse(datum), TimeZoneUTC.toZoneId)
         }
       }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
index 8790223a680f..7066a5614ee9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
@@ -201,19 +201,19 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
 
   test("SPARK-39469: inferring date type") {
     // "yyyy/MM/dd" format
-    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
+    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "prefersDate" -> "true"),
       false, "UTC")
     var inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
     // "MMM yyyy" format
-    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "inferDate" -> "true"),
+    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "prefersDate" -> "true"),
       false, "GMT")
     inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
     // Field should strictly match date format to infer as date
     options = new CSVOptions(
       Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
-        "inferDate" -> "true"),
+        "prefersDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "GMT")
     inferSchema = new CSVInferSchema(options)
@@ -221,10 +221,10 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
     assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
   }
 
-  test("SPARK-39469: inferring date and timestamp types in a mixed column with inferDate=true") {
+  test("SPARK-39469: inferring date and timestamp types in a mixed column with prefersDate=true") {
     var options = new CSVOptions(
       Map("dateFormat" -> "yyyy_MM_dd", "timestampFormat" -> "yyyy|MM|dd",
-        "timestampNTZFormat" -> "yyyy/MM/dd", "inferDate" -> "true"),
+        "timestampNTZFormat" -> "yyyy/MM/dd", "prefersDate" -> "true"),
       columnPruning = false,
       defaultTimeZoneId = "UTC")
     var inferSchema = new CSVInferSchema(options)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 381ec57fcd13..fdb884c6cf22 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -373,10 +373,10 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
     assert(err.getMessage.contains("Illegal pattern character: n"))
   }
 
-  test("SPARK-39469: dates should be parsed correctly in a timestamp column when inferDate=true") {
+  test("SPARK-39469: dates should be parsed correctly in a timestamp column when prefersDate=true") {
     def checkDate(dataType: DataType): Unit = {
       val timestampsOptions =
-        new CSVOptions(Map("inferDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
+        new CSVOptions(Map("prefersDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",
           "timestampNTZFormat" -> "dd-MM-yyyy HH:mm", "dateFormat" -> "dd_MM_yyyy"),
           false, DateTimeUtils.getZoneId("-08:00").toString)
       // Use CSVOption ZoneId="-08:00" (PST) to test that Dates in TimestampNTZ column are always
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 934fa02e748d..0068f57a7697 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2797,13 +2797,13 @@ abstract class CSVSuite
       "inferSchema" -> "true",
       "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
       "dateFormat" -> "yyyy-MM-dd",
-      "inferDate" -> "true")
+      "prefersDate" -> "true")
     val options2 = Map(
       "header" -> "true",
       "inferSchema" -> "true",
-      "inferDate" -> "true")
+      "prefersDate" -> "true")
 
-    // Error should be thrown when attempting to inferDate with Legacy parser
+    // Error should be thrown when prefersDate is enabled with the LEGACY time parser policy
     if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
       val msg = intercept[IllegalArgumentException] {
         spark.read
@@ -2840,7 +2840,7 @@ abstract class CSVSuite
     }
   }
 
-  test("SPARK-39904: Parse incorrect timestamp values with inferDate=true") {
+  test("SPARK-39904: Parse incorrect timestamp values with prefersDate=true") {
     withTempPath { path =>
       Seq(
         "2020-02-01 12:34:56",
@@ -2855,7 +2855,7 @@ abstract class CSVSuite
 
       val output = spark.read
         .schema(schema)
-        .option("inferDate", "true")
+        .option("prefersDate", "true")
         .csv(path.getAbsolutePath)
 
       if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {

From 71a095f4678a4a7ec48ff82ab206833f06f6b972 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Mon, 1 Aug 2022 17:29:04 +1200
Subject: [PATCH 5/7] fix scalastyle

---
 .../apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index fdb884c6cf22..42bc122dfdcb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -373,7 +373,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
     assert(err.getMessage.contains("Illegal pattern character: n"))
   }
 
-  test("SPARK-39469: dates should be parsed correctly in a timestamp column when prefersDate=true") {
+  test("SPARK-39469: dates should be parsed correctly in timestamp column when prefersDate=true") {
     def checkDate(dataType: DataType): Unit = {
       val timestampsOptions =
         new CSVOptions(Map("prefersDate" -> "true", "timestampFormat" -> "dd/MM/yyyy HH:mm",

From 5b01e20c8531d92231419af8fe1f537c82250ffe Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Mon, 1 Aug 2022 17:38:42 +1200
Subject: [PATCH 6/7] update documentation

---
 docs/sql-data-sources-csv.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index ebb6d0d1c01b..0552f4393998 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -111,7 +111,7 @@ Data source options of CSV can be set via:
   <tr>
     <td><code>prefersDate</code></td>
     <td>false</td>
-    <td>Attempts to infer string columns that contain dates or timestamps as <code>Date</code> if the values satisfy <code>dateFormat</code> option and failed to be parsed by the respective formatter during schema inference (<code>inferSchema</code>). When used in conjunction with a user-provided schema, attempts parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>, the parsed values will be cast to timestamp type afterwards.</td>
+    <td>Attempts to infer string columns as <code>Date</code> if the values satisfy <code>dateFormat</code> option and failed to be parsed by the respective formatter during schema inference (<code>inferSchema</code>). When used in conjunction with a user-provided schema, attempts to parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>, the parsed values will be cast to timestamp type afterwards.</td>
     <td>read</td>
   </tr>
   <tr>

From ef445be15b9f71c5e0c51e2e2d8f70e4b6b537af Mon Sep 17 00:00:00 2001
From: Ivan Sadikov <ivan.sadikov@databricks.com>
Date: Wed, 3 Aug 2022 11:13:00 +1200
Subject: [PATCH 7/7] update documentation

---
 docs/sql-data-sources-csv.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index 0552f4393998..98d31a59ac7a 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -111,7 +111,7 @@ Data source options of CSV can be set via:
   <tr>
     <td><code>prefersDate</code></td>
     <td>false</td>
-    <td>Attempts to infer string columns as <code>Date</code> if the values satisfy <code>dateFormat</code> option and failed to be parsed by the respective formatter during schema inference (<code>inferSchema</code>). When used in conjunction with a user-provided schema, attempts to parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>, the parsed values will be cast to timestamp type afterwards.</td>
+    <td>During schema inference (<code>inferSchema</code>), attempts to infer string columns that contain dates or timestamps as <code>Date</code> if the values satisfy the <code>dateFormat</code> option and fail to be parsed by the respective formatter. With a user-provided schema, attempts to parse timestamp columns as dates using <code>dateFormat</code> if they fail to conform to <code>timestampFormat</code>; in this case, the parsed values are cast to timestamp type afterwards.</td>
     <td>read</td>
   </tr>
   <tr>