diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index b27d6ed0efed8..83c76c2d4e2bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -132,12 +132,9 @@ object TypeCoercion {
     case (NullType, StringType) => Some(StringType)

     // Cast to TimestampType when we compare DateType with TimestampType
-    // if conf.compareDateTimestampInTimestamp is true
     // i.e. TimeStamp('2017-03-01 00:00:00') eq Date('2017-03-01') = true
-    case (TimestampType, DateType)
-      => if (conf.compareDateTimestampInTimestamp) Some(TimestampType) else Some(StringType)
-    case (DateType, TimestampType)
-      => if (conf.compareDateTimestampInTimestamp) Some(TimestampType) else Some(StringType)
+    case (TimestampType, DateType) => Some(TimestampType)
+    case (DateType, TimestampType) => Some(TimestampType)

     // There is no proper decimal type we can pick,
     // using double type is the best we can do.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index be8526454f9f1..e72352714a697 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -327,8 +327,7 @@ class SessionCatalog(
   def validateTableLocation(table: CatalogTable): Unit = {
     // SPARK-19724: the default location of a managed table should be non-existent or empty.
-    if (table.tableType == CatalogTableType.MANAGED &&
-        !conf.allowCreatingManagedTableUsingNonemptyLocation) {
+    if (table.tableType == CatalogTableType.MANAGED) {
       val tableLocation =
         new Path(table.storage.locationUri.getOrElse(defaultTablePath(table.identifier)))
       val fs = tableLocation.getFileSystem(hadoopConf)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index a13a6836c6be6..de7e1160185dc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -515,12 +515,10 @@ case class JsonToStructs(
     timeZoneId: Option[String] = None)
   extends UnaryExpression with TimeZoneAwareExpression with CodegenFallback with ExpectsInputTypes {

-  val forceNullableSchema = SQLConf.get.getConf(SQLConf.FROM_JSON_FORCE_NULLABLE_SCHEMA)
-
   // The JSON input data might be missing certain fields. We force the nullability
   // of the user-provided schema to avoid data corruptions. In particular, the parquet-mr encoder
   // can generate incorrect files if values are missing in columns declared as non-nullable.
-  val nullableSchema = if (forceNullableSchema) schema.asNullable else schema
+  val nullableSchema = schema.asNullable

   override def nullable: Boolean = true
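Note on the jsonExpressions.scala change above: `nullableSchema` is now unconditionally `schema.asNullable`. A minimal sketch of the user-visible behavior this locks in, assuming a local `SparkSession` (the column and variable names are illustrative, not part of the patch):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.types.{LongType, StringType, StructType}

val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
import spark.implicits._

// The caller declares "b" as non-nullable, but the input JSON omits it.
val declared = new StructType()
  .add("a", LongType, nullable = false)
  .add("b", StringType, nullable = false)

val parsed = Seq("""{"a": 1}""").toDF("json")
  .select(from_json($"json", declared).as("parsed"))

// The output schema is always the nullable form of the declared schema, so
// the missing field surfaces as null instead of corrupting downstream
// writers such as parquet-mr.
parsed.printSchema()
parsed.show()
```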
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 5f3a984cc8d5d..8b714d6dcdda3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -720,14 +720,6 @@ object SQLConf {
     .stringConf
     .createWithDefault("_corrupt_record")

-  val FROM_JSON_FORCE_NULLABLE_SCHEMA = buildConf("spark.sql.fromJsonForceNullableSchema")
-    .internal()
-    .doc("When true, force the output schema of the from_json() function to be nullable " +
-      "(including all the fields). Otherwise, the schema might not be compatible with" +
-      "actual data, which leads to corruptions. This config will be removed in Spark 3.0.")
-    .booleanConf
-    .createWithDefault(true)
-
   val BROADCAST_TIMEOUT = buildConf("spark.sql.broadcastTimeout")
     .doc("Timeout in seconds for the broadcast wait time in broadcast joins.")
     .timeConf(TimeUnit.SECONDS)
@@ -1687,14 +1679,6 @@ object SQLConf {
       "the SQL parser.")
     .fallbackConf(ANSI_ENABLED)

-  val ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION =
-    buildConf("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation")
-      .internal()
-      .doc("When this option is set to true, creating managed tables with nonempty location " +
-        "is allowed. Otherwise, an analysis exception is thrown. ")
-      .booleanConf
-      .createWithDefault(false)
-
   val VALIDATE_PARTITION_COLUMNS =
     buildConf("spark.sql.sources.validatePartitionColumns")
       .internal()
@@ -1913,16 +1897,6 @@ object SQLConf {
     .checkValues((1 to 9).toSet + Deflater.DEFAULT_COMPRESSION)
     .createWithDefault(Deflater.DEFAULT_COMPRESSION)

-  val COMPARE_DATE_TIMESTAMP_IN_TIMESTAMP =
-    buildConf("spark.sql.legacy.compareDateTimestampInTimestamp")
-      .internal()
-      .doc("When true (default), compare Date with Timestamp after converting both sides to " +
-        "Timestamp. This behavior is compatible with Hive 2.2 or later. See HIVE-15236. " +
-        "When false, restore the behavior prior to Spark 2.4. Compare Date with Timestamp after " +
-        "converting both sides to string. This config will be removed in Spark 3.0.")
-      .booleanConf
-      .createWithDefault(true)
-
   val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull")
     .doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " +
       "The size function returns null for null input if the flag is disabled.")
@@ -2236,8 +2210,6 @@ class SQLConf extends Serializable with Logging {
   def caseSensitiveInferenceMode: HiveCaseSensitiveInferenceMode.Value =
     HiveCaseSensitiveInferenceMode.withName(getConf(HIVE_CASE_SENSITIVE_INFERENCE))

-  def compareDateTimestampInTimestamp : Boolean = getConf(COMPARE_DATE_TIMESTAMP_IN_TIMESTAMP)
-
   def gatherFastStats: Boolean = getConf(GATHER_FASTSTAT)

   def optimizerMetadataOnly: Boolean = getConf(OPTIMIZER_METADATA_ONLY)
@@ -2516,9 +2488,6 @@ class SQLConf extends Serializable with Logging {
   def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING)

-  def allowCreatingManagedTableUsingNonemptyLocation: Boolean =
-    getConf(ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION)
-
   def validatePartitionColumns: Boolean = getConf(VALIDATE_PARTITION_COLUMNS)

   def partitionOverwriteMode: PartitionOverwriteMode.Value =
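With `spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation` removed above, the SPARK-19724 validation in `SessionCatalog.validateTableLocation` can no longer be bypassed. A rough reproduction of the now-unavoidable failure, assuming a `spark-shell` session with the default `spark.sql.warehouse.dir` (the table name and path are illustrative):

```scala
import org.apache.spark.sql.AnalysisException

// Pre-populate the default location that a managed table named `t` would use.
spark.range(3).write.parquet("spark-warehouse/t")

// Creating the managed table over that non-empty path now always fails;
// before this patch, the legacy config could suppress the check.
try {
  spark.sql("CREATE TABLE t (id BIGINT) USING parquet")
} catch {
  case e: AnalysisException => println(s"Rejected as expected: ${e.getMessage}")
}
```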
" + "The size function returns null for null input if the flag is disabled.") @@ -2236,8 +2210,6 @@ class SQLConf extends Serializable with Logging { def caseSensitiveInferenceMode: HiveCaseSensitiveInferenceMode.Value = HiveCaseSensitiveInferenceMode.withName(getConf(HIVE_CASE_SENSITIVE_INFERENCE)) - def compareDateTimestampInTimestamp : Boolean = getConf(COMPARE_DATE_TIMESTAMP_IN_TIMESTAMP) - def gatherFastStats: Boolean = getConf(GATHER_FASTSTAT) def optimizerMetadataOnly: Boolean = getConf(OPTIMIZER_METADATA_ONLY) @@ -2516,9 +2488,6 @@ class SQLConf extends Serializable with Logging { def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING) - def allowCreatingManagedTableUsingNonemptyLocation: Boolean = - getConf(ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION) - def validatePartitionColumns: Boolean = getConf(VALIDATE_PARTITION_COLUMNS) def partitionOverwriteMode: PartitionOverwriteMode.Value = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index c7371a7911df5..567cf5ec8ebe6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1526,26 +1526,15 @@ class TypeCoercionSuite extends AnalysisTest { GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))), GreaterThan(Cast(Literal("1.5"), DoubleType), Cast(Literal(BigDecimal("0.5")), DoubleType))) - Seq(true, false).foreach { convertToTS => - withSQLConf( - SQLConf.COMPARE_DATE_TIMESTAMP_IN_TIMESTAMP.key -> convertToTS.toString) { - val date0301 = Literal(java.sql.Date.valueOf("2017-03-01")) - val timestamp0301000000 = Literal(Timestamp.valueOf("2017-03-01 00:00:00")) - val timestamp0301000001 = Literal(Timestamp.valueOf("2017-03-01 00:00:01")) - if (convertToTS) { - // `Date` should be treated as timestamp at 00:00:00 See SPARK-23549 - ruleTest(rule, EqualTo(date0301, timestamp0301000000), - EqualTo(Cast(date0301, TimestampType), timestamp0301000000)) - ruleTest(rule, LessThan(date0301, timestamp0301000001), - LessThan(Cast(date0301, TimestampType), timestamp0301000001)) - } else { - ruleTest(rule, LessThan(date0301, timestamp0301000000), - LessThan(Cast(date0301, StringType), Cast(timestamp0301000000, StringType))) - ruleTest(rule, LessThan(date0301, timestamp0301000001), - LessThan(Cast(date0301, StringType), Cast(timestamp0301000001, StringType))) - } - } - } + // Checks that dates/timestamps are not promoted to strings + val date0301 = Literal(java.sql.Date.valueOf("2017-03-01")) + val timestamp0301000000 = Literal(Timestamp.valueOf("2017-03-01 00:00:00")) + val timestamp0301000001 = Literal(Timestamp.valueOf("2017-03-01 00:00:01")) + // `Date` should be treated as timestamp at 00:00:00 See SPARK-23549 + ruleTest(rule, EqualTo(date0301, timestamp0301000000), + EqualTo(Cast(date0301, TimestampType), timestamp0301000000)) + ruleTest(rule, LessThan(date0301, timestamp0301000001), + LessThan(Cast(date0301, TimestampType), timestamp0301000001)) } test("cast WindowFrame boundaries to the type they operate upon") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index f8400a590606a..d5cc1d4f0fdde 100644 --- 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index f8400a590606a..d5cc1d4f0fdde 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -702,26 +702,22 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
   }

   test("from_json missing fields") {
-    for (forceJsonNullableSchema <- Seq(false, true)) {
-      withSQLConf(SQLConf.FROM_JSON_FORCE_NULLABLE_SCHEMA.key -> forceJsonNullableSchema.toString) {
-        val input =
-          """{
-            |  "a": 1,
-            |  "c": "foo"
-            |}
-            |""".stripMargin
-        val jsonSchema = new StructType()
-          .add("a", LongType, nullable = false)
-          .add("b", StringType, nullable = !forceJsonNullableSchema)
-          .add("c", StringType, nullable = false)
-        val output = InternalRow(1L, null, UTF8String.fromString("foo"))
-        val expr = JsonToStructs(jsonSchema, Map.empty, Literal.create(input, StringType), gmtId)
-        checkEvaluation(expr, output)
-        val schema = expr.dataType
-        val schemaToCompare = if (forceJsonNullableSchema) jsonSchema.asNullable else jsonSchema
-        assert(schemaToCompare == schema)
-      }
-    }
+    val input =
+      """{
+        |  "a": 1,
+        |  "c": "foo"
+        |}
+        |""".stripMargin
+    val jsonSchema = new StructType()
+      .add("a", LongType, nullable = false)
+      .add("b", StringType, nullable = false)
+      .add("c", StringType, nullable = false)
+    val output = InternalRow(1L, null, UTF8String.fromString("foo"))
+    val expr = JsonToStructs(jsonSchema, Map.empty, Literal.create(input, StringType), gmtId)
+    checkEvaluation(expr, output)
+    val schema = expr.dataType
+    val schemaToCompare = jsonSchema.asNullable
+    assert(schemaToCompare == schema)
   }

   test("SPARK-24709: infer schema of json strings") {
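The same expectation as the rewritten `from_json missing fields` test, expressed through the public SQL function rather than `JsonToStructs` directly (a sketch assuming an active `spark` session):

```scala
val df = spark.sql(
  """SELECT from_json('{"a": 1, "c": "foo"}', 'a BIGINT, b STRING, c STRING') AS parsed""")

// "b" is present in the schema but absent from the input; with the forced
// nullable schema it surfaces as NULL instead of failing the query.
df.selectExpr("parsed.a", "parsed.b", "parsed.c").show()
```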