Merged
22 commits
733be49
[SPARK-46539][SQL][FOLLOWUP] fix golden files
cloud-fan Jan 4, 2024
f3e454a
[SPARK-45292][SQL][HIVE] Remove Guava from shared classes from Isolat…
pan3793 Jan 4, 2024
5602363
[SPARK-46412][K8S][DOCS] Update Java and JDK info in K8S testing
engrravijain Jan 4, 2024
1cd3a1b
Revert "[SPARK-46582][R][INFRA] Upgrade R Tools version from 4.0.2 to…
HyukjinKwon Jan 4, 2024
59d147a
[SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `Index…
zhengruifeng Jan 4, 2024
69c4687
[SPARK-46576][SQL] Improve error messages for unsupported data source…
allisonwang-db Jan 4, 2024
b303ece
[SPARK-46530][PYTHON][SQL][FOLLOW-UP] Uses path separator instead of …
HyukjinKwon Jan 4, 2024
c63e064
[SPARK-46587][SQL] XML: Fix XSD big integer conversion
sandip-db Jan 4, 2024
c813796
[SPARK-46585][CORE] Directly constructed `metricPeaks` as an `immutab…
LuciferYang Jan 4, 2024
6a9a06d
[SPARK-46569][SQL] Remove ThreadLocal due to SecureRandom is thread s…
beliefer Jan 4, 2024
ae2e00e
[SPARK-45527][CORE] Use fraction to do the resource calculation
wbo4958 Jan 4, 2024
9ec426d
[SPARK-46592][DOCKER][TEST] OracleIntegrationSuite is flaky because o…
yaooqinn Jan 4, 2024
760de23
[SPARK-46597][SQL][TESTS] Optimize the run tests command in the doc o…
LuciferYang Jan 4, 2024
ddcfe6b
[SPARK-46596][CORE][TESTS] Correct package name of `SslTestUtils`
LuciferYang Jan 4, 2024
3793c2f
[SPARK-46593][PS][TESTS] Refactor `data_type_ops` tests
zhengruifeng Jan 5, 2024
f9ca519
[SPARK-46179][SQL] Add CrossDbmsQueryTestSuites, which runs other DBM…
andylam-db Jan 5, 2024
b71192c
[SPARK-46586][SQL] Support `s.c.immutable.ArraySeq` as `customCollect…
panbingkun Jan 5, 2024
f03f3d0
[SPARK-46560][PYTHON][DOCS] Refine docstring `reverse/map_contains_key`
panbingkun Jan 5, 2024
b96e13d
[SPARK-46595][PYTHON][DOCS] Refine docstring of `map_from_arrays/map_…
LuciferYang Jan 5, 2024
20b6a32
Revert "[SPARK-46593][PS][TESTS] Refactor `data_type_ops` tests"
zhengruifeng Jan 5, 2024
a98c885
[SPARK-46568][PYTHON] Make Python data source options a case-insensit…
allisonwang-db Jan 5, 2024
70b90c8
[SPARK-46592][DOCKER][TESTS][FOLLOWUP] Add newline to ORACLE Docker p…
yaooqinn Jan 5, 2024
11 changes: 6 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
@@ -3588,6 +3588,12 @@
],
"sqlState" : "0A000"
},
"UNSUPPORTED_DATA_SOURCE_SAVE_MODE" : {
"message" : [
"The data source '<source>' cannot be written in the <createMode> mode. Please use either the \"Append\" or \"Overwrite\" mode instead."
],
"sqlState" : "0A000"
},
"UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : {
"message" : [
"The <format> datasource doesn't support the column <columnName> of the type <columnType>."
@@ -5403,11 +5409,6 @@
"There is a 'path' option set and save() is called with a path parameter. Either remove the path option, or call save() without the parameter. To ignore this check, set '<config>' to 'true'."
]
},
"_LEGACY_ERROR_TEMP_1308" : {
"message" : [
"TableProvider implementation <source> cannot be written with <createMode> mode, please use Append or Overwrite modes instead."
]
},
"_LEGACY_ERROR_TEMP_1309" : {
"message" : [
"insertInto() can't be used together with partitionBy(). Partition columns have already been defined for the table. It is not necessary to use partitionBy()."
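For illustration, a minimal, hypothetical sketch (not part of this PR) of how the new UNSUPPORTED_DATA_SOURCE_SAVE_MODE template, which replaces _LEGACY_ERROR_TEMP_1308 above, is rendered: <source> and <createMode> are the message parameters supplied when the error is raised.

// Hypothetical illustration only: substitute the <source> and <createMode> parameters
// into the new error class's message template.
object SaveModeErrorExample {
  private val template =
    "The data source '<source>' cannot be written in the <createMode> mode. " +
      "Please use either the \"Append\" or \"Overwrite\" mode instead."

  def render(source: String, createMode: String): String =
    template.replace("<source>", source).replace("<createMode>", createMode)

  def main(args: Array[String]): Unit = {
    // e.g. a data source that only supports Append/Overwrite, written with ErrorIfExists:
    println(render("my_source", "ErrorIfExists"))
  }
}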
DB2IntegrationSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.6.0a
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* }}}
*/
@DockerTest
DB2KrbIntegrationSuite.scala
@@ -34,7 +34,8 @@ import org.apache.spark.tags.DockerTest
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.6.0a):
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.6.0a
* ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite"
* ./build/sbt -Pdocker-integration-tests
* "docker-integration-tests/testOnly *DB2KrbIntegrationSuite"
* }}}
*/
@DockerTest
MariaDBKrbIntegrationSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.12
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* }}}
*/
@DockerTest
MsSqlServerIntegrationSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
* ENABLE_DOCKER_INTEGRATION_TESTS=1
* MSSQLSERVER_DOCKER_IMAGE_NAME=mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* }}}
*/
@DockerTest
MySQLIntegrationSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:8.0.31
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* }}}
*/
@DockerTest
OracleDatabaseOnDocker.scala (new file)
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.jdbc

import java.io.{File, PrintWriter}

import com.github.dockerjava.api.model._

import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.3")
val oracle_password = "Th1s1sThe0racle#Pass"
override val env = Map(
"ORACLE_PWD" -> oracle_password, // oracle images uses this
"ORACLE_PASSWORD" -> oracle_password // gvenzl/oracle-free uses this
)
override val usesIpc = false
override val jdbcPort: Int = 1521

override def getJdbcUrl(ip: String, port: Int): String = {
s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/freepdb1"
}

override def beforeContainerStart(
hostConfigBuilder: HostConfig,
containerConfigBuilder: ContainerConfig): Unit = {
try {
val dir = Utils.createTempDir()
val writer = new PrintWriter(new File(dir, "install.sql"))
// SPARK-46592: gvenzl/oracle-free occasionally fails to start with the error
// 'ORA-04021: timeout occurred while waiting to lock object' when initializing the
// SYSTEM user. This happens because the default DDL_LOCK_TIMEOUT is 0, meaning lock
// acquisition does not wait at all; setting the timeout to 30 seconds lets it retry.
// TODO: This workaround should be removed once the issue is fixed in the image.
// https://github.com/gvenzl/oci-oracle-free/issues/35
writer.write("ALTER SESSION SET DDL_LOCK_TIMEOUT = 30;\n")
writer.write(s"""ALTER USER SYSTEM IDENTIFIED BY "$oracle_password";""")
writer.close()
val newBind = new Bind(
dir.getAbsolutePath,
new Volume("/docker-entrypoint-initdb.d"),
AccessMode.ro)
hostConfigBuilder.withBinds(hostConfigBuilder.getBinds :+ newBind: _*)
} catch {
case e: Exception =>
logWarning("Failed to create install.sql file", e)
}
}
}
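A brief usage sketch follows (the object name, IP, and port are illustrative; everything else follows from the class above): a suite plugs the helper in as its database, derives the JDBC URL from the container address, and relies on beforeContainerStart to mount the generated install.sql into the image's init-script directory.

// Sketch only (assumes the class above is on the classpath); the IP and port are placeholders.
object OracleDatabaseOnDockerExample {
  def main(args: Array[String]): Unit = {
    val oracleDb = new OracleDatabaseOnDocker
    // The JDBC URL is built from the container address once Docker maps the JDBC port:
    println(oracleDb.getJdbcUrl("10.1.2.3", 1521))
    // => jdbc:oracle:thin:system/Th1s1sThe0racle#Pass@//10.1.2.3:1521/freepdb1

    // beforeContainerStart (a hook invoked before the container is started) writes a
    // temporary install.sql containing
    //   ALTER SESSION SET DDL_LOCK_TIMEOUT = 30;
    //   ALTER USER SYSTEM IDENTIFIED BY "Th1s1sThe0racle#Pass";
    // and bind-mounts its directory read-only at /docker-entrypoint-initdb.d, the directory
    // gvenzl/oracle-free scans for startup scripts, so SYSTEM user setup retries instead of
    // failing immediately with ORA-04021.
  }
}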
OracleIntegrationSuite.scala
@@ -65,19 +65,7 @@ import org.apache.spark.tags.DockerTest
class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession {
import testImplicits._

override val db = new DatabaseOnDocker {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.3")
val oracle_password = "Th1s1sThe0racle#Pass"
override val env = Map(
"ORACLE_PWD" -> oracle_password, // oracle images uses this
"ORACLE_PASSWORD" -> oracle_password // gvenzl/oracle-free uses this
)
override val usesIpc = false
override val jdbcPort: Int = 1521
override def getJdbcUrl(ip: String, port: Int): String =
s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/freepdb1"
}
override val db = new OracleDatabaseOnDocker

override val connectionTimeout = timeout(7.minutes)

PostgresIntegrationSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* }}}
*/
@DockerTest
PostgresKrbIntegrationSuite.scala
@@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest
* To run this test suite for a specific version (e.g., postgres:15.1):
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1
* ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite"
* ./build/sbt -Pdocker-integration-tests
* "docker-integration-tests/testOnly *PostgresKrbIntegrationSuite"
* }}}
*/
@DockerTest
CrossDbmsQueryTestSuite.scala (new file)
@@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.jdbc

import java.io.File
import java.sql.ResultSet

import scala.collection.mutable.ArrayBuffer
import scala.util.control.NonFatal

import org.apache.spark.sql.Row
import org.apache.spark.sql.SQLQueryTestHelper
import org.apache.spark.sql.catalyst.util.fileToString

/**
* This suite builds on SQLQueryTestSuite's infrastructure so that other DBMSs can be run against
* the SQL test golden files (which SQLQueryTestSuite generates and tests against) to cross-check
* Spark's results for correctness.
* Note that this is not currently run on all SQL input files by default, because of
* incompatibilities between Spark's SQL dialect and those of the other DBMSs.
*
* This suite adds a new comment argument, --ONLY_IF, which indicates the DBMSs for which a SQL
* file is eligible. These strings are defined in the companion object. For example, if you have a
* SQL file named `describe.sql` and you want to indicate that it is incompatible with Postgres,
* add the following comment to the input file:
* --ONLY_IF spark
*/
trait CrossDbmsQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper {

val DATABASE_NAME: String

protected val baseResourcePath = {
// We use a path based on Spark home for 2 reasons:
// 1. Maven can't get the correct resource directory when resources are in other jars.
// 2. We test subclasses in the hive-thriftserver module.
getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile
}
protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath
protected val customInputFilePath: String
protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath

protected def listTestCases: Seq[TestCase] = {
listFilesRecursively(new File(customInputFilePath)).flatMap { file =>
val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"
val absPath = file.getAbsolutePath
val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator)
RegularTestCase(testCaseName, absPath, resultFile) :: Nil
}.sortBy(_.name)
}

def createScalaTestCase(testCase: TestCase): Unit = {
testCase match {
case _: RegularTestCase =>
// Create a test case to run this case.
test(testCase.name) {
runSqlTestCase(testCase, listTestCases)
}
case _ =>
ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") {
log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.")
}
}
}

protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = {
val input = fileToString(new File(testCase.inputFile))
val (comments, code) = splitCommentsAndCodes(input)
val queries = getQueries(code, comments, listTestCases)

val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG))
.map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length))
// If `--ONLY_IF` is found, check if the DBMS being used is allowed.
if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) {
log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " +
s"not eligible with $DATABASE_NAME.")
} else {
runQueriesAndCheckAgainstGoldenFile(queries, testCase)
}
}

protected def runQueriesAndCheckAgainstGoldenFile(
queries: Seq[String], testCase: TestCase): Unit = {
// A local Spark session is needed because, below, we use the Spark analyzed plan to check
// whether the query result is already semantically sorted.
val localSparkSession = spark.newSession()
val conn = getConnection()
val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)

val outputs: Seq[QueryTestOutput] = queries.map { sql =>
val output = {
try {
val sparkDf = localSparkSession.sql(sql)
val isResultSet = stmt.execute(sql)
val rows = ArrayBuffer[Row]()
if (isResultSet) {
val rs = stmt.getResultSet
val metadata = rs.getMetaData
while (rs.next()) {
val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => {
val value = rs.getObject(i)
if (value == null) {
"NULL"
} else {
value
}
}))
rows.append(row)
}
}
val output = rows.map(_.mkString("\t")).toSeq
if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) {
output
} else {
// Sort the answer manually if it isn't sorted.
output.sorted
}
} catch {
case NonFatal(e) => Seq(e.getClass.getName, e.getMessage)
}
}

ExecutionOutput(
sql = sql,
// Don't care about the schema for this test. Only care about correctness.
schema = None,
output = output.mkString("\n"))
}
conn.close()

// Read back the golden files.
var curSegment = 0
val expectedOutputs: Seq[QueryTestOutput] = {
val goldenOutput = fileToString(new File(testCase.resultFile))
val segments = goldenOutput.split("-- !query.*\n")
outputs.map { output =>
val result =
ExecutionOutput(
segments(curSegment + 1).trim, // SQL
None, // Schema
normalizeTestResults(segments(curSegment + 3))) // Output
// Assume that the golden file always has all 3 segments for each query.
curSegment += 3
result
}
}

// Compare results.
assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") {
outputs.size
}

outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) =>
assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") {
output.sql
}
assertResult(expected.output, s"Result did not match" +
s" for query #$i\n${expected.sql}") {
output.output
}
}
}

}

object CrossDbmsQueryTestSuite {

final val POSTGRES = "postgres"
// Argument in input files to indicate that the SQL file is restricted to certain systems.
final val ONLY_IF_ARG = "--ONLY_IF "
}
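To make the segment arithmetic above concrete, here is a small self-contained sketch (the object name is illustrative, and it assumes the standard three-part golden-file layout produced by SQLQueryTestSuite, which the splitting logic above relies on):

// Sketch assuming the usual golden-file layout produced by SQLQueryTestSuite.
object GoldenFileLayoutExample {
  val golden: String =
    """-- !query
      |SELECT 1
      |-- !query schema
      |struct<1:int>
      |-- !query output
      |1
      |""".stripMargin

  def main(args: Array[String]): Unit = {
    val segments = golden.split("-- !query.*\n")
    // segments(0) is empty, segments(1) is the SQL text, segments(2) is the schema,
    // segments(3) is the output -- hence curSegment advances by three per query.
    println(segments.drop(1).map(_.trim).mkString(" | "))
  }
}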