Merged
22 commits
733be49
[SPARK-46539][SQL][FOLLOWUP] fix golden files
cloud-fan Jan 4, 2024
f3e454a
[SPARK-45292][SQL][HIVE] Remove Guava from shared classes from Isolat…
pan3793 Jan 4, 2024
5602363
[SPARK-46412][K8S][DOCS] Update Java and JDK info in K8S testing
engrravijain Jan 4, 2024
1cd3a1b
Revert "[SPARK-46582][R][INFRA] Upgrade R Tools version from 4.0.2 to…
HyukjinKwon Jan 4, 2024
59d147a
[SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `Index…
zhengruifeng Jan 4, 2024
69c4687
[SPARK-46576][SQL] Improve error messages for unsupported data source…
allisonwang-db Jan 4, 2024
b303ece
[SPARK-46530][PYTHON][SQL][FOLLOW-UP] Uses path separator instead of …
HyukjinKwon Jan 4, 2024
c63e064
[SPARK-46587][SQL] XML: Fix XSD big integer conversion
sandip-db Jan 4, 2024
c813796
[SPARK-46585][CORE] Directly constructed `metricPeaks` as an `immutab…
LuciferYang Jan 4, 2024
6a9a06d
[SPARK-46569][SQL] Remove ThreadLocal due to SecureRandom is thread s…
beliefer Jan 4, 2024
ae2e00e
[SPARK-45527][CORE] Use fraction to do the resource calculation
wbo4958 Jan 4, 2024
9ec426d
[SPARK-46592][DOCKER][TEST] OracleIntegrationSuite is flaky because o…
yaooqinn Jan 4, 2024
760de23
[SPARK-46597][SQL][TESTS] Optimize the run tests command in the doc o…
LuciferYang Jan 4, 2024
ddcfe6b
[SPARK-46596][CORE][TESTS] Correct package name of `SslTestUtils`
LuciferYang Jan 4, 2024
3793c2f
[SPARK-46593][PS][TESTS] Refactor `data_type_ops` tests
zhengruifeng Jan 5, 2024
f9ca519
[SPARK-46179][SQL] Add CrossDbmsQueryTestSuites, which runs other DBM…
andylam-db Jan 5, 2024
b71192c
[SPARK-46586][SQL] Support `s.c.immutable.ArraySeq` as `customCollect…
panbingkun Jan 5, 2024
f03f3d0
[SPARK-46560][PYTHON][DOCS] Refine docstring `reverse/map_contains_key`
panbingkun Jan 5, 2024
b96e13d
[SPARK-46595][PYTHON][DOCS] Refine docstring of `map_from_arrays/map_…
LuciferYang Jan 5, 2024
20b6a32
Revert "[SPARK-46593][PS][TESTS] Refactor `data_type_ops` tests"
zhengruifeng Jan 5, 2024
a98c885
[SPARK-46568][PYTHON] Make Python data source options a case-insensit…
allisonwang-db Jan 5, 2024
70b90c8
[SPARK-46592][DOCKER][TESTS][FOLLOWUP] Add newline to ORACLE Docker p…
yaooqinn Jan 5, 2024
11 changes: 6 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
@@ -3588,6 +3588,12 @@
],
"sqlState" : "0A000"
},
"UNSUPPORTED_DATA_SOURCE_SAVE_MODE" : {
"message" : [
"The data source '<source>' cannot be written in the <createMode> mode. Please use either the \"Append\" or \"Overwrite\" mode instead."
],
"sqlState" : "0A000"
},
"UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : {
"message" : [
"The <format> datasource doesn't support the column <columnName> of the type <columnType>."
@@ -5403,11 +5409,6 @@
"There is a 'path' option set and save() is called with a path parameter. Either remove the path option, or call save() without the parameter. To ignore this check, set '<config>' to 'true'."
]
},
"_LEGACY_ERROR_TEMP_1308" : {
"message" : [
"TableProvider implementation <source> cannot be written with <createMode> mode, please use Append or Overwrite modes instead."
]
},
"_LEGACY_ERROR_TEMP_1309" : {
"message" : [
"insertInto() can't be used together with partitionBy(). Partition columns have already been defined for the table. It is not necessary to use partitionBy()."
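For illustration, a minimal, hypothetical sketch (not part of this PR) of how the new UNSUPPORTED_DATA_SOURCE_SAVE_MODE template, which replaces _LEGACY_ERROR_TEMP_1308 above, is rendered: <source> and <createMode> are the message parameters supplied when the error is raised.

// Hypothetical illustration only: substitute the <source> and <createMode> parameters
// into the new error class's message template.
object SaveModeErrorExample {
  private val template =
    "The data source '<source>' cannot be written in the <createMode> mode. " +
      "Please use either the \"Append\" or \"Overwrite\" mode instead."

  def render(source: String, createMode: String): String =
    template.replace("<source>", source).replace("<createMode>", createMode)

  def main(args: Array[String]): Unit = {
    // e.g. a data source that only supports Append/Overwrite, written with ErrorIfExists:
    println(render("my_source", "ErrorIfExists"))
  }
}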
DB2IntegrationSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.6.0a
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* }}}
*/
@DockerTest
DB2KrbIntegrationSuite.scala
@@ -34,7 +34,8 @@ import org.apache.spark.tags.DockerTest
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.6.0a):
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.6.0a
* ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite"
* ./build/sbt -Pdocker-integration-tests
* "docker-integration-tests/testOnly *DB2KrbIntegrationSuite"
* }}}
*/
@DockerTest
MariaDBKrbIntegrationSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.12
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* }}}
*/
@DockerTest
MsSqlServerIntegrationSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
* ENABLE_DOCKER_INTEGRATION_TESTS=1
* MSSQLSERVER_DOCKER_IMAGE_NAME=mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* }}}
*/
@DockerTest
MySQLIntegrationSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:8.0.31
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* }}}
*/
@DockerTest
OracleDatabaseOnDocker.scala (new file)
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.jdbc

import java.io.{File, PrintWriter}

import com.github.dockerjava.api.model._

import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.3")
val oracle_password = "Th1s1sThe0racle#Pass"
override val env = Map(
"ORACLE_PWD" -> oracle_password, // oracle images uses this
"ORACLE_PASSWORD" -> oracle_password // gvenzl/oracle-free uses this
)
override val usesIpc = false
override val jdbcPort: Int = 1521

override def getJdbcUrl(ip: String, port: Int): String = {
s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/freepdb1"
}

override def beforeContainerStart(
hostConfigBuilder: HostConfig,
containerConfigBuilder: ContainerConfig): Unit = {
try {
val dir = Utils.createTempDir()
val writer = new PrintWriter(new File(dir, "install.sql"))
// SPARK-46592: gvenzl/oracle-free occasionally fails to start with the error
// 'ORA-04021: timeout occurred while waiting to lock object' when initializing the
// SYSTEM user. This happens because the default DDL_LOCK_TIMEOUT is 0, meaning lock
// acquisition does not wait at all; setting the timeout to 30 seconds lets it retry.
// TODO: This workaround should be removed once the issue is fixed in the image.
// https://github.com/gvenzl/oci-oracle-free/issues/35
writer.write("ALTER SESSION SET DDL_LOCK_TIMEOUT = 30;\n")
writer.write(s"""ALTER USER SYSTEM IDENTIFIED BY "$oracle_password";""")
writer.close()
val newBind = new Bind(
dir.getAbsolutePath,
new Volume("/docker-entrypoint-initdb.d"),
AccessMode.ro)
hostConfigBuilder.withBinds(hostConfigBuilder.getBinds :+ newBind: _*)
} catch {
case e: Exception =>
logWarning("Failed to create install.sql file", e)
}
}
}
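A brief usage sketch follows (the object name, IP, and port are illustrative; everything else follows from the class above): a suite plugs the helper in as its database, derives the JDBC URL from the container address, and relies on beforeContainerStart to mount the generated install.sql into the image's init-script directory.

// Sketch only (assumes the class above is on the classpath); the IP and port are placeholders.
object OracleDatabaseOnDockerExample {
  def main(args: Array[String]): Unit = {
    val oracleDb = new OracleDatabaseOnDocker
    // The JDBC URL is built from the container address once Docker maps the JDBC port:
    println(oracleDb.getJdbcUrl("10.1.2.3", 1521))
    // => jdbc:oracle:thin:system/Th1s1sThe0racle#Pass@//10.1.2.3:1521/freepdb1

    // beforeContainerStart (a hook invoked before the container is started) writes a
    // temporary install.sql containing
    //   ALTER SESSION SET DDL_LOCK_TIMEOUT = 30;
    //   ALTER USER SYSTEM IDENTIFIED BY "Th1s1sThe0racle#Pass";
    // and bind-mounts its directory read-only at /docker-entrypoint-initdb.d, the directory
    // gvenzl/oracle-free scans for startup scripts, so SYSTEM user setup retries instead of
    // failing immediately with ORA-04021.
  }
}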
OracleIntegrationSuite.scala
@@ -65,19 +65,7 @@ import org.apache.spark.tags.DockerTest
class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession {
import testImplicits._

override val db = new DatabaseOnDocker {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.3")
val oracle_password = "Th1s1sThe0racle#Pass"
override val env = Map(
"ORACLE_PWD" -> oracle_password, // oracle images uses this
"ORACLE_PASSWORD" -> oracle_password // gvenzl/oracle-free uses this
)
override val usesIpc = false
override val jdbcPort: Int = 1521
override def getJdbcUrl(ip: String, port: Int): String =
s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/freepdb1"
}
override val db = new OracleDatabaseOnDocker

override val connectionTimeout = timeout(7.minutes)

PostgresIntegrationSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.tags.DockerTest
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* }}}
*/
@DockerTest
PostgresKrbIntegrationSuite.scala
@@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest
* To run this test suite for a specific version (e.g., postgres:15.1):
* {{{
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1
* ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite"
* ./build/sbt -Pdocker-integration-tests
* "docker-integration-tests/testOnly *PostgresKrbIntegrationSuite"
* }}}
*/
@DockerTest
CrossDbmsQueryTestSuite.scala (new file)
@@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.jdbc

import java.io.File
import java.sql.ResultSet

import scala.collection.mutable.ArrayBuffer
import scala.util.control.NonFatal

import org.apache.spark.sql.Row
import org.apache.spark.sql.SQLQueryTestHelper
import org.apache.spark.sql.catalyst.util.fileToString

/**
* This suite builds on SQLQueryTestSuite's infrastructure so that other DBMSs can be run against
* the SQL test golden files (which SQLQueryTestSuite generates and tests against) to cross-check
* Spark's results for correctness.
* Note that this is not currently run on all SQL input files by default, because of
* incompatibilities between Spark's SQL dialect and those of the other DBMSs.
*
* This suite adds a new comment argument, --ONLY_IF, which indicates the DBMSs for which a SQL
* file is eligible. These strings are defined in the companion object. For example, if you have a
* SQL file named `describe.sql` and you want to indicate that it is incompatible with Postgres,
* add the following comment to the input file:
* --ONLY_IF spark
*/
trait CrossDbmsQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper {

val DATABASE_NAME: String

protected val baseResourcePath = {
// We use a path based on Spark home for 2 reasons:
// 1. Maven can't get the correct resource directory when resources are in other jars.
// 2. We test subclasses in the hive-thriftserver module.
getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile
}
protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath
protected val customInputFilePath: String
protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath

protected def listTestCases: Seq[TestCase] = {
listFilesRecursively(new File(customInputFilePath)).flatMap { file =>
val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"
val absPath = file.getAbsolutePath
val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator)
RegularTestCase(testCaseName, absPath, resultFile) :: Nil
}.sortBy(_.name)
}

def createScalaTestCase(testCase: TestCase): Unit = {
testCase match {
case _: RegularTestCase =>
// Create a test case to run this case.
test(testCase.name) {
runSqlTestCase(testCase, listTestCases)
}
case _ =>
ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") {
log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.")
}
}
}

protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = {
val input = fileToString(new File(testCase.inputFile))
val (comments, code) = splitCommentsAndCodes(input)
val queries = getQueries(code, comments, listTestCases)

val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG))
.map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length))
// If `--ONLY_IF` is found, check if the DBMS being used is allowed.
if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) {
log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " +
s"not eligible with $DATABASE_NAME.")
} else {
runQueriesAndCheckAgainstGoldenFile(queries, testCase)
}
}

protected def runQueriesAndCheckAgainstGoldenFile(
queries: Seq[String], testCase: TestCase): Unit = {
// A local Spark session is needed because, below, we use the Spark analyzed plan to check
// whether the query result is already semantically sorted.
val localSparkSession = spark.newSession()
val conn = getConnection()
val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)

val outputs: Seq[QueryTestOutput] = queries.map { sql =>
val output = {
try {
val sparkDf = localSparkSession.sql(sql)
val isResultSet = stmt.execute(sql)
val rows = ArrayBuffer[Row]()
if (isResultSet) {
val rs = stmt.getResultSet
val metadata = rs.getMetaData
while (rs.next()) {
val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => {
val value = rs.getObject(i)
if (value == null) {
"NULL"
} else {
value
}
}))
rows.append(row)
}
}
val output = rows.map(_.mkString("\t")).toSeq
if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) {
output
} else {
// Sort the answer manually if it isn't sorted.
output.sorted
}
} catch {
case NonFatal(e) => Seq(e.getClass.getName, e.getMessage)
}
}

ExecutionOutput(
sql = sql,
// Don't care about the schema for this test. Only care about correctness.
schema = None,
output = output.mkString("\n"))
}
conn.close()

// Read back the golden files.
var curSegment = 0
val expectedOutputs: Seq[QueryTestOutput] = {
val goldenOutput = fileToString(new File(testCase.resultFile))
val segments = goldenOutput.split("-- !query.*\n")
outputs.map { output =>
val result =
ExecutionOutput(
segments(curSegment + 1).trim, // SQL
None, // Schema
normalizeTestResults(segments(curSegment + 3))) // Output
// Assume that the golden file always has all 3 segments for each query.
curSegment += 3
result
}
}

// Compare results.
assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") {
outputs.size
}

outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) =>
assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") {
output.sql
}
assertResult(expected.output, s"Result did not match" +
s" for query #$i\n${expected.sql}") {
output.output
}
}
}

}

object CrossDbmsQueryTestSuite {

final val POSTGRES = "postgres"
// Argument in input files to indicate that the SQL file is restricted to certain systems.
final val ONLY_IF_ARG = "--ONLY_IF "
}
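To make the segment arithmetic above concrete, here is a small self-contained sketch (the object name is illustrative, and it assumes the standard three-part golden-file layout produced by SQLQueryTestSuite, which the splitting logic above relies on):

// Sketch assuming the usual golden-file layout produced by SQLQueryTestSuite.
object GoldenFileLayoutExample {
  val golden: String =
    """-- !query
      |SELECT 1
      |-- !query schema
      |struct<1:int>
      |-- !query output
      |1
      |""".stripMargin

  def main(args: Array[String]): Unit = {
    val segments = golden.split("-- !query.*\n")
    // segments(0) is empty, segments(1) is the SQL text, segments(2) is the schema,
    // segments(3) is the output -- hence curSegment advances by three per query.
    println(segments.drop(1).map(_.trim).mkString(" | "))
  }
}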