2 changes: 1 addition & 1 deletion docs/sql-data-sources-hive-tables.md
@@ -130,7 +130,7 @@ The following options can be used to configure the version of Hive that is used
   <td><code>2.3.10</code></td>
   <td>
     Version of the Hive metastore. Available
-    options are <code>2.0.0</code> through <code>2.3.10</code> and <code>3.0.0</code> through <code>3.1.3</code>.
+    options are <code>2.0.0</code> through <code>2.3.10</code>, <code>3.0.0</code> through <code>3.1.3</code>, and <code>4.0.0</code> through <code>4.0.1</code>.
   </td>
   <td>1.4.0</td>
 </tr>
8 changes: 8 additions & 0 deletions project/SparkBuild.scala
@@ -1183,6 +1183,14 @@ object Hive {
     // Hive tests need higher metaspace size
     (Test / javaOptions) := (Test / javaOptions).value.filterNot(_.contains("MaxMetaspaceSize")),
     (Test / javaOptions) += "-XX:MaxMetaspaceSize=2g",
+    // SPARK-45265: HivePartitionFilteringSuite's addPartitions-related tests generate very long
+    // direct SQL against the Derby server, which may cause a stack overflow error when Derby
+    // parses the SQL.
+    // We need to increase Xss for these tests. Meanwhile, QueryParsingErrorsSuite requires a
+    // smaller Xss to mock a FAILED_TO_PARSE_TOO_COMPLEX error, so we set it for the
+    // hive module specifically.
+    (Test / javaOptions) := (Test / javaOptions).value.filterNot(_.contains("Xss")),
+    (Test / javaOptions) += "-Xss64m",
     // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings
     // only for this subproject.
     scalacOptions := (scalacOptions map { currentOpts: Seq[String] =>
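Note on the hunk above: sbt applies `Test / javaOptions` only to forked test JVMs, and scoping the setting to the hive subproject lets one module raise `-Xss` while other modules (such as the one running QueryParsingErrorsSuite) keep a smaller stack. A minimal standalone sketch of the same filter-then-append idiom; the project name and layout here are assumed, not Spark's actual build:

```scala
// build.sbt sketch (assumed standalone project, not Spark's real SparkBuild.scala):
lazy val hive = (project in file("hive"))
  .settings(
    Test / fork := true, // javaOptions take effect only in forked test JVMs
    // Drop any inherited -Xss flag, then pin the module-specific stack size.
    Test / javaOptions := (Test / javaOptions).value.filterNot(_.contains("Xss")),
    Test / javaOptions += "-Xss64m"
  )
```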
@@ -458,27 +458,28 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
         if (sessionState.getIsVerbose) {
           out.println(cmd)
         }
-        val rc = driver.run(cmd)
-        val endTimeNs = System.nanoTime()
-        val timeTaken: Double = TimeUnit.NANOSECONDS.toMillis(endTimeNs - startTimeNs) / 1000.0
-
-        ret = rc.getResponseCode
-        if (ret != 0) {
-          val format = SparkSQLEnv.sparkSession.sessionState.conf.errorMessageFormat
-          val e = rc.getException
-          val msg = e match {
-            case st: SparkThrowable with Throwable => SparkThrowableHelper.getMessage(st, format)
-            case _ => e.getMessage
-          }
-          err.println(msg)
-          if (format == ErrorMessageFormat.PRETTY &&
+        try {
+          driver.run(cmd)
+        } catch {
+          case t: Throwable =>
+            ret = 1
+            val format = SparkSQLEnv.sparkSession.sessionState.conf.errorMessageFormat
+            val msg = t match {
+              case st: SparkThrowable with Throwable =>
+                SparkThrowableHelper.getMessage(st, format)
+              case _ => t.getMessage
+            }
+            err.println(msg)
+            if (format == ErrorMessageFormat.PRETTY &&
               !sessionState.getIsSilent &&
-            (!e.isInstanceOf[AnalysisException] || e.getCause != null)) {
-            e.printStackTrace(err)
-          }
-          driver.close()
-          return ret
+              (!t.isInstanceOf[AnalysisException] || t.getCause != null)) {
+              t.printStackTrace(err)
+            }
+            driver.close()
+            return ret
         }
+        val endTimeNs = System.nanoTime()
+        val timeTaken: Double = TimeUnit.NANOSECONDS.toMillis(endTimeNs - startTimeNs) / 1000.0

         val res = new JArrayList[String]()
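Note on this rewrite: with Hive 4.0, `CommandProcessorResponse` no longer exposes a response code and `Driver.run` signals failure by throwing, so the CLI moves error reporting into a `catch` block and computes timing only after a successful run. A minimal sketch of the old and new contracts; the `report` callback is hypothetical, standing in for the real message formatting and printing:

```scala
import org.apache.hadoop.hive.ql.Driver

// Sketch only, not the PR's exact code. Pre-4.0, errors came back as a code:
//   val rc = driver.run(cmd); if (rc.getResponseCode != 0) report(rc.getException)
// On 4.x, driver.run throws instead, so the failure path is a catch block.
def runOnce(driver: Driver, cmd: String, report: Throwable => Unit): Int = {
  try {
    driver.run(cmd)
    0 // success: the 4.x API only returns normally when the command succeeded
  } catch {
    case t: Throwable =>
      report(t) // format and print the message / stack trace here
      1
  }
}
```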
@@ -31,7 +31,7 @@ import org.apache.spark.internal.{Logging, MDC}
 import org.apache.spark.internal.LogKeys.COMMAND
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.plans.logical.CommandResult
-import org.apache.spark.sql.execution.{QueryExecution, SQLExecution}
+import org.apache.spark.sql.execution.{QueryExecution, QueryExecutionException, SQLExecution}
 import org.apache.spark.sql.execution.HiveResult.hiveResultString
 import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution}

@@ -82,10 +82,10 @@ private[hive] class SparkSQLDriver(val sparkSession: SparkSession = SparkSQLEnv.
     } catch {
       case st: SparkThrowable =>
         logDebug(s"Failed in [$command]", st)
-        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(st), st.getSqlState, st)
+        throw st
       case cause: Throwable =>
         logError(log"Failed in [${MDC(COMMAND, command)}]", cause)
-        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
+        throw new QueryExecutionException(ExceptionUtils.getStackTrace(cause))
     }
   }

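The net effect is a new contract for `SparkSQLDriver.run`: failures are no longer encoded in an error `CommandProcessorResponse`; a `SparkThrowable` propagates unchanged and any other failure arrives wrapped in a `QueryExecutionException` that carries the stack trace. A hedged sketch of a caller under that contract; the `handle` hook is hypothetical:

```scala
import org.apache.spark.SparkThrowable
import org.apache.spark.sql.execution.QueryExecutionException

// Sketch: under the new contract, callers branch on exception type rather
// than inspecting a response code. `handle` is a hypothetical reporting hook.
def runAndReport(run: () => Unit, handle: String => Unit): Unit = {
  try {
    run() // e.g. () => driver.run(command)
  } catch {
    case st: SparkThrowable with Throwable =>
      handle(st.getMessage) // structured Spark error, rethrown by the driver as-is
    case qe: QueryExecutionException =>
      handle(qe.getMessage) // other failures; the message carries the stack trace
  }
}
```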
@@ -1030,7 +1030,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     }
     val metaStoreParts = partsWithLocation
       .map(p => p.copy(spec = toMetaStorePartitionSpec(p.spec)))
-    client.createPartitions(db, table, metaStoreParts, ignoreIfExists)
+    client.createPartitions(tableMeta, metaStoreParts, ignoreIfExists)

Review comment (on the changed line): why?

   }

   override def dropPartitions(
@@ -74,8 +74,9 @@ private[spark] object HiveUtils extends Logging {

   val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version")
     .doc("Version of the Hive metastore. Available options are " +
-      "<code>2.0.0</code> through <code>2.3.10</code> and " +
-      "<code>3.0.0</code> through <code>3.1.3</code>.")
+      "<code>2.0.0</code> through <code>2.3.10</code>, " +
+      "<code>3.0.0</code> through <code>3.1.3</code> and " +
+      "<code>4.0.0</code> through <code>4.0.1</code>.")
     .version("1.4.0")
     .stringConf
     .checkValue(isCompatibleHiveVersion, "Unsupported Hive Metastore version")
@@ -164,8 +164,7 @@ private[hive] trait HiveClient {
    * Create one or many partitions in the given table.
    */
   def createPartitions(
-      db: String,
-      table: String,
+      table: CatalogTable,
       parts: Seq[CatalogTablePartition],
       ignoreIfExists: Boolean): Unit

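Call sites change shape accordingly: instead of bare database/table name strings, callers now pass the full `CatalogTable`, from which the client derives the identifier and can build a version-appropriate Hive `Table` for the shim layer. A sketch of the migration; the `externalCatalog.getTable` lookup is illustrative:

```scala
// Before: name-based API
//   client.createPartitions("db", "tbl", parts, ignoreIfExists = false)

// After: metadata-based API; names come from tableMeta.identifier and the
// client can convert tableMeta to a Hive Table internally.
val tableMeta: CatalogTable = externalCatalog.getTable("db", "tbl") // illustrative
client.createPartitions(tableMeta, parts, ignoreIfExists = false)
```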
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.hive.client

-import java.io.PrintStream
+import java.io.{OutputStream, PrintStream}
 import java.lang.{Iterable => JIterable}
 import java.lang.reflect.InvocationTargetException
 import java.nio.charset.StandardCharsets.UTF_8
@@ -28,6 +28,7 @@ import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._

+import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.common.StatsSetupConst
@@ -44,7 +45,7 @@ import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.apache.hadoop.security.UserGroupInformation

-import org.apache.spark.{SparkConf, SparkException}
+import org.apache.spark.{SparkConf, SparkException, SparkThrowable}
 import org.apache.spark.deploy.SparkHadoopUtil.SOURCE_SPARK
 import org.apache.spark.internal.{Logging, LogKeys, MDC}
 import org.apache.spark.internal.LogKeys._
@@ -121,6 +122,7 @@ private[hive] class HiveClientImpl(
     case hive.v2_3 => new Shim_v2_3()
     case hive.v3_0 => new Shim_v3_0()
     case hive.v3_1 => new Shim_v3_1()
+    case hive.v4_0 => new Shim_v4_0()
   }

   // Create an internal session state for this HiveClientImpl.
@@ -177,8 +179,10 @@
     // got changed. We reset it to clientLoader.ClassLoader here.
     state.getConf.setClassLoader(clientLoader.classLoader)
     shim.setCurrentSessionState(state)
-    state.out = new PrintStream(outputBuffer, true, UTF_8.name())
-    state.err = new PrintStream(outputBuffer, true, UTF_8.name())
+    val clz = state.getClass.getField("out").getType.asInstanceOf[Class[_ <: PrintStream]]
+    val ctor = clz.getConstructor(classOf[OutputStream], classOf[Boolean], classOf[String])
+    state.getClass.getField("out").set(state, ctor.newInstance(outputBuffer, true, UTF_8.name()))

Review comment (on the changed lines): build fails with "the result type of an implicit conversion must be more specific than Object"

+    state.getClass.getField("err").set(state, ctor.newInstance(outputBuffer, true, UTF_8.name()))
     state
   }
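Why reflection here: the declared type of `SessionState`'s `out`/`err` fields differs across the Hive versions this code now compiles against (Hive 4.0 appears to declare them as a `PrintStream` subclass, which is what the build-failure comment above points at), so a plain `state.out = new PrintStream(...)` assignment no longer typechecks for both. Reading the field's declared type at runtime and invoking that type's constructor sidesteps the mismatch. A standalone sketch of the pattern, assuming only a public field whose type offers an `(OutputStream, boolean, String)` constructor:

```scala
import java.io.{OutputStream, PrintStream}
import java.nio.charset.StandardCharsets.UTF_8

// Sets a public PrintStream-typed field without naming its concrete class, so
// the same code works whether the field is PrintStream or a Hive-4-style subclass.
def setStreamField(target: AnyRef, fieldName: String, sink: OutputStream): Unit = {
  val field = target.getClass.getField(fieldName)
  val streamClass = field.getType.asInstanceOf[Class[_ <: PrintStream]]
  // Resolve the (OutputStream, boolean, String) constructor on the declared type.
  val ctor = streamClass.getConstructor(
    classOf[OutputStream], classOf[Boolean], classOf[String])
  field.set(target, ctor.newInstance(sink, Boolean.box(true), UTF_8.name()))
}
```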

@@ -307,15 +311,27 @@
   }

   def setOut(stream: PrintStream): Unit = withHiveState {
-    state.out = stream
+    val ctor = state.getClass.getField("out")
+      .getType
+      .asInstanceOf[Class[_ <: PrintStream]]
+      .getConstructor(classOf[OutputStream])
+    state.getClass.getField("out").set(state, ctor.newInstance(stream))
   }

   def setInfo(stream: PrintStream): Unit = withHiveState {
-    state.info = stream
+    val ctor = state.getClass.getField("info")
+      .getType
+      .asInstanceOf[Class[_ <: PrintStream]]
+      .getConstructor(classOf[OutputStream])
+    state.getClass.getField("info").set(state, ctor.newInstance(stream))
   }

   def setError(stream: PrintStream): Unit = withHiveState {
-    state.err = stream
+    val ctor = state.getClass.getField("err")
+      .getType
+      .asInstanceOf[Class[_ <: PrintStream]]
+      .getConstructor(classOf[OutputStream])
+    state.getClass.getField("err").set(state, ctor.newInstance(stream))
   }

   private def setCurrentDatabaseRaw(db: String): Unit = {
@@ -629,21 +645,22 @@
   }

   override def createPartitions(
-      db: String,
-      table: String,
+      table: CatalogTable,
       parts: Seq[CatalogTablePartition],
       ignoreIfExists: Boolean): Unit = withHiveState {
     def replaceExistException(e: Throwable): Unit = e match {
       case _: HiveException if e.getCause.isInstanceOf[AlreadyExistsException] =>
-        val hiveTable = client.getTable(db, table)
+        val db = table.identifier.database.getOrElse(state.getCurrentDatabase)
+        val tableName = table.identifier.table
+        val hiveTable = client.getTable(db, tableName)
         val existingParts = parts.filter { p =>
           shim.getPartitions(client, hiveTable, p.spec.asJava).nonEmpty
         }
-        throw new PartitionsAlreadyExistException(db, table, existingParts.map(_.spec))
+        throw new PartitionsAlreadyExistException(db, tableName, existingParts.map(_.spec))
       case _ => throw e
     }
     try {
-      shim.createPartitions(client, db, table, parts, ignoreIfExists)
+      shim.createPartitions(client, toHiveTable(table), parts, ignoreIfExists)
     } catch {
       case e: InvocationTargetException => replaceExistException(e.getCause)
       case e: Throwable => replaceExistException(e)
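The `replaceExistException` helper is an exception-translation pattern: Hive signals duplicate partitions with an `AlreadyExistsException` nested inside a `HiveException` (and, when the shim call is reflective, inside an `InvocationTargetException` as well), and the client re-queries the metastore to find exactly which specs collide before raising Spark's `PartitionsAlreadyExistException`. A reduced sketch of the unwrap-then-translate shape; names like `probeExisting` are illustrative:

```scala
import java.lang.reflect.InvocationTargetException

// Reduced sketch: peel reflective wrappers, then map a provider-specific
// "already exists" failure onto the engine's own exception type.
def runTranslated[T](probeExisting: () => Seq[String])(body: => T): T = {
  def translate(e: Throwable): Nothing = e match {
    case ex if ex.getCause != null &&
        ex.getCause.getClass.getSimpleName == "AlreadyExistsException" =>
      // Stand-in for PartitionsAlreadyExistException, reporting colliding specs.
      throw new IllegalStateException(
        s"partitions already exist: ${probeExisting().mkString(", ")}")
    case other => throw other
  }
  try body catch {
    case e: InvocationTargetException => translate(e.getCause)
    case e: Throwable => translate(e)
  }
}
```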
@@ -861,11 +878,22 @@
     // Since HIVE-18238 (Hive 3.0.0), the Driver.close function's return type changed
     // and the CommandProcessorFactory.clean function was removed.
     driver.getClass.getMethod("close").invoke(driver)
-    if (version != hive.v3_0 && version != hive.v3_1) {
+    if (version != hive.v3_0 && version != hive.v3_1 && version != hive.v4_0) {
       CommandProcessorFactory.clean(conf)
     }
   }

+  def getResponseCode(response: CommandProcessorResponse): Int = {
+    if (version < hive.v4_0) {
+      response.getResponseCode
+    } else {
+      // Since Hive 4.0, the response code is removed from CommandProcessorResponse.
+      // Here we simply return 0 for the positive cases; error cases will have
+      // thrown exceptions earlier.
+      0
+    }
+  }
+
   // Hive query needs to start SessionState.
   SessionState.start(state)
   logDebug(s"Running hiveql '$cmd'")
@@ -878,30 +906,44 @@
     val proc = shim.getCommandProcessor(tokens(0), conf)
     proc match {
       case driver: Driver =>
-        val response: CommandProcessorResponse = driver.run(cmd)
-        // Throw an exception if there is an error in query processing.
-        if (response.getResponseCode != 0) {
+        try {
+          val response: CommandProcessorResponse = driver.run(cmd)
+          if (getResponseCode(response) != 0) {
+            // Throw an exception if there is an error in query processing.
+            // This works for Hive 3.x and earlier versions.
+            throw new QueryExecutionException(response.getErrorMessage)
+          }
+          driver.setMaxRows(maxRows)
+          val results = shim.getDriverResults(driver)
+          results
+        } catch {
+          case e @ (_: QueryExecutionException | _: SparkThrowable) =>
+            throw e
+          case e: Exception =>
+            // Wrap the original Hive error with QueryExecutionException and throw it
+            // if there is an error in query processing.
+            // This works for Hive 4.x and later versions.
+            throw new QueryExecutionException(ExceptionUtils.getStackTrace(e))
+        } finally {
+          closeDriver(driver)
-          throw new QueryExecutionException(response.getErrorMessage)
         }
-        driver.setMaxRows(maxRows)
-
-        val results = shim.getDriverResults(driver)
-        closeDriver(driver)
-        results

       case _ =>
-        if (state.out != null) {
+        val out = state.getClass.getField("out").get(state)
+        if (out != null) {
           // scalastyle:off println
-          state.out.println(tokens(0) + " " + cmd_1)
+          out.asInstanceOf[PrintStream].println(tokens(0) + " " + cmd_1)
           // scalastyle:on println
         }
         val response: CommandProcessorResponse = proc.run(cmd_1)
-        // Throw an exception if there is an error in query processing.
-        if (response.getResponseCode != 0) {
+        val responseCode = getResponseCode(response)
+        if (responseCode != 0) {
+          // Throw an exception if there is an error in query processing.
+          // This works for Hive 3.x and earlier versions. For 4.x and later
+          // versions, it will go to the catch block directly.
           throw new QueryExecutionException(response.getErrorMessage)
         }
-        Seq(response.getResponseCode.toString)
+        Seq(responseCode.toString)
     }
   } catch {
     case e: Exception =>
@@ -971,7 +1013,7 @@
       partSpec,
       replace,
       numDP,
-      listBucketingEnabled = hiveTable.isStoredAsSubDirectories)
+      hiveTable)
   }

   override def createFunction(db: String, func: CatalogFunction): Unit = withHiveState {