
Commit 572b62c

[SPARK-7853] [SQL] Fix HiveContext in Spark Shell
https://issues.apache.org/jira/browse/SPARK-7853

This fixes the problem introduced by my change in #6435, which caused HiveContext creation to fail in the Spark shell because of a class loader issue.

Author: Yin Huai <[email protected]>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.
1 parent 0077af2 commit 572b62c

2 files changed: +25 −22 lines
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 19 additions & 16 deletions
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           "Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }

-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }

-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+          "Please set spark.sql.hive.metastore.jars.")
       }

       logInfo(
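The behavioral change in this hunk: the old addJars only accepted a URLClassLoader as its starting point, and the top-level match threw for anything else, which is what broke spark-shell, where the context class loader is a REPL loader rather than a URLClassLoader. The new allJars walks the entire parent chain and collects URLs from every URLClassLoader it finds along the way. Below is a minimal, standalone sketch of that walk (the object and main wrapper are illustrative, not Spark code):

import java.net.{URL, URLClassLoader}

object AllJarsSketch {
  def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
    case null => Array.empty[URL]              // reached the top of the chain
    case ucl: URLClassLoader =>
      ucl.getURLs ++ allJars(ucl.getParent)    // collect these URLs, keep climbing
    case other => allJars(other.getParent)     // skip loaders without URLs (e.g. a REPL loader)
  }

  def main(args: Array[String]): Unit = {
    // Start from the context class loader, as the fix does via
    // Utils.getContextOrSparkClassLoader.
    val loader = Thread.currentThread().getContextClassLoader
    allJars(loader).foreach(println)
  }
}

Note that on Java 9+ the application class loader is no longer a URLClassLoader, so this sketch can legitimately collect nothing there; on the Java 7/8 JVMs contemporary with this commit it returns the full classpath.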
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {

   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfer with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }

   /* A catalyst metadata catalog that points to the Hive Metastore. */
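The reordering above is subtle, so here is a contrived, self-contained sketch (hypothetical names, not Spark code) of the hazard the new comment describes: forcing one lazy val can create shared state that another lazy val's initializer then observes, so the statement that forces the "session-creating" value has to run last.

object LazyInitOrder {
  private var defaultSessionCreated = false

  // Analogue of hiveconf: touching it creates a default session as a side effect.
  lazy val hiveconfLike: String = {
    defaultSessionCreated = true
    "hiveconf"
  }

  // Analogue of executionHive: its initializer misbehaves if a default
  // session already exists when it first runs.
  lazy val executionHiveLike: String =
    if (defaultSessionCreated) "executionHive (sees unwanted default session)"
    else "executionHive (clean)"

  def main(args: Array[String]): Unit = {
    // Forcing executionHiveLike first keeps it clean; this mirrors moving
    // hiveconf.set to the end of setConf in the commit.
    println(executionHiveLike)   // executionHive (clean)
    println(hiveconfLike)        // hiveconf
  }
}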

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 6 additions & 6 deletions
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
     hiveQlTable.getMetadata
   )

-  implicit class SchemaAttribute(f: FieldSchema) {
+  implicit class SchemaAttribute(f: HiveColumn) {
     def toAttribute: AttributeReference = AttributeReference(
-      f.getName,
-      HiveMetastoreTypes.toDataType(f.getType),
+      f.name,
+      HiveMetastoreTypes.toDataType(f.hiveType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
     )(qualifiers = Seq(alias.getOrElse(tableName)))
   }

-  // Must be a stable value since new attributes are born here.
-  val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+  /** PartitionKey attributes */
+  val partitionKeys = table.partitionColumns.map(_.toAttribute)

   /** Non-partitionKey attributes */
-  val attributes = hiveQlTable.getCols.map(_.toAttribute)
+  val attributes = table.schema.map(_.toAttribute)

   val output = attributes ++ partitionKeys
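This hunk removes the remaining uses of Hive's thrift-backed FieldSchema (and of hiveQlTable for columns) in favor of Spark's own client-side column representation, so building attributes no longer calls into Hive metastore utility code. A rough standalone sketch of the shape of that change follows; HiveColumn here is modeled on Spark's client-side case class, and AttributeRef stands in for Catalyst's AttributeReference, so the real definitions differ:

object MetastoreRelationSketch {
  // Plain fields instead of Hive's FieldSchema getters, so no Hive utility
  // code is involved when reading column metadata.
  case class HiveColumn(name: String, hiveType: String, comment: String)
  case class AttributeRef(name: String, hiveType: String, nullable: Boolean)

  def toAttribute(c: HiveColumn): AttributeRef =
    // Everything is nullable: Hive data can be loaded with no validation.
    AttributeRef(c.name, c.hiveType, nullable = true)

  def main(args: Array[String]): Unit = {
    val schema = Seq(HiveColumn("id", "bigint", ""), HiveColumn("name", "string", ""))
    val partitionColumns = Seq(HiveColumn("dt", "string", ""))

    // Mirrors the commit: attributes from table.schema, partition keys from
    // table.partitionColumns, and output is their concatenation.
    val output = schema.map(toAttribute) ++ partitionColumns.map(toAttribute)
    output.foreach(println)
  }
}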
