Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/sql-programming-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1746,6 +1746,13 @@ The following options can be used to configure the version of Hive that is used
</ol>
</td>
</tr>
<tr>
<td><code>spark.sql.hive.metastore.mavenRepo</code></td>
<td><a href="http://www.datanucleus.org/downloads/maven2">http://www.datanucleus.org/downloads/maven2</a></td>
<td>
Maven repository from which the Hive metastore jars are downloaded when <code>spark.sql.hive.metastore.jars</code> is set to <code>maven</code>.
</td>
</tr>
<tr>
<td><code>spark.sql.hive.metastore.sharedPrefixes</code></td>
<td><code>com.mysql.jdbc,<br/>org.postgresql,<br/>com.microsoft.sqlserver,<br/>oracle.jdbc</code></td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,13 @@ class HiveContext private[hive](
*/
protected[hive] def hiveMetastoreJars: String = getConf(HIVE_METASTORE_JARS)

/**
* The Maven repository from which the jars used to instantiate the HiveMetastoreClient are
* downloaded. The value is read from the HIVE_METASTORE_MAVEN_REPO configuration entry and
* takes effect when HIVE_METASTORE_JARS is set to 'maven'.
*/
protected[hive] def hiveMetastoreMavenRepo: String = getConf(HIVE_METASTORE_MAVEN_REPO)

/**
* A comma separated list of class prefixes that should be loaded using the classloader that
* is shared between Spark SQL and a specific version of Hive. An example of classes that should
Expand Down Expand Up @@ -292,6 +299,7 @@ class HiveContext private[hive](
hiveMetastoreVersion = hiveMetastoreVersion,
hadoopVersion = VersionInfo.getVersion,
config = allConfig,
mavenRepo = Some(hiveMetastoreMavenRepo),
barrierPrefixes = hiveMetastoreBarrierPrefixes,
sharedPrefixes = hiveMetastoreSharedPrefixes)
} else {
Expand Down Expand Up @@ -685,6 +693,12 @@ private[hive] object HiveContext {
| Use Hive jars of specified version downloaded from Maven repositories.
| 3. A classpath in the standard format for both Hive and Hadoop.
""".stripMargin)

// Configuration entry naming the Maven repository used to fetch the Hive metastore jars.
// Note the trailing space before the `+`: without it the concatenated doc text would read
// "theHiveMetastoreClient".
val HIVE_METASTORE_MAVEN_REPO = stringConf("spark.sql.hive.metastore.mavenRepo",
  defaultValue = Some("http://www.datanucleus.org/downloads/maven2"),
  doc = "Maven repositories where Hive metastore jars which are used to instantiate the " +
    "HiveMetastoreClient are downloaded.")

val CONVERT_METASTORE_PARQUET = booleanConf("spark.sql.hive.convertMetastoreParquet",
defaultValue = Some(true),
doc = "When set to false, Spark SQL will use the Hive SerDe for parquet tables instead of " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ private[hive] object IsolatedClientLoader extends Logging {
hiveMetastoreVersion: String,
hadoopVersion: String,
config: Map[String, String] = Map.empty,
mavenRepo: Option[String],
ivyPath: Option[String] = None,
sharedPrefixes: Seq[String] = Seq.empty,
barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized {
Expand All @@ -54,7 +55,7 @@ private[hive] object IsolatedClientLoader extends Logging {
} else {
val (downloadedFiles, actualHadoopVersion) =
try {
(downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion)
(downloadVersion(resolvedVersion, hadoopVersion, mavenRepo, ivyPath), hadoopVersion)
} catch {
case e: RuntimeException if e.getMessage.contains("hadoop") =>
// If the error message contains hadoop, it is probably because the hadoop
Expand All @@ -68,7 +69,7 @@ private[hive] object IsolatedClientLoader extends Logging {
"It is recommended to set jars used by Hive metastore client through " +
"spark.sql.hive.metastore.jars in the production environment.")
sharesHadoopClasses = false
(downloadVersion(resolvedVersion, "2.4.0", ivyPath), "2.4.0")
(downloadVersion(resolvedVersion, "2.4.0", mavenRepo, ivyPath), "2.4.0")
}
resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles)
resolvedVersions((resolvedVersion, actualHadoopVersion))
Expand All @@ -95,6 +96,7 @@ private[hive] object IsolatedClientLoader extends Logging {
private def downloadVersion(
version: HiveVersion,
hadoopVersion: String,
mavenRepo: Option[String],
ivyPath: Option[String]): Seq[URL] = {
val hiveArtifacts = version.extraDeps ++
Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde")
Expand All @@ -105,7 +107,7 @@ private[hive] object IsolatedClientLoader extends Logging {
val classpath = quietly {
SparkSubmitUtils.resolveMavenCoordinates(
hiveArtifacts.mkString(","),
Some("http://www.datanucleus.org/downloads/maven2"),
mavenRepo,
ivyPath,
exclusions = version.exclusions)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class VersionsSuite extends SparkFunSuite with Logging {
Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
}

// Reuse the default value declared for HiveContext.HIVE_METASTORE_MAVEN_REPO so the clients
// built in this suite are given the same repository setting as the configuration default.
private val mavenRepo = HiveContext.HIVE_METASTORE_MAVEN_REPO.defaultValue

private def buildConf() = {
lazy val warehousePath = Utils.createTempDir()
lazy val metastorePath = Utils.createTempDir()
Expand All @@ -59,6 +61,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = HiveContext.hiveExecutionVersion,
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
val db = new HiveDatabase("default", "")
badClient.createDatabase(db)
Expand Down Expand Up @@ -93,6 +96,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = "13",
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
}
}
Expand All @@ -112,6 +116,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = version,
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
}

Expand Down