Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions dev/deps/spark-deps-hadoop-2.7-hive-2.3
Original file line number Diff line number Diff line change
Expand Up @@ -81,21 +81,21 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar
hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar
hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar
hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar
hive-beeline/2.3.7//hive-beeline-2.3.7.jar
hive-cli/2.3.7//hive-cli-2.3.7.jar
hive-common/2.3.7//hive-common-2.3.7.jar
hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar
hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar
hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar
hive-metastore/2.3.7//hive-metastore-2.3.7.jar
hive-serde/2.3.7//hive-serde-2.3.7.jar
hive-beeline/2.3.8//hive-beeline-2.3.8.jar
hive-cli/2.3.8//hive-cli-2.3.8.jar
hive-common/2.3.8//hive-common-2.3.8.jar
hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar
hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar
hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar
hive-metastore/2.3.8//hive-metastore-2.3.8.jar
hive-serde/2.3.8//hive-serde-2.3.8.jar
hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar
hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar
hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar
hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar
hive-shims/2.3.7//hive-shims-2.3.7.jar
hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar
hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar
hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar
hive-shims/2.3.8//hive-shims-2.3.8.jar
hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar
hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar
hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar
hk2-api/2.6.1//hk2-api-2.6.1.jar
hk2-locator/2.6.1//hk2-locator-2.6.1.jar
hk2-utils/2.6.1//hk2-utils-2.6.1.jar
Expand Down
26 changes: 13 additions & 13 deletions dev/deps/spark-deps-hadoop-3.2-hive-2.3
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,21 @@ gson/2.2.4//gson-2.2.4.jar
guava/14.0.1//guava-14.0.1.jar
hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar
hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar
hive-beeline/2.3.7//hive-beeline-2.3.7.jar
hive-cli/2.3.7//hive-cli-2.3.7.jar
hive-common/2.3.7//hive-common-2.3.7.jar
hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar
hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar
hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar
hive-metastore/2.3.7//hive-metastore-2.3.7.jar
hive-serde/2.3.7//hive-serde-2.3.7.jar
hive-beeline/2.3.8//hive-beeline-2.3.8.jar
hive-cli/2.3.8//hive-cli-2.3.8.jar
hive-common/2.3.8//hive-common-2.3.8.jar
hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar
hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar
hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar
hive-metastore/2.3.8//hive-metastore-2.3.8.jar
hive-serde/2.3.8//hive-serde-2.3.8.jar
hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar
hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar
hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar
hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar
hive-shims/2.3.7//hive-shims-2.3.7.jar
hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar
hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar
hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar
hive-shims/2.3.8//hive-shims-2.3.8.jar
hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar
hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar
hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar
hk2-api/2.6.1//hk2-api-2.6.1.jar
hk2-locator/2.6.1//hk2-locator-2.6.1.jar
hk2-utils/2.6.1//hk2-utils-2.6.1.jar
Expand Down
4 changes: 2 additions & 2 deletions docs/building-spark.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ Example:

To enable Hive integration for Spark SQL along with its JDBC server and CLI,
add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options.
By default Spark will build with Hive 2.3.7.
By default Spark will build with Hive 2.3.8.

# With Hive 2.3.7 support
# With Hive 2.3.8 support
./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package

## Packaging without Hadoop Dependencies for YARN
Expand Down
8 changes: 4 additions & 4 deletions docs/sql-data-sources-hive-tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used
<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
<tr>
<td><code>spark.sql.hive.metastore.version</code></td>
<td><code>2.3.7</code></td>
<td><code>2.3.8</code></td>
<td>
Version of the Hive metastore. Available
options are <code>0.12.0</code> through <code>2.3.7</code> and <code>3.0.0</code> through <code>3.1.2</code>.
options are <code>0.12.0</code> through <code>2.3.8</code> and <code>3.0.0</code> through <code>3.1.2</code>.
</td>
<td>1.4.0</td>
</tr>
Expand All @@ -142,9 +142,9 @@ The following options can be used to configure the version of Hive that is used
property can be one of four options:
<ol>
<li><code>builtin</code></li>
Use Hive 2.3.7, which is bundled with the Spark assembly when <code>-Phive</code> is
Use Hive 2.3.8, which is bundled with the Spark assembly when <code>-Phive</code> is
enabled. When this option is chosen, <code>spark.sql.hive.metastore.version</code> must be
either <code>2.3.7</code> or not defined.
either <code>2.3.8</code> or not defined.
<li><code>maven</code></li>
Use Hive jars of specified version downloaded from Maven repositories. This configuration
is not generally recommended for production deployments.
Expand Down
2 changes: 1 addition & 1 deletion docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ Python UDF registration is unchanged.
Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs.
Currently, Hive SerDes and UDFs are based on built-in Hive,
and Spark SQL can be connected to different versions of Hive Metastore
(from 0.12.0 to 2.3.7 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
(from 0.12.0 to 2.3.8 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).

#### Deploying in Existing Hive Warehouses
{:.no_toc}
Expand Down
20 changes: 18 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@
<hive.group>org.apache.hive</hive.group>
<hive.classifier>core</hive.classifier>
<!-- Version used in Maven Hive dependency -->
<hive.version>2.3.7</hive.version>
<hive23.version>2.3.7</hive23.version>
<hive.version>2.3.8</hive.version>
<hive23.version>2.3.8</hive23.version>
<!-- Version used for internal directory structure -->
<hive.version.short>2.3</hive.version.short>
<!-- note that this should be compatible with Kafka brokers version 0.10 and up -->
Expand Down Expand Up @@ -1887,6 +1887,22 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>net.hydromatic</groupId>
<artifactId>eigenbase-properties</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
</exclusion>
Comment on lines +1890 to +1905
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These dependencies were added by apache/hive@52a4ab8.

<!-- End of Hive 2.3 exclusion -->
</exclusions>
</dependency>
Expand Down
11 changes: 6 additions & 5 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3724,20 +3724,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark

test("SPARK-33084: Add jar support Ivy URI in SQL") {
val sc = spark.sparkContext
val hiveVersion = "2.3.8"
// default transitive=false, only download specified jar
sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7")
sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion")
assert(sc.listJars()
.exists(_.contains("org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")))
.exists(_.contains(s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")))

// test download ivy URL jar return multiple jars
sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0?transitive=true")
assert(sc.listJars().exists(_.contains("scalajs-library_2.12")))
assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12")))

sql("ADD JAR ivy://org.apache.hive:hive-contrib:2.3.7" +
sql(s"ADD JAR ivy://org.apache.hive:hive-contrib:$hiveVersion" +
"?exclude=org.pentaho:pentaho-aggdesigner-algorithm&transitive=true")
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-contrib-2.3.7.jar")))
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-exec-2.3.7.jar")))
assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-contrib-$hiveVersion.jar")))
assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-exec-$hiveVersion.jar")))
assert(!sc.listJars().exists(_.contains("org.pentaho.pentaho_aggdesigner-algorithm")))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test {
conf += resultSet.getString(1) -> resultSet.getString(2)
}

assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7"))
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8"))
}
}

Expand All @@ -559,7 +559,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test {
conf += resultSet.getString(1) -> resultSet.getString(2)
}

assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7"))
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8"))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ private[spark] object HiveUtils extends Logging {

val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version")
.doc("Version of the Hive metastore. Available options are " +
"<code>0.12.0</code> through <code>2.3.7</code> and " +
"<code>0.12.0</code> through <code>2.3.8</code> and " +
"<code>3.0.0</code> through <code>3.1.2</code>.")
.version("1.4.0")
.stringConf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ private[hive] object IsolatedClientLoader extends Logging {
case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
case "2.2" | "2.2.0" => hive.v2_2
case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" =>
hive.v2_3
case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" |
"2.3.8" => hive.v2_3
case "3.0" | "3.0.0" => hive.v3_0
case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
case version =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ package object client {
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))

// Since HIVE-14496, Hive materialized view need calcite-core.
// Since HIVE-23980, calcite-core is included in the Hive package jar.
// For spark, only VersionsSuite currently creates a hive materialized view for testing.
case object v2_3 extends HiveVersion("2.3.7",
exclusions = Seq("org.apache.calcite:calcite-druid",
case object v2_3 extends HiveVersion("2.3.8",
Copy link
Member

@viirya viirya Dec 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per our internal testing, this needs to be changed for all tests to pass. But let's see the Jenkins result first.

exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"com.fasterxml.jackson.core:*",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))

Expand All @@ -114,7 +116,6 @@ package object client {
extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0",
"org.apache.derby:derby:10.14.1.0"),
exclusions = Seq("org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))

Expand All @@ -124,7 +125,6 @@ package object client {
extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0",
"org.apache.derby:derby:10.14.1.0"),
exclusions = Seq("org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
.map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark"))
private val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
"2.3.7"
"2.3.8"
} else {
"1.2.1"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec
import org.apache.spark.sql.hive.HiveUtils.{builtinHiveVersion => hiveVersion}
import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.internal.SQLConf
Expand Down Expand Up @@ -1223,17 +1224,17 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
test("SPARK-33084: Add jar support Ivy URI in SQL") {
val testData = TestHive.getHiveFile("data/files/sample.json").toURI
withTable("t") {
sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7")
sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion")
sql(
"""CREATE TABLE t(a string, b string)
|ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)
sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t""")
sql("SELECT * FROM src JOIN t on src.key = t.a")
assert(sql("LIST JARS").filter(_.getString(0).contains(
"org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0)
s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")).count() > 0)
assert(sql("LIST JAR").
filter(_.getString(0).contains(
"org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0)
s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")).count() > 0)
}
}
}
Expand Down