apache · scwf · Nov 6, 2014 · Nov 6, 2014 · Nov 6, 2014 · Nov 7, 2014
diff --git a/assembly/pom.xml b/assembly/pom.xml
@@ -119,6 +119,7 @@
               <artifact>*:*</artifact>
               <excludes>
                 <exclude>org/datanucleus/**</exclude>
+                <exclude>org/apache/hive/beeline/**</exclude>
                 <exclude>META-INF/*.SF</exclude>
                 <exclude>META-INF/*.DSA</exclude>
                 <exclude>META-INF/*.RSA</exclude>

diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
@@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
-rem When Hive support is needed, Datanucleus jars must be included on the classpath.
-rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-rem built with Hive, so look for them there.
 if exist "%FWDIR%RELEASE" (
   set datanucleus_dir=%FWDIR%lib
 ) else (
   set datanucleus_dir=%FWDIR%lib_managed\jars
 )
+
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
 set "datanucleus_jars="
 for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
   set datanucleus_jars=!datanucleus_jars!;%%d
 )
 set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
 
+rem hive-beeline*.jar is excluded from the uber jar because beeline needs the
+rem Implementation-Version from MANIFEST.MF. Clear the variable before accumulating into it.
+set "hivebeeline_jar="
+for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do (
+  set hivebeeline_jar=!hivebeeline_jar!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%hivebeeline_jar%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes

diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
@@ -98,21 +98,25 @@ fi
 
 CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 
-# When Hive support is needed, Datanucleus jars must be included on the classpath.
-# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
-# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
-# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 if [ -f "$FWDIR/RELEASE" ]; then
   datanucleus_dir="$FWDIR"/lib
 else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi
 
+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
 datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
+# beeline needs Implementation-Version from MANIFEST.MF, so its jar stays outside the uber jar.
+hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline.*\\.jar")"
+hivebeeline_jar="$(echo "$hivebeeline_jar" | tr "\n" : | sed s/:$//g)"
+
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
   if [ -n "$hive_files" ]; then
@@ -121,6 +125,10 @@ if [ -n "$datanucleus_jars" ]; then
   fi
 fi
 
+if [ -n "$hivebeeline_jar" ]; then
+    CLASSPATH="$CLASSPATH:$hivebeeline_jar"
+fi
+
 # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
 if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"

diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
@@ -95,6 +95,30 @@
           <skip>true</skip>
         </configuration>
       </plugin>
+      <!-- Deploy the hive-beeline jar to the spark/lib_managed/jars directory -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <!-- basedir is spark/sql/hive-thriftserver/ -->
+              <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+              <includeGroupIds>org.spark-project.hive</includeGroupIds>
+              <includeArtifactIds>hive-beeline</includeArtifactIds>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 </project>