@@ -30,20 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
 
-# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
-# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
-# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
-# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
-# the future.
-if [ "$SPARK_HIVE" = "true" ]; then
-  echo 1>&2 "SPARK_HIVE is set, including Hive support."
-
-  # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
-  DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
-  CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
-fi
-
-ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
+ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
 
 # First check if we have a dependencies jar. If so, include binary classes with the deps jar
 if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
@@ -70,6 +57,23 @@
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi
 
+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
+num_datanucleus_jars=$(ls "$FWDIR"/lib_managed/jars/ | grep "datanucleus-.*\\.jar" | wc -l)
+if [ $num_datanucleus_jars -gt 0 ]; then
+  AN_ASSEMBLY_JAR=${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR}
+  num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)
+  if [ $num_hive_files -gt 0 ]; then
+    echo "Spark assembly has been built with Hive, including Datanucleus jars on classpath" 1>&2
+    DATANUCLEUSJARS=$(echo "$FWDIR/lib_managed/jars"/datanucleus-*.jar | tr " " :)
+    CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+  fi
+fi
+
 # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
 if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
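
Note: the diff also swaps the colon-joining idiom. The removed lines capture the `datanucleus-*.jar` glob in a bash array and join it via `IFS`, while the added lines expand the glob into words and translate spaces to colons with `tr`. A minimal standalone sketch of both idioms, using a temporary directory with made-up jar names in place of `$FWDIR/lib_managed/jars`:

```bash
#!/usr/bin/env bash
# Hypothetical stand-in for "$FWDIR/lib_managed/jars"; the jar names are invented.
JAR_DIR=$(mktemp -d)
touch "$JAR_DIR/datanucleus-core-3.2.2.jar" "$JAR_DIR/datanucleus-rdbms-3.2.1.jar"

# Removed idiom: collect the glob into an array, then set IFS=: so that
# "${JARS[*]}" joins the elements with a colon. All scoped to the subshell.
JOINED_ARRAY=$(JARS=("$JAR_DIR"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")

# Added idiom: let the glob expand into space-separated words and translate
# spaces to colons. Simpler, but it would mangle a jar path containing a space.
JOINED_TR=$(echo "$JAR_DIR"/datanucleus-*.jar | tr " " :)

echo "$JOINED_ARRAY"  # the two jar paths joined by ":"
echo "$JOINED_TR"     # same output, since these paths contain no spaces
```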
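The Hive probe works because `jar tvf <jar> <entry>` lists only entries under the given path, so a zero line count means the assembly was built without Hive; listing a directory is far cheaper than scanning a large assembly jar, which is why the diff checks for the Datanucleus jars first (per its comment, up to half a second faster when Hive is not used). A hedged sketch of the probe in isolation, with a hypothetical jar path standing in for the assembly resolved earlier:

```bash
#!/usr/bin/env bash
# Hypothetical path; in the script this is whichever assembly was found earlier,
# i.e. ${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR} (use ASSEMBLY_JAR, else the deps jar).
AN_ASSEMBLY_JAR=/path/to/spark-assembly.jar

# "jar tvf <jar> <entry>" prints only matching entries; stderr is silenced so a
# missing or unreadable jar simply yields a count of 0 rather than an error.
num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)

if [ "$num_hive_files" -gt 0 ]; then
  echo "assembly contains Hive classes" 1>&2
fi
```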