Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>org/apache/hive/beeline/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
Expand Down
17 changes: 13 additions & 4 deletions bin/compute-classpath.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" (

set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%

rem When Hive support is needed, Datanucleus jars must be included on the classpath.
rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
rem built with Hive, so look for them there.
if exist "%FWDIR%RELEASE" (
set datanucleus_dir=%FWDIR%lib
) else (
set datanucleus_dir=%FWDIR%lib_managed\jars
)

rem When Hive support is needed, Datanucleus jars must be included on the classpath.
rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
rem built with Hive, so look for them there.
set "datanucleus_jars="
for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
set datanucleus_jars=!datanucleus_jars!;%%d
)
set CLASSPATH=%CLASSPATH%;%datanucleus_jars%

rem Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
rem from MANIFEST.MF.
for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do (
set hivebeeline_jar=!hivebeeline_jar!;%%d
)
set CLASSPATH=%CLASSPATH%;%hivebeeline_jar%


set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
Expand Down
20 changes: 14 additions & 6 deletions bin/compute-classpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,21 +98,25 @@ fi

CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"

# When Hive support is needed, Datanucleus jars must be included on the classpath.
# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
if [ -f "$FWDIR/RELEASE" ]; then
datanucleus_dir="$FWDIR"/lib
else
datanucleus_dir="$FWDIR"/lib_managed/jars
fi

# When Hive support is needed, Datanucleus jars must be included on the classpath.
# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"

# Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
# from MANIFEST.MF.
hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline*\\.jar")"

if [ -n "$datanucleus_jars" ]; then
hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
if [ -n "$hive_files" ]; then
Expand All @@ -121,6 +125,10 @@ if [ -n "$datanucleus_jars" ]; then
fi
fi

if [ -n "$hivebeeline_jar" ]; then
CLASSPATH="$CLASSPATH:$hivebeeline_jar"
fi

# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
if [[ $SPARK_TESTING == 1 ]]; then
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
Expand Down
24 changes: 24 additions & 0 deletions sql/hive-thriftserver/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,30 @@
<skip>true</skip>
</configuration>
</plugin>
<!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix this

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.4</version>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: you should just inherit version config from the parent pom

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems there is no maven-dependency-plugin version in parent Pom? @srowen

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, hm. I suggest adding it in <pluginManagement> -- just with the version. Also, use 2.9, not 2.4.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok

<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<!-- basedir is spark/sql/hive/ -->
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also here

<outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeGroupIds>org.spark-project.hive</includeGroupIds>
<includeClassifiers>hive-beeline</includeClassifiers>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should use includeArtifactIds

</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>