Skip to content

Commit 64b8195

Browse files
committed
[ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node
- rebasing
1 parent 0a2d90e commit 64b8195

File tree

2 files changed

+74
-61
lines changed

2 files changed

+74
-61
lines changed

spark/pom.xml

Lines changed: 71 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,77 @@
726726
</dependencies>
727727
</profile>
728728

729+
<profile>
730+
<id>yarn-pyspark</id>
731+
<properties>
732+
<spark.download.url>http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
733+
</spark.download.url>
734+
</properties>
735+
<build>
736+
<plugins>
737+
<plugin>
738+
<groupId>com.googlecode.maven-download-plugin</groupId>
739+
<artifactId>download-maven-plugin</artifactId>
740+
<version>1.2.1</version>
741+
<executions>
742+
<execution>
743+
<id>download-pyspark-files</id>
744+
<phase>validate</phase>
745+
<goals>
746+
<goal>wget</goal>
747+
</goals>
748+
<configuration>
749+
<url>${spark.download.url}</url>
750+
<unpack>true</unpack>
751+
<outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
752+
</configuration>
753+
</execution>
754+
</executions>
755+
</plugin>
756+
<plugin>
757+
<artifactId>maven-clean-plugin</artifactId>
758+
<configuration>
759+
<filesets>
760+
<fileset>
761+
<directory>${basedir}/../python/build</directory>
762+
</fileset>
763+
<fileset>
764+
<directory>${project.build.directory}/spark-dist</directory>
765+
</fileset>
766+
</filesets>
767+
</configuration>
768+
</plugin>
769+
<plugin>
770+
<groupId>org.apache.maven.plugins</groupId>
771+
<artifactId>maven-antrun-plugin</artifactId>
772+
<version>1.7</version>
773+
<executions>
774+
<execution>
775+
<id>download-and-zip-pyspark-files</id>
776+
<phase>generate-resources</phase>
777+
<goals>
778+
<goal>run</goal>
779+
</goals>
780+
<configuration>
781+
<target>
782+
<delete dir="../python"/>
783+
<copy todir="../python">
784+
<fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
785+
</copy>
786+
<unzip src="../python/lib/py4j-0.8.2.1-src.zip"
787+
dest="../python/build"/>
788+
<zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
789+
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
790+
includes="pyspark/*.py,pyspark/**/*.py"/>
791+
</target>
792+
</configuration>
793+
</execution>
794+
</executions>
795+
</plugin>
796+
</plugins>
797+
</build>
798+
</profile>
799+
729800
<!-- Build without Hadoop dependencies that are included in some runtime environments. -->
730801
<profile>
731802
<id>hadoop-provided</id>
@@ -907,67 +978,6 @@
907978
</executions>
908979
</plugin>
909980

910-
<!-- for pyspark -->
911-
<plugin>
912-
<groupId>com.googlecode.maven-download-plugin</groupId>
913-
<artifactId>download-maven-plugin</artifactId>
914-
<version>1.2.1</version>
915-
<executions>
916-
<execution>
917-
<id>download-pyspark-files</id>
918-
<phase>validate</phase>
919-
<goals>
920-
<goal>wget</goal>
921-
</goals>
922-
<configuration>
923-
<url>${spark.download.url}</url>
924-
<unpack>true</unpack>
925-
<outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
926-
</configuration>
927-
</execution>
928-
</executions>
929-
</plugin>
930-
<plugin>
931-
<artifactId>maven-clean-plugin</artifactId>
932-
<configuration>
933-
<filesets>
934-
<fileset>
935-
<directory>${basedir}/../python/build</directory>
936-
</fileset>
937-
<fileset>
938-
<directory>${project.build.direcoty}/spark-dist</directory>
939-
</fileset>
940-
</filesets>
941-
</configuration>
942-
</plugin>
943-
<plugin>
944-
<groupId>org.apache.maven.plugins</groupId>
945-
<artifactId>maven-antrun-plugin</artifactId>
946-
<version>1.7</version>
947-
<executions>
948-
<execution>
949-
<id>download-and-zip-pyspark-files</id>
950-
<phase>generate-resources</phase>
951-
<goals>
952-
<goal>run</goal>
953-
</goals>
954-
<configuration>
955-
<target>
956-
<delete dir="../python" />
957-
<copy todir="../python">
958-
<fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
959-
</copy>
960-
<unzip src="../python/lib/py4j-0.8.2.1-src.zip"
961-
dest="../python/build"/>
962-
<zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
963-
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
964-
includes="pyspark/*.py,pyspark/**/*.py"/>
965-
</target>
966-
</configuration>
967-
</execution>
968-
</executions>
969-
</plugin>
970-
971981
<!-- Plugin to compile Scala code -->
972982
<plugin>
973983
<groupId>org.scala-tools</groupId>

zeppelin-distribution/src/assemble/distribution.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@
7373
<fileSet>
7474
<directory>../notebook</directory>
7575
</fileSet>
76+
<fileSet>
77+
<directory>../python</directory>
78+
</fileSet>
7679
</fileSets>
7780
<!--<fileSet>
7881
<directory>zeppelin-cli/target</directory>

0 commit comments

Comments
 (0)