
Commit 8a11345

Merge remote-tracking branch 'apache/master' into assemble-deps
Conflicts: project/SparkBuild.scala
2 parents: faa3168 + ce92a9c


381 files changed: +12199 / -4786 lines


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima*
 /build/
 work/
 out/

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala

Lines changed: 2 additions & 4 deletions
@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc.stop()
       sc = null
     }
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port")
   }
 
   test("halting by voting") {
@@ -82,7 +80,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
   test("large number of iterations") {
     // This tests whether jobs with a large number of iterations finish in a reasonable time,
     // because non-memoized recursion in RDD or DAGScheduler used to cause them to hang
-    failAfter(10 seconds) {
+    failAfter(30 seconds) {
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())
@@ -103,7 +101,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())
-      val numSupersteps = 50
+      val numSupersteps = 20
      val result =
        Bagel.run(sc, verts, msgs, sc.defaultParallelism, StorageLevel.DISK_ONLY) {
          (self: TestVertex, msgs: Option[Array[TestMessage]], superstep: Int) =>
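Note: the relaxed timeout above relies on ScalaTest's Timeouts/SpanSugar idiom that the suite already mixes in. A minimal standalone sketch of that idiom, assuming the ScalaTest 2.x API Spark used at the time (the suite name and test body below are illustrative, not part of this commit):

import org.scalatest.FunSuite
import org.scalatest.concurrent.Timeouts
import org.scalatest.time.SpanSugar._

// Illustrative suite: failAfter aborts the enclosing block (failing the test)
// if it runs longer than the given span, which is why widening the span from
// 10 to 30 seconds makes the slow Bagel iteration test less flaky.
class TimeoutSketchSuite extends FunSuite with Timeouts {
  test("iterative work finishes in time") {
    failAfter(30 seconds) {
      (1 to 1000000).foldLeft(0L)(_ + _)  // stand-in for the real workload
    }
  }
}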

bin/pyspark

Lines changed: 12 additions & 2 deletions
@@ -45,7 +45,7 @@ fi
 . $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
-if [ -z "$PYSPARK_PYTHON" ] ; then
+if [[ -z "$PYSPARK_PYTHON" ]]; then
   PYSPARK_PYTHON="python"
 fi
 export PYSPARK_PYTHON
@@ -59,7 +59,7 @@ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
 # If IPython options are specified, assume user wants to run IPython
-if [ -n "$IPYTHON_OPTS" ]; then
+if [[ -n "$IPYTHON_OPTS" ]]; then
   IPYTHON=1
 fi
 
@@ -76,6 +76,16 @@ for i in "$@"; do
 done
 export PYSPARK_SUBMIT_ARGS
 
+# For pyspark tests
+if [[ -n "$SPARK_TESTING" ]]; then
+  if [[ -n "$PYSPARK_DOC_TEST" ]]; then
+    exec "$PYSPARK_PYTHON" -m doctest $1
+  else
+    exec "$PYSPARK_PYTHON" $1
+  fi
+  exit
+fi
+
 # If a python file is provided, directly run spark-submit.
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2

bin/run-example

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
-./bin/spark-submit \
+"$FWDIR"/bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
   "$SPARK_EXAMPLES_JAR" \

core/pom.xml

Lines changed: 2 additions & 31 deletions
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -235,7 +235,7 @@
     </dependency>
     <dependency>
       <groupId>org.easymock</groupId>
-      <artifactId>easymock</artifactId>
+      <artifactId>easymockclassextension</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -258,35 +258,6 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>test</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-            <configuration>
-              <exportAntProperties>true</exportAntProperties>
-              <target>
-                <property name="spark.classpath" refid="maven.test.classpath" />
-                <property environment="env" />
-                <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry.">
-                  <condition>
-                    <not>
-                      <or>
-                        <isset property="env.SCALA_HOME" />
-                        <isset property="env.SCALA_LIBRARY_PATH" />
-                      </or>
-                    </not>
-                  </condition>
-                </fail>
-              </target>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
       <plugin>
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest-maven-plugin</artifactId>

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Register a ShuffleDependency for cleanup when it is garbage collected. */
-  def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _]) {
+  def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) {
     registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId))
   }
 

core/src/main/scala/org/apache/spark/Dependency.scala

Lines changed: 9 additions & 3 deletions
@@ -20,6 +20,7 @@ package org.apache.spark
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 import org.apache.spark.serializer.Serializer
+import org.apache.spark.shuffle.ShuffleHandle
 
 /**
  * :: DeveloperApi ::
@@ -50,19 +51,24 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {
  * Represents a dependency on the output of a shuffle stage.
  * @param rdd the parent RDD
  * @param partitioner partitioner used to partition the shuffle output
- * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to null,
+ * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to None,
 *                   the default serializer, as specified by `spark.serializer` config option, will
 *                   be used.
 */
 @DeveloperApi
-class ShuffleDependency[K, V](
+class ShuffleDependency[K, V, C](
     @transient rdd: RDD[_ <: Product2[K, V]],
     val partitioner: Partitioner,
-    val serializer: Serializer = null)
+    val serializer: Option[Serializer] = None,
+    val keyOrdering: Option[Ordering[K]] = None,
+    val aggregator: Option[Aggregator[K, V, C]] = None)
   extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) {
 
   val shuffleId: Int = rdd.context.newShuffleId()
 
+  val shuffleHandle: ShuffleHandle = rdd.context.env.shuffleManager.registerShuffle(
+    shuffleId, rdd.partitions.size, this)
+
   rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this))
 }
 
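Note: to make the widened constructor concrete, here is a minimal, hypothetical caller sketch. The RDD name, the 4-partition HashPartitioner, and the String/Int/Int type arguments are illustrative only, and a live SparkContext is assumed, since the constructor now registers itself with the shuffle manager:

import org.apache.spark.{HashPartitioner, ShuffleDependency, SparkContext}

object ShuffleDependencySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "shuffle-dependency-sketch")
    val pairs = sc.parallelize(Seq("a" -> 1, "b" -> 2, "a" -> 3))

    // C (the combiner type) is now explicit; leaving the three Options as None
    // keeps the old default behaviour: spark.serializer, no ordering, no aggregation.
    val dep = new ShuffleDependency[String, Int, Int](
      pairs,
      new HashPartitioner(4),
      serializer = None,
      keyOrdering = None,
      aggregator = None)

    println(s"registered shuffle ${dep.shuffleId}")
    sc.stop()
  }
}

In practice ShuffleDependency instances are created internally by Spark's shuffle RDDs rather than by user code; the sketch only illustrates the new parameter list and the shuffleHandle registration it triggers.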
