Skip to content

Commit 0be5120

Browse files
committed
Merge branch 'master' into SPARK-3454_w_jersey
2 parents 2382bef + 9f3273b commit 0be5120

File tree

270 files changed

+3369
-1230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

270 files changed

+3369
-1230
lines changed

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent_2.10</artifactId>
24-
<version>1.3.0-SNAPSHOT</version>
24+
<version>1.4.0-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent_2.10</artifactId>
24-
<version>1.3.0-SNAPSHOT</version>
24+
<version>1.4.0-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

core/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent_2.10</artifactId>
24-
<version>1.3.0-SNAPSHOT</version>
24+
<version>1.4.0-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

@@ -290,7 +290,7 @@
290290
<dependency>
291291
<groupId>org.tachyonproject</groupId>
292292
<artifactId>tachyon-client</artifactId>
293-
<version>0.5.0</version>
293+
<version>0.6.1</version>
294294
<exclusions>
295295
<exclusion>
296296
<groupId>org.apache.hadoop</groupId>

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
145145
}
146146

147147
/** Keep cleaning RDD, shuffle, and broadcast state. */
148-
private def keepCleaning(): Unit = Utils.logUncaughtExceptions {
148+
private def keepCleaning(): Unit = Utils.tryOrStopSparkContext(sc) {
149149
while (!stopped) {
150150
try {
151151
val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT))

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
17361736
}
17371737
}
17381738

1739-
listenerBus.start()
1739+
listenerBus.start(this)
17401740
}
17411741

17421742
/** Post the application start event */

core/src/main/scala/org/apache/spark/TaskEndReason.scala

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,7 @@ case object TaskKilled extends TaskFailedReason {
151151
* Task requested the driver to commit, but was denied.
152152
*/
153153
@DeveloperApi
154-
case class TaskCommitDenied(
155-
jobID: Int,
156-
partitionID: Int,
157-
attemptID: Int)
158-
extends TaskFailedReason {
154+
case class TaskCommitDenied(jobID: Int, partitionID: Int, attemptID: Int) extends TaskFailedReason {
159155
override def toErrorString: String = s"TaskCommitDenied (Driver denied task commit)" +
160156
s" for job: $jobID, partition: $partitionID, attempt: $attemptID"
161157
}

core/src/main/scala/org/apache/spark/TaskState.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration {
2727

2828
type TaskState = Value
2929

30+
def isFailed(state: TaskState) = (LOST == state) || (FAILED == state)
31+
3032
def isFinished(state: TaskState) = FINISHED_STATES.contains(state)
3133

3234
def toMesos(state: TaskState): MesosTaskState = state match {
@@ -46,5 +48,6 @@ private[spark] object TaskState extends Enumeration {
4648
case MesosTaskState.TASK_FAILED => FAILED
4749
case MesosTaskState.TASK_KILLED => KILLED
4850
case MesosTaskState.TASK_LOST => LOST
51+
case MesosTaskState.TASK_ERROR => LOST
4952
}
5053
}

core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => J
3939
import org.apache.spark.partial.{BoundedDouble, PartialResult}
4040
import org.apache.spark.rdd.{OrderedRDDFunctions, RDD}
4141
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
42+
import org.apache.spark.serializer.Serializer
4243
import org.apache.spark.storage.StorageLevel
4344
import org.apache.spark.util.Utils
4445

@@ -227,24 +228,51 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
227228
* - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
228229
* - `mergeCombiners`, to combine two C's into a single one.
229230
*
230-
* In addition, users can control the partitioning of the output RDD, and whether to perform
231-
* map-side aggregation (if a mapper can produce multiple items with the same key).
231+
* In addition, users can control the partitioning of the output RDD, the serializer that is use
232+
* for the shuffle, and whether to perform map-side aggregation (if a mapper can produce multiple
233+
* items with the same key).
232234
*/
233235
def combineByKey[C](createCombiner: JFunction[V, C],
234-
mergeValue: JFunction2[C, V, C],
235-
mergeCombiners: JFunction2[C, C, C],
236-
partitioner: Partitioner): JavaPairRDD[K, C] = {
237-
implicit val ctag: ClassTag[C] = fakeClassTag
236+
mergeValue: JFunction2[C, V, C],
237+
mergeCombiners: JFunction2[C, C, C],
238+
partitioner: Partitioner,
239+
mapSideCombine: Boolean,
240+
serializer: Serializer): JavaPairRDD[K, C] = {
241+
implicit val ctag: ClassTag[C] = fakeClassTag
238242
fromRDD(rdd.combineByKey(
239243
createCombiner,
240244
mergeValue,
241245
mergeCombiners,
242-
partitioner
246+
partitioner,
247+
mapSideCombine,
248+
serializer
243249
))
244250
}
245251

246252
/**
247-
* Simplified version of combineByKey that hash-partitions the output RDD.
253+
* Generic function to combine the elements for each key using a custom set of aggregation
254+
* functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
255+
* "combined type" C * Note that V and C can be different -- for example, one might group an
256+
* RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
257+
* functions:
258+
*
259+
* - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
260+
* - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
261+
* - `mergeCombiners`, to combine two C's into a single one.
262+
*
263+
* In addition, users can control the partitioning of the output RDD. This method automatically
264+
* uses map-side aggregation in shuffling the RDD.
265+
*/
266+
def combineByKey[C](createCombiner: JFunction[V, C],
267+
mergeValue: JFunction2[C, V, C],
268+
mergeCombiners: JFunction2[C, C, C],
269+
partitioner: Partitioner): JavaPairRDD[K, C] = {
270+
combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner, true, null)
271+
}
272+
273+
/**
274+
* Simplified version of combineByKey that hash-partitions the output RDD and uses map-side
275+
* aggregation.
248276
*/
249277
def combineByKey[C](createCombiner: JFunction[V, C],
250278
mergeValue: JFunction2[C, V, C],
@@ -488,7 +516,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
488516

489517
/**
490518
* Simplified version of combineByKey that hash-partitions the resulting RDD using the existing
491-
* partitioner/parallelism level.
519+
* partitioner/parallelism level and using map-side aggregation.
492520
*/
493521
def combineByKey[C](createCombiner: JFunction[V, C],
494522
mergeValue: JFunction2[C, V, C],

core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,23 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
101101

102102
/**
103103
* Return a sampled subset of this RDD.
104+
*
105+
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
106+
* @param fraction expected size of the sample as a fraction of this RDD's size
107+
* without replacement: probability that each element is chosen; fraction must be [0, 1]
108+
* with replacement: expected number of times each element is chosen; fraction must be >= 0
104109
*/
105110
def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
106111
sample(withReplacement, fraction, Utils.random.nextLong)
107112

108113
/**
109114
* Return a sampled subset of this RDD.
115+
*
116+
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
117+
* @param fraction expected size of the sample as a fraction of this RDD's size
118+
* without replacement: probability that each element is chosen; fraction must be [0, 1]
119+
* with replacement: expected number of times each element is chosen; fraction must be >= 0
120+
* @param seed seed for the random number generator
110121
*/
111122
def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
112123
wrapRDD(rdd.sample(withReplacement, fraction, seed))

core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import org.json4s.jackson.JsonMethods
3333

3434
import org.apache.spark.{Logging, SparkConf, SparkContext}
3535
import org.apache.spark.deploy.master.{RecoveryState, SparkCuratorUtil}
36+
import org.apache.spark.util.Utils
3637

3738
/**
3839
* This suite tests the fault tolerance of the Spark standalone scheduler, mainly the Master.
@@ -405,8 +406,7 @@ private object SparkDocker {
405406

406407
private def startNode(dockerCmd: ProcessBuilder) : (String, DockerId, File) = {
407408
val ipPromise = promise[String]()
408-
val outFile = File.createTempFile("fault-tolerance-test", "")
409-
outFile.deleteOnExit()
409+
val outFile = File.createTempFile("fault-tolerance-test", "", Utils.createTempDir())
410410
val outStream: FileWriter = new FileWriter(outFile)
411411
def findIpAndLog(line: String): Unit = {
412412
if (line.startsWith("CONTAINER_IP=")) {

0 commit comments

Comments
 (0)