apache
diff --git a/‎assembly/pom.xml‎
Lines changed: 1 addition & 1 deletion b/‎assembly/pom.xml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bagel/pom.xml‎
Lines changed: 1 addition & 1 deletion b/‎bagel/pom.xml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/pom.xml‎
Lines changed: 2 additions & 2 deletions b/‎core/pom.xml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/ContextCleaner.scala‎
Lines changed: 1 addition & 1 deletion b/‎core/src/main/scala/org/apache/spark/ContextCleaner.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 1 addition & 1 deletion b/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/TaskEndReason.scala‎
Lines changed: 1 addition & 5 deletions b/‎core/src/main/scala/org/apache/spark/TaskEndReason.scala‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/TaskState.scala‎
Lines changed: 3 additions & 0 deletions b/‎core/src/main/scala/org/apache/spark/TaskState.scala‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala‎
Lines changed: 37 additions & 9 deletions b/‎core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala‎
Lines changed: 37 additions & 9 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala‎
Lines changed: 11 additions & 0 deletions b/‎core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala‎
Lines changed: 2 additions & 2 deletions b/‎core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala‎
Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.4.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
 
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.4.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
 
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.4.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -290,7 +290,7 @@
     <dependency>
       <groupId>org.tachyonproject</groupId>
       <artifactId>tachyon-client</artifactId>
-      <version>0.5.0</version>
+      <version>0.6.1</version>
       <exclusions>
         <exclusion>
           <groupId>org.apache.hadoop</groupId>
 
@@ -145,7 +145,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Keep cleaning RDD, shuffle, and broadcast state. */
-  private def keepCleaning(): Unit = Utils.logUncaughtExceptions {
+  private def keepCleaning(): Unit = Utils.tryOrStopSparkContext(sc) {
     while (!stopped) {
       try {
         val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT))
 
@@ -1736,7 +1736,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
         }
     }
 
-    listenerBus.start()
+    listenerBus.start(this)
   }
 
   /** Post the application start event */
 
@@ -151,11 +151,7 @@ case object TaskKilled extends TaskFailedReason {
  * Task requested the driver to commit, but was denied.
  */
 @DeveloperApi
-case class TaskCommitDenied(
-    jobID: Int,
-    partitionID: Int,
-    attemptID: Int)
-  extends TaskFailedReason {
+case class TaskCommitDenied(jobID: Int, partitionID: Int, attemptID: Int) extends TaskFailedReason {
   override def toErrorString: String = s"TaskCommitDenied (Driver denied task commit)" +
     s" for job: $jobID, partition: $partitionID, attempt: $attemptID"
 }
 
@@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration {
 
   type TaskState = Value
 
+  def isFailed(state: TaskState) = (LOST == state) || (FAILED == state)
+
   def isFinished(state: TaskState) = FINISHED_STATES.contains(state)
 
   def toMesos(state: TaskState): MesosTaskState = state match {
@@ -46,5 +48,6 @@ private[spark] object TaskState extends Enumeration {
     case MesosTaskState.TASK_FAILED => FAILED
     case MesosTaskState.TASK_KILLED => KILLED
     case MesosTaskState.TASK_LOST => LOST
+    case MesosTaskState.TASK_ERROR => LOST
   }
 }
@@ -39,6 +39,7 @@ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => J
 import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.rdd.{OrderedRDDFunctions, RDD}
 import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
+import org.apache.spark.serializer.Serializer
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.Utils
 
@@ -227,24 +228,51 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
    * - `mergeCombiners`, to combine two C's into a single one.
    *
-   * In addition, users can control the partitioning of the output RDD, and whether to perform
-   * map-side aggregation (if a mapper can produce multiple items with the same key).
+   * In addition, users can control the partitioning of the output RDD, the serializer that is
+   * used for the shuffle, and whether to perform map-side aggregation (if a mapper can produce
+   * multiple items with the same key).
    */
   def combineByKey[C](createCombiner: JFunction[V, C],
-    mergeValue: JFunction2[C, V, C],
-    mergeCombiners: JFunction2[C, C, C],
-    partitioner: Partitioner): JavaPairRDD[K, C] = {
-    implicit val ctag: ClassTag[C] = fakeClassTag
+      mergeValue: JFunction2[C, V, C],
+      mergeCombiners: JFunction2[C, C, C],
+      partitioner: Partitioner,
+      mapSideCombine: Boolean,
+      serializer: Serializer): JavaPairRDD[K, C] = {
+      implicit val ctag: ClassTag[C] = fakeClassTag
     fromRDD(rdd.combineByKey(
       createCombiner,
       mergeValue,
       mergeCombiners,
-      partitioner
+      partitioner,
+      mapSideCombine,
+      serializer
     ))
   }
 
   /**
-   * Simplified version of combineByKey that hash-partitions the output RDD.
+   * Generic function to combine the elements for each key using a custom set of aggregation
+   * functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
+   * "combined type" C. Note that V and C can be different -- for example, one might group an
+   * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
+   * functions:
+   *
+   * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
+   * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
+   * - `mergeCombiners`, to combine two C's into a single one.
+   *
+   * In addition, users can control the partitioning of the output RDD. This method automatically
+   * uses map-side aggregation in shuffling the RDD.
+   */
+  def combineByKey[C](createCombiner: JFunction[V, C],
+      mergeValue: JFunction2[C, V, C],
+      mergeCombiners: JFunction2[C, C, C],
+      partitioner: Partitioner): JavaPairRDD[K, C] = {
+    combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner, true, null)
+  }
+
+  /**
+   * Simplified version of combineByKey that hash-partitions the output RDD and uses map-side
+   * aggregation.
    */
   def combineByKey[C](createCombiner: JFunction[V, C],
       mergeValue: JFunction2[C, V, C],
@@ -488,7 +516,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
 
   /**
    * Simplified version of combineByKey that hash-partitions the resulting RDD using the existing
-   * partitioner/parallelism level.
+   * partitioner/parallelism level and using map-side aggregation.
    */
   def combineByKey[C](createCombiner: JFunction[V, C],
     mergeValue: JFunction2[C, V, C],
 
@@ -101,12 +101,23 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
 
   /**
    * Return a sampled subset of this RDD.
+   * 
+   * @param withReplacement can elements be sampled multiple times (replaced when sampled out)
+   * @param fraction expected size of the sample as a fraction of this RDD's size
+   *  without replacement: probability that each element is chosen; fraction must be [0, 1]
+   *  with replacement: expected number of times each element is chosen; fraction must be >= 0
    */
   def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
     sample(withReplacement, fraction, Utils.random.nextLong)
 
   /**
    * Return a sampled subset of this RDD.
+   * 
+   * @param withReplacement can elements be sampled multiple times (replaced when sampled out)
+   * @param fraction expected size of the sample as a fraction of this RDD's size
+   *  without replacement: probability that each element is chosen; fraction must be [0, 1]
+   *  with replacement: expected number of times each element is chosen; fraction must be >= 0
+   * @param seed seed for the random number generator
    */
   def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
     wrapRDD(rdd.sample(withReplacement, fraction, seed))
 
@@ -33,6 +33,7 @@ import org.json4s.jackson.JsonMethods
 
 import org.apache.spark.{Logging, SparkConf, SparkContext}
 import org.apache.spark.deploy.master.{RecoveryState, SparkCuratorUtil}
+import org.apache.spark.util.Utils
 
 /**
  * This suite tests the fault tolerance of the Spark standalone scheduler, mainly the Master.
@@ -405,8 +406,7 @@ private object SparkDocker {
 
   private def startNode(dockerCmd: ProcessBuilder) : (String, DockerId, File) = {
     val ipPromise = promise[String]()
-    val outFile = File.createTempFile("fault-tolerance-test", "")
-    outFile.deleteOnExit()
+    val outFile = File.createTempFile("fault-tolerance-test", "", Utils.createTempDir())
     val outStream: FileWriter = new FileWriter(outFile)
     def findIpAndLog(line: String): Unit = {
       if (line.startsWith("CONTAINER_IP=")) {
Original file line number	Diff line number	Diff line change
`@@ -145,7 +145,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {`
`145`	`145`	`}`
`146`	`146`
`147`	`147`	`/** Keep cleaning RDD, shuffle, and broadcast state. */`
`148`		`- private def keepCleaning(): Unit = Utils.logUncaughtExceptions {`
	`148`	`+ private def keepCleaning(): Unit = Utils.tryOrStopSparkContext(sc) {`
`149`	`149`	`while (!stopped) {`
`150`	`150`	`try {`
`151`	`151`	`val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT))`
Original file line number	Diff line number	Diff line change
`@@ -1736,7 +1736,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli`
`1736`	`1736`	`}`
`1737`	`1737`	`}`
`1738`	`1738`
`1739`		`- listenerBus.start()`
	`1739`	`+ listenerBus.start(this)`
`1740`	`1740`	`}`
`1741`	`1741`
`1742`	`1742`	`/** Post the application start event */`
Original file line number	Diff line number	Diff line change
`@@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration {`
`27`	`27`
`28`	`28`	`type TaskState = Value`
`29`	`29`
	`30`	`+ def isFailed(state: TaskState) = (LOST == state) \|\| (FAILED == state)`
	`31`	`+`
`30`	`32`	`def isFinished(state: TaskState) = FINISHED_STATES.contains(state)`
`31`	`33`
`32`	`34`	`def toMesos(state: TaskState): MesosTaskState = state match {`
`@@ -46,5 +48,6 @@ private[spark] object TaskState extends Enumeration {`
`46`	`48`	`case MesosTaskState.TASK_FAILED => FAILED`
`47`	`49`	`case MesosTaskState.TASK_KILLED => KILLED`
`48`	`50`	`case MesosTaskState.TASK_LOST => LOST`
	`51`	`+ case MesosTaskState.TASK_ERROR => LOST`
`49`	`52`	`}`
`50`	`53`	`}`