[SPARK-23811][Core] FetchFailed comes before Success of same task will cause child stage never succeed #20930
```diff
@@ -82,11 +82,13 @@ private[spark] class TaskSetManager(
   val successful = new Array[Boolean](numTasks)
   private val numFailures = new Array[Int](numTasks)

-  // Set the coresponding index of Boolean var when the task killed by other attempt tasks,
+  // Set the corresponding index of Boolean var when the task killed by other attempt tasks,
   // this happened while we set the `spark.speculation` to true. The task killed by others
   // should not resubmit while executor lost.
   private val killedByOtherAttempt: Array[Boolean] = new Array[Boolean](numTasks)

+  private val fetchFailedTaskIndexSet = new HashSet[Int]
+
   val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
   private[scheduler] var tasksSuccessful = 0
```

```diff
@@ -750,6 +752,10 @@ private[spark] class TaskSetManager(
       if (tasksSuccessful == numTasks) {
         isZombie = true
       }
+    } else if (fetchFailedTaskIndexSet.contains(index)) {
+      logInfo("Ignoring task-finished event for " + info.id + " in stage " + taskSet.id +
+        " because task " + index + " has already failed by FetchFailed")
+      return
     } else {
       logInfo("Ignoring task-finished event for " + info.id + " in stage " + taskSet.id +
         " because task " + index + " has already completed successfully")
```

```diff
@@ -793,6 +799,7 @@ private[spark] class TaskSetManager(
       blacklistTracker.foreach(_.updateBlacklistForFetchFailure(
         fetchFailed.bmAddress.host, fetchFailed.bmAddress.executorId))
     }
+    fetchFailedTaskIndexSet.add(index)
     None
```
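The guard introduced above can be sketched in isolation. The following is a minimal, self-contained Scala sketch of the idea, not the actual Spark code: `TaskTracker`, `handleFetchFailed`, and `handleSuccess` are simplified stand-ins for the `TaskSetManager` bookkeeping, kept only to show how a set of fetch-failed task indices suppresses a late success event.

```scala
import scala.collection.mutable.HashSet

// Hypothetical stand-in for the TaskSetManager bookkeeping in this PR.
// Names are simplified for illustration and are not Spark's real API.
class TaskTracker(numTasks: Int) {
  private val successful = new Array[Boolean](numTasks)
  private val fetchFailedTaskIndexSet = new HashSet[Int]
  var tasksSuccessful = 0

  // Record that a FetchFailed was reported for this task index.
  def handleFetchFailed(index: Int): Unit =
    fetchFailedTaskIndexSet.add(index)

  // Handle a task-finished event; returns true if the success was counted.
  def handleSuccess(index: Int): Boolean = {
    if (fetchFailedTaskIndexSet.contains(index)) {
      // A FetchFailed for this task already arrived: ignore the late success,
      // otherwise the parent stage could be considered complete while the
      // resubmitted child stage still lacks the regenerated map output.
      false
    } else if (!successful(index)) {
      successful(index) = true
      tasksSuccessful += 1
      true
    } else {
      false // duplicate success, e.g. from a speculative attempt
    }
  }
}

object Demo extends App {
  val t = new TaskTracker(2)
  t.handleFetchFailed(0)      // FetchFailed arrives first for task 0...
  println(t.handleSuccess(0)) // ...so its late success is ignored: false
  println(t.handleSuccess(1)) // a normal success is still counted: true
}
```

The set only grows for indices that actually hit a FetchFailed, so the common success path pays one `HashSet` lookup per finished task.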
Why are you making this change? I don't quite get it.
The addition of `fetchFailedTaskIndexSet` is to ignore the task-success event when the stage has already been marked as failed, following Wenchen's suggestion in an earlier review comment.
We should handle this case in DAGScheduler: there we can look up the stage by task id and see whether the stage has failed or not. Then we don't need `fetchFailedTaskIndexSet`.
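The alternative suggested above can be sketched as follows. This is a hypothetical illustration of the DAGScheduler-side approach, not actual Spark code: `StageStateTracker` and its methods are invented names showing how scheduler-level stage state makes a per-TaskSetManager set unnecessary.

```scala
import scala.collection.mutable

// Hypothetical sketch: the scheduler tracks which stages have failed and
// consults that state when a task-finished event arrives, instead of each
// TaskSetManager keeping its own set of fetch-failed task indices.
class StageStateTracker {
  private val failedStages = mutable.Set[Int]()

  // Called when a FetchFailed marks the stage as failed.
  def markStageFailed(stageId: Int): Unit = failedStages.add(stageId)

  // Called when the stage is resubmitted and becomes runnable again.
  def markStageResubmitted(stageId: Int): Unit = failedStages.remove(stageId)

  // A task-success event should only be processed if its stage (looked up
  // by the task's stage id) has not failed in the meantime.
  def shouldAcceptSuccess(stageId: Int): Boolean =
    !failedStages.contains(stageId)
}

object StageDemo extends App {
  val tracker = new StageStateTracker
  tracker.markStageFailed(3)
  println(tracker.shouldAcceptSuccess(3)) // false: stage 3 already failed
  println(tracker.shouldAcceptSuccess(4)) // true: stage 4 is unaffected
}
```

The design difference is where the state lives: centralizing it in the scheduler means one lookup by stage id answers the question for every task in that stage.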
Many thanks to you both for the guidance; that approach is clearer, and the UT added to reproduce this problem can also be used to verify the fix!