
Commit af2ec3c (2 parents: 343d15d + 0617dfc)

Merge remote-tracking branch 'origin/master' into partition-spec-value-null

# Conflicts:
#   sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala
#   sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

File tree

185 files changed: +5287 additions, -1668 deletions

Some content is hidden: large commits have some content hidden by default, so only a subset of the 185 changed files is rendered below.

common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java

Lines changed: 10 additions & 0 deletions
@@ -398,4 +398,14 @@ public long mergedIndexCacheSize() {
     return JavaUtils.byteStringAsBytes(
       conf.get("spark.shuffle.server.mergedIndexCacheSize", "100m"));
   }
+
+  /**
+   * The threshold for number of IOExceptions while merging shuffle blocks to a shuffle partition.
+   * When the number of IOExceptions while writing to merged shuffle data/index/meta file exceed
+   * this threshold then the shuffle server will respond back to client to stop pushing shuffle
+   * blocks for this shuffle partition.
+   */
+  public int ioExceptionsThresholdDuringMerge() {
+    return conf.getInt("spark.shuffle.server.ioExceptionsThresholdDuringMerge", 4);
+  }
 }
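
As an aside, a minimal Scala sketch (not part of the commit) of how the new getter could be exercised in isolation, assuming the MapConfigProvider helper from the same network-common module; the key name and default of 4 come from the hunk above:

    import scala.collection.JavaConverters._
    import org.apache.spark.network.util.{MapConfigProvider, TransportConf}

    // Back a TransportConf with an in-memory map and read the new threshold;
    // "8" overrides the default of 4 used by ioExceptionsThresholdDuringMerge().
    val provider = new MapConfigProvider(
      Map("spark.shuffle.server.ioExceptionsThresholdDuringMerge" -> "8").asJava)
    val shuffleConf = new TransportConf("shuffle", provider)
    assert(shuffleConf.ioExceptionsThresholdDuringMerge() == 8)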

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java

Lines changed: 9 additions & 0 deletions
@@ -71,6 +71,15 @@ class BlockPushErrorHandler implements ErrorHandler {
   public static final String BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX =
       "Couldn't find an opportunity to write block";
 
+  /**
+   * String constant used for generating exception messages indicating the server encountered
+   * IOExceptions multiple times, greater than the configured threshold, while trying to merged
+   * shuffle blocks of the same shuffle partition. When the client receives this this response,
+   * it will stop pushing any more blocks for the same shuffle partition.
+   */
+  public static final String IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX =
+      "IOExceptions exceeded the threshold";
+
   @Override
   public boolean shouldRetryError(Throwable t) {
     // If it is a connection time out or a connection closed exception, no need to retry.
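
For orientation, a hedged sketch (not from the commit) of how calling code might recognize this response; it assumes BlockPushErrorHandler is the nested handler declared in ErrorHandler.java as the hunk header suggests, and the real decision logic lives in shouldRetryError and the push-based shuffle writer:

    import org.apache.spark.network.shuffle.ErrorHandler.BlockPushErrorHandler

    // A push failure whose message carries the new prefix should not be retried;
    // the client stops pushing further blocks for that shuffle partition.
    def exceededIoExceptionThreshold(t: Throwable): Boolean = {
      val message = Option(t.getMessage).getOrElse("")
      message.contains(BlockPushErrorHandler.IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX)
    }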

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java

Lines changed: 230 additions & 71 deletions
Large diffs are not rendered by default.

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java

Lines changed: 380 additions & 0 deletions
Large diffs are not rendered by default.

core/src/main/resources/org/apache/spark/ui/static/stagepage.js

Lines changed: 2 additions & 1 deletion
@@ -946,7 +946,8 @@ $(document).ready(function () {
         },
         {
           data : function (row, type) {
-            if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && row.taskMetrics.shuffleReadMetrics.localBytesRead > 0) {
+            if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics &&
+              (row.taskMetrics.shuffleReadMetrics.localBytesRead > 0 || row.taskMetrics.shuffleReadMetrics.remoteBytesRead > 0)) {
               var totalBytesRead = parseInt(row.taskMetrics.shuffleReadMetrics.localBytesRead) + parseInt(row.taskMetrics.shuffleReadMetrics.remoteBytesRead);
               if (type === 'display') {
                 return formatBytes(totalBytesRead, type) + " / " + row.taskMetrics.shuffleReadMetrics.recordsRead;

core/src/main/scala/org/apache/spark/ContextAwareIterator.scala

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import org.apache.spark.annotation.DeveloperApi
+
+/**
+ * :: DeveloperApi ::
+ * A TaskContext aware iterator.
+ *
+ * As the Python evaluation consumes the parent iterator in a separate thread,
+ * it could consume more data from the parent even after the task ends and the parent is closed.
+ * If an off-heap access exists in the parent iterator, it could cause segmentation fault
+ * which crashes the executor.
+ * Thus, we should use [[ContextAwareIterator]] to stop consuming after the task ends.
+ */
+@DeveloperApi
+class ContextAwareIterator[+T](val context: TaskContext, val delegate: Iterator[T])
+  extends Iterator[T] {
+
+  override def hasNext: Boolean =
+    !context.isCompleted() && !context.isInterrupted() && delegate.hasNext
+
+  override def next(): T = delegate.next()
+}
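
A short usage sketch (illustrative only, not part of the commit) showing how code running inside a task might wrap a parent iterator with the new class:

    import org.apache.spark.{ContextAwareIterator, TaskContext}

    // Wrap a parent iterator so consumption stops once the task has completed
    // or been interrupted, rather than reading past the task's lifetime.
    def wrapForTask[T](parent: Iterator[T]): Iterator[T] = {
      val ctx = TaskContext.get() // non-null only inside a running task
      new ContextAwareIterator[T](ctx, parent)
    }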

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 28 additions & 17 deletions
@@ -1929,7 +1929,7 @@ class SparkContext(config: SparkConf) extends Logging {
   }
 
   private def addJar(path: String, addedOnSubmit: Boolean): Unit = {
-    def addLocalJarFile(file: File): String = {
+    def addLocalJarFile(file: File): Seq[String] = {
       try {
         if (!file.exists()) {
           throw new FileNotFoundException(s"Jar ${file.getAbsolutePath} not found")
@@ -1938,15 +1938,15 @@ class SparkContext(config: SparkConf) extends Logging {
           throw new IllegalArgumentException(
             s"Directory ${file.getAbsoluteFile} is not allowed for addJar")
         }
-        env.rpcEnv.fileServer.addJar(file)
+        Seq(env.rpcEnv.fileServer.addJar(file))
       } catch {
         case NonFatal(e) =>
           logError(s"Failed to add $path to Spark environment", e)
-          null
+          Nil
       }
     }
 
-    def checkRemoteJarFile(path: String): String = {
+    def checkRemoteJarFile(path: String): Seq[String] = {
       val hadoopPath = new Path(path)
       val scheme = hadoopPath.toUri.getScheme
       if (!Array("http", "https", "ftp").contains(scheme)) {
@@ -1959,47 +1959,58 @@
             throw new IllegalArgumentException(
               s"Directory ${path} is not allowed for addJar")
           }
-          path
+          Seq(path)
         } catch {
           case NonFatal(e) =>
             logError(s"Failed to add $path to Spark environment", e)
-            null
+            Nil
         }
       } else {
-        path
+        Seq(path)
       }
     }
 
     if (path == null || path.isEmpty) {
       logWarning("null or empty path specified as parameter to addJar")
     } else {
-      val key = if (path.contains("\\") && Utils.isWindows) {
+      val (keys, scheme) = if (path.contains("\\") && Utils.isWindows) {
         // For local paths with backslashes on Windows, URI throws an exception
-        addLocalJarFile(new File(path))
+        (addLocalJarFile(new File(path)), "local")
      } else {
        val uri = new Path(path).toUri
        // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies
        Utils.validateURL(uri)
-        uri.getScheme match {
+        val uriScheme = uri.getScheme
+        val jarPaths = uriScheme match {
          // A JAR file which exists only on the driver node
          case null =>
            // SPARK-22585 path without schema is not url encoded
            addLocalJarFile(new File(uri.getPath))
          // A JAR file which exists only on the driver node
          case "file" => addLocalJarFile(new File(uri.getPath))
          // A JAR file which exists locally on every worker node
-          case "local" => "file:" + uri.getPath
+          case "local" => Seq("file:" + uri.getPath)
+          case "ivy" =>
+            // Since `new Path(path).toUri` will lose query information,
+            // so here we use `URI.create(path)`
+            DependencyUtils.resolveMavenDependencies(URI.create(path))
+              .flatMap(jar => addLocalJarFile(new File(jar)))
          case _ => checkRemoteJarFile(path)
        }
+        (jarPaths, uriScheme)
      }
-      if (key != null) {
+      if (keys.nonEmpty) {
        val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis
-        if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
-          logInfo(s"Added JAR $path at $key with timestamp $timestamp")
+        val (added, existed) = keys.partition(addedJars.putIfAbsent(_, timestamp).isEmpty)
+        if (added.nonEmpty) {
+          val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI"
+          logInfo(s"Added $jarMessage $path at ${added.mkString(",")} with timestamp $timestamp")
          postEnvironmentUpdate()
-        } else {
-          logWarning(s"The jar $path has been added already. Overwriting of added jars " +
-            "is not supported in the current version.")
+        }
+        if (existed.nonEmpty) {
+          val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI"
+          logInfo(s"The $jarMessage $path at ${existed.mkString(",")} has been added already." +
+            " Overwriting of added jar is not supported in the current version.")
        }
      }
    }
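
A hedged usage sketch (not from the commit) of the new "ivy" branch; the Maven coordinate is illustrative, and any query parameters accepted by the ivy:// form are parsed by DependencyUtils.resolveMavenDependencies(URI), which is outside this hunk:

    import org.apache.spark.{SparkConf, SparkContext}

    // Add a Maven artifact to a running application through the ivy:// scheme
    // handled by addJar above; the coordinate below is an example only.
    val sc = new SparkContext(new SparkConf().setAppName("ivy-addjar-sketch").setMaster("local[*]"))
    sc.addJar("ivy://org.scalaj:scalaj-http_2.12:2.4.2")
    sc.stop()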

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 16 additions & 13 deletions
@@ -304,28 +304,29 @@ private[spark] class SparkSubmit extends Logging {
     // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
     // too for packages that include Python code
     val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(
-      args.packagesExclusions, args.packages, args.repositories, args.ivyRepoPath,
-      args.ivySettingsPath)
+      packagesTransitive = true, args.packagesExclusions, args.packages,
+      args.repositories, args.ivyRepoPath, args.ivySettingsPath)
 
-    if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
+    if (resolvedMavenCoordinates.nonEmpty) {
       // In K8s client mode, when in the driver, add resolved jars early as we might need
       // them at the submit time for artifact downloading.
       // For example we might use the dependencies for downloading
       // files from a Hadoop Compatible fs e.g. S3. In this case the user might pass:
       // --packages com.amazonaws:aws-java-sdk:1.7.4:org.apache.hadoop:hadoop-aws:2.7.6
       if (isKubernetesClusterModeDriver) {
         val loader = getSubmitClassLoader(sparkConf)
-        for (jar <- resolvedMavenCoordinates.split(",")) {
+        for (jar <- resolvedMavenCoordinates) {
           addJarToClasspath(jar, loader)
         }
       } else if (isKubernetesCluster) {
         // We need this in K8s cluster mode so that we can upload local deps
         // via the k8s application, like in cluster mode driver
-        childClasspath ++= resolvedMavenCoordinates.split(",")
+        childClasspath ++= resolvedMavenCoordinates
       } else {
-        args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
+        args.jars = mergeFileLists(args.jars, mergeFileLists(resolvedMavenCoordinates: _*))
         if (args.isPython || isInternal(args.primaryResource)) {
-          args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
+          args.pyFiles = mergeFileLists(args.pyFiles,
+            mergeFileLists(resolvedMavenCoordinates: _*))
         }
       }
     }
@@ -1201,7 +1202,7 @@ private[spark] object SparkSubmitUtils {
    */
   def resolveDependencyPaths(
       artifacts: Array[AnyRef],
-      cacheDirectory: File): String = {
+      cacheDirectory: File): Seq[String] = {
     artifacts.map { artifactInfo =>
       val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId
       val extraAttrs = artifactInfo.asInstanceOf[Artifact].getExtraAttributes
@@ -1212,7 +1213,7 @@
       }
       cacheDirectory.getAbsolutePath + File.separator +
         s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}$classifier.jar"
-    }.mkString(",")
+    }
   }
 
   /** Adds the given maven coordinates to Ivy's module descriptor. */
@@ -1360,17 +1361,19 @@
    * Resolves any dependencies that were supplied through maven coordinates
    * @param coordinates Comma-delimited string of maven coordinates
    * @param ivySettings An IvySettings containing resolvers to use
+   * @param transitive Whether resolving transitive dependencies, default is true
    * @param exclusions Exclusions to apply when resolving transitive dependencies
-   * @return The comma-delimited path to the jars of the given maven artifacts including their
+   * @return Seq of path to the jars of the given maven artifacts including their
    *         transitive dependencies
    */
  def resolveMavenCoordinates(
      coordinates: String,
      ivySettings: IvySettings,
+      transitive: Boolean,
      exclusions: Seq[String] = Nil,
-      isTest: Boolean = false): String = {
+      isTest: Boolean = false): Seq[String] = {
    if (coordinates == null || coordinates.trim.isEmpty) {
-      ""
+      Nil
    } else {
      val sysOut = System.out
      // Default configuration name for ivy
@@ -1396,7 +1399,7 @@
      val ivy = Ivy.newInstance(ivySettings)
      // Set resolve options to download transitive dependencies as well
      val resolveOptions = new ResolveOptions
-      resolveOptions.setTransitive(true)
+      resolveOptions.setTransitive(transitive)
      val retrieveOptions = new RetrieveOptions
      // Turn downloading and logging off for testing
      if (isTest) {
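
A hedged sketch (not from the commit) of calling the updated resolver with the new transitive flag; SparkSubmitUtils is private[spark], so this compiles only from Spark-internal code, and buildIvySettings is assumed to keep its existing (Option[String], Option[String]) signature:

    import org.apache.spark.deploy.SparkSubmitUtils

    // resolveMavenCoordinates now returns a Seq[String] of jar paths instead of
    // a comma-joined string, and takes an explicit `transitive` flag.
    val ivySettings = SparkSubmitUtils.buildIvySettings(None, None)
    val jars: Seq[String] = SparkSubmitUtils.resolveMavenCoordinates(
      "org.scalaj:scalaj-http_2.12:2.4.2",
      ivySettings,
      transitive = false)
    jars.foreach(println)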

core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala

Lines changed: 8 additions & 15 deletions
@@ -19,10 +19,8 @@ package org.apache.spark.deploy.worker
 
 import java.io.File
 
-import org.apache.commons.lang3.StringUtils
-
 import org.apache.spark.{SecurityManager, SparkConf}
-import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.{config, Logging}
 import org.apache.spark.rpc.RpcEnv
 import org.apache.spark.util._
@@ -79,21 +77,16 @@ object DriverWrapper extends Logging {
     val secMgr = new SecurityManager(sparkConf)
     val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf)
 
-    val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) =
-      Seq(
-        "spark.jars.excludes",
-        "spark.jars.packages",
-        "spark.jars.repositories",
-        "spark.jars.ivy",
-        "spark.jars.ivySettings"
-      ).map(sys.props.get(_).orNull)
+    val ivyProperties = DependencyUtils.getIvyProperties()
 
-    val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions,
-      packages, repositories, ivyRepoPath, Option(ivySettingsPath))
+    val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(true,
+      ivyProperties.packagesExclusions, ivyProperties.packages, ivyProperties.repositories,
+      ivyProperties.ivyRepoPath, Option(ivyProperties.ivySettingsPath))
     val jars = {
       val jarsProp = sys.props.get(config.JARS.key).orNull
-      if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
-        DependencyUtils.mergeFileLists(jarsProp, resolvedMavenCoordinates)
+      if (resolvedMavenCoordinates.nonEmpty) {
+        DependencyUtils.mergeFileLists(jarsProp,
+          DependencyUtils.mergeFileLists(resolvedMavenCoordinates: _*))
       } else {
         jarsProp
       }

core/src/main/scala/org/apache/spark/status/AppStatusStore.scala

Lines changed: 2 additions & 1 deletion
@@ -24,6 +24,7 @@ import scala.collection.mutable.HashMap
 
 import org.apache.spark.{JobExecutionStatus, SparkConf}
 import org.apache.spark.status.api.v1
+import org.apache.spark.storage.FallbackStorage.FALLBACK_BLOCK_MANAGER_ID
 import org.apache.spark.ui.scope._
 import org.apache.spark.util.Utils
 import org.apache.spark.util.kvstore.{InMemoryStore, KVStore}
@@ -88,7 +89,7 @@
     } else {
       base
     }
-    filtered.asScala.map(_.info).toSeq
+    filtered.asScala.map(_.info).filter(_.id != FALLBACK_BLOCK_MANAGER_ID.executorId).toSeq
   }
 
   def executorSummary(executorId: String): v1.ExecutorSummary = {
