
Commit 04e61c5

Merge branch 'master' into SPARK-34581-keep-grouping-expressions

# Conflicts:
#	sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

2 parents: 3de19ca + 93a5d34

102 files changed: +2320 additions, -1568 deletions


core/src/main/java/org/apache/spark/memory/MemoryConsumer.java

Lines changed: 2 additions & 2 deletions
@@ -40,8 +40,8 @@ protected MemoryConsumer(TaskMemoryManager taskMemoryManager, long pageSize, Mem
     this.mode = mode;
   }
 
-  protected MemoryConsumer(TaskMemoryManager taskMemoryManager) {
-    this(taskMemoryManager, taskMemoryManager.pageSizeBytes(), MemoryMode.ON_HEAP);
+  protected MemoryConsumer(TaskMemoryManager taskMemoryManager, MemoryMode mode) {
+    this(taskMemoryManager, taskMemoryManager.pageSizeBytes(), mode);
   }
 
   /**
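
Note: the single-argument constructor is removed, so a MemoryConsumer subclass now has to pass a MemoryMode explicitly, either through this constructor or the three-argument one (the Spillable change further down in this commit is the in-tree call site). A minimal sketch of a hypothetical Scala subclass under the new signature, not part of this commit:

import org.apache.spark.memory.{MemoryConsumer, MemoryMode, TaskMemoryManager}

// Hypothetical example: the memory mode is now an explicit choice at the call site
// instead of an implicit ON_HEAP default.
class NoopConsumer(tmm: TaskMemoryManager)
  extends MemoryConsumer(tmm, MemoryMode.ON_HEAP) {

  // MemoryConsumer's abstract spill callback; this sketch frees no memory.
  override def spill(size: Long, trigger: MemoryConsumer): Long = 0L
}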

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 5 additions & 5 deletions
@@ -172,18 +172,18 @@ private[spark] class ContextCleaner(
     registerForCleanup(rdd, CleanCheckpoint(parentId))
   }
 
-  /** Register an object for cleanup. */
-  private def registerForCleanup(objectForCleanup: AnyRef, task: CleanupTask): Unit = {
-    referenceBuffer.add(new CleanupTaskWeakReference(task, objectForCleanup, referenceQueue))
-  }
-
   /** Register a SparkListener to be cleaned up when its owner is garbage collected. */
   def registerSparkListenerForCleanup(
       listenerOwner: AnyRef,
       listener: SparkListener): Unit = {
     registerForCleanup(listenerOwner, CleanSparkListener(listener))
   }
 
+  /** Register an object for cleanup. */
+  private def registerForCleanup(objectForCleanup: AnyRef, task: CleanupTask): Unit = {
+    referenceBuffer.add(new CleanupTaskWeakReference(task, objectForCleanup, referenceQueue))
+  }
+
   /** Keep cleaning RDD, shuffle, and broadcast state. */
   private def keepCleaning(): Unit = Utils.tryOrStopSparkContext(sc) {
     while (!stopped) {

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 11 additions & 5 deletions
@@ -1584,7 +1584,11 @@ class SparkContext(config: SparkConf) extends Logging {
       path: String, recursive: Boolean, addedOnSubmit: Boolean, isArchive: Boolean = false
     ): Unit = {
     val uri = if (!isArchive) {
-      new Path(path).toUri
+      if (Utils.isAbsoluteURI(path) && path.contains("%")) {
+        new URI(path)
+      } else {
+        new Path(path).toUri
+      }
     } else {
       Utils.resolveURI(path)
     }
@@ -1619,10 +1623,8 @@
       env.rpcEnv.fileServer.addFile(new File(uri.getPath))
     } else if (uri.getScheme == null) {
       schemeCorrectedURI.toString
-    } else if (isArchive) {
-      uri.toString
     } else {
-      path
+      uri.toString
     }
 
     val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis
@@ -1977,7 +1979,11 @@
       // For local paths with backslashes on Windows, URI throws an exception
       (addLocalJarFile(new File(path)), "local")
     } else {
-      val uri = new Path(path).toUri
+      val uri = if (Utils.isAbsoluteURI(path) && path.contains("%")) {
+        new URI(path)
+      } else {
+        new Path(path).toUri
+      }
       // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies
       Utils.validateURL(uri)
       val uriScheme = uri.getScheme
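
The new branch exists because SPARK-34225 (see the SparkContextSuite test at the end of this commit) requires addFile/addJar not to re-encode a string that is already in URI form: Hadoop's Path constructor rebuilds the URI and quotes the '%' character again, while java.net.URI keeps the escapes as given. A rough stand-alone illustration of the decision, using a hypothetical helper name and assuming that re-encoding behavior of org.apache.hadoop.fs.Path:

import java.net.{URI, URISyntaxException}
import org.apache.hadoop.fs.Path

// Hypothetical helper mirroring the branch above (isAbsoluteURI is added to Utils in this
// commit; here the check is inlined so the sketch stands alone).
def toAddUri(path: String): URI = {
  val isAbsolute = try { new URI(path).isAbsolute } catch { case _: URISyntaxException => false }
  if (isAbsolute && path.contains("%")) {
    new URI(path)            // already a URI: keep "%20" and friends untouched
  } else {
    new Path(path).toUri     // plain paths still go through Hadoop's Path
  }
}

// toAddUri("file:/tmp/test%20jar.jar") keeps "%20", whereas
// new Path("file:/tmp/test%20jar.jar").toUri re-quotes the '%' (giving "%2520"),
// which is the double encoding this change avoids.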

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 1 addition & 6 deletions
@@ -44,7 +44,7 @@ import org.apache.ivy.core.report.ResolveReport
 import org.apache.ivy.core.resolve.ResolveOptions
 import org.apache.ivy.core.retrieve.RetrieveOptions
 import org.apache.ivy.core.settings.IvySettings
-import org.apache.ivy.plugins.matcher.{GlobPatternMatcher, PatternMatcher}
+import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.repository.file.FileRepository
 import org.apache.ivy.plugins.resolver.{ChainResolver, FileSystemResolver, IBiblioResolver}
@@ -366,7 +366,6 @@ private[spark] class SparkSubmit extends Logging {
     args.pyFiles = Option(args.pyFiles).map(resolveGlobPaths(_, hadoopConf)).orNull
     args.archives = Option(args.archives).map(resolveGlobPaths(_, hadoopConf)).orNull
 
-    lazy val secMgr = new SecurityManager(sparkConf)
 
     // In client mode, download remote files.
     var localPrimaryResource: String = null
@@ -1153,8 +1152,6 @@ private[spark] object SparkSubmitUtils extends Logging {
     // We need a chain resolver if we want to check multiple repositories
     val cr = new ChainResolver
     cr.setName("spark-list")
-    cr.setChangingMatcher(PatternMatcher.REGEXP)
-    cr.setChangingPattern(".*-SNAPSHOT")
 
     val localM2 = new IBiblioResolver
     localM2.setM2compatible(true)
@@ -1314,8 +1311,6 @@ private[spark] object SparkSubmitUtils extends Logging {
     remoteRepos.filterNot(_.trim.isEmpty).map(_.split(",")).foreach { repositoryList =>
       val cr = new ChainResolver
       cr.setName("user-list")
-      cr.setChangingMatcher(PatternMatcher.REGEXP)
-      cr.setChangingPattern(".*-SNAPSHOT")
 
       // add current default resolver, if any
       Option(ivySettings.getDefaultResolver).foreach(cr.add)

core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala

Lines changed: 0 additions & 1 deletion
@@ -74,7 +74,6 @@ object DriverWrapper extends Logging {
 
   private def setupDependencies(loader: MutableURLClassLoader, userJar: String): Unit = {
     val sparkConf = new SparkConf()
-    val secMgr = new SecurityManager(sparkConf)
    val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf)
 
     val ivyProperties = DependencyUtils.getIvyProperties()

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 2 additions & 2 deletions
@@ -1037,7 +1037,7 @@ package object config {
       .doc("When true, HadoopRDD/NewHadoopRDD will not create partitions for empty input splits.")
       .version("2.3.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   private[spark] val SECRET_REDACTION_PATTERN =
     ConfigBuilder("spark.redaction.regex")
@@ -1047,7 +1047,7 @@
         "like YARN and event logs.")
       .version("2.1.2")
       .regexConf
-      .createWithDefault("(?i)secret|password|token".r)
+      .createWithDefault("(?i)secret|password|token|access[.]key".r)
 
   private[spark] val STRING_REDACTION_PATTERN =
     ConfigBuilder("spark.redaction.string.regex")

core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala

Lines changed: 4 additions & 2 deletions
@@ -51,15 +51,17 @@ private[spark] class BlockStoreShuffleReader[K, C](
       true
     }
     val useOldFetchProtocol = conf.get(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL)
+    // SPARK-34790: Fetching continuous blocks in batch is incompatible with io encryption.
+    val ioEncryption = conf.get(config.IO_ENCRYPTION_ENABLED)
 
     val doBatchFetch = shouldBatchFetch && serializerRelocatable &&
-      (!compressed || codecConcatenation) && !useOldFetchProtocol
+      (!compressed || codecConcatenation) && !useOldFetchProtocol && !ioEncryption
     if (shouldBatchFetch && !doBatchFetch) {
       logDebug("The feature tag of continuous shuffle block fetching is set to true, but " +
         "we can not enable the feature because other conditions are not satisfied. " +
         s"Shuffle compress: $compressed, serializer relocatable: $serializerRelocatable, " +
         s"codec concatenation: $codecConcatenation, use old shuffle fetch protocol: " +
-        s"$useOldFetchProtocol.")
+        s"$useOldFetchProtocol, io encryption: $ioEncryption.")
     }
     doBatchFetch
   }
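
In other words, with spark.io.encryption.enabled=true, batch fetching of continuous shuffle blocks now stays off even when every other precondition holds. A small sketch that restates the decision as a pure function (the parameter names mirror the locals in the diff above; this is not the reader itself):

// Continuous-block batch fetch is enabled only when all of these hold.
def canBatchFetch(
    shouldBatchFetch: Boolean,
    serializerRelocatable: Boolean,
    compressed: Boolean,
    codecConcatenation: Boolean,
    useOldFetchProtocol: Boolean,
    ioEncryption: Boolean): Boolean = {
  shouldBatchFetch && serializerRelocatable &&
    (!compressed || codecConcatenation) && !useOldFetchProtocol && !ioEncryption
}

// e.g. canBatchFetch(true, true, compressed = false, codecConcatenation = false,
//   useOldFetchProtocol = false, ioEncryption = true) == false after this change.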

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 11 additions & 0 deletions
@@ -2063,6 +2063,17 @@ private[spark] object Utils extends Logging {
     }
   }
 
+  /** Check whether a path is an absolute URI. */
+  def isAbsoluteURI(path: String): Boolean = {
+    try {
+      val uri = new URI(path: String)
+      uri.isAbsolute
+    } catch {
+      case _: URISyntaxException =>
+        false
+    }
+  }
+
   /** Return all non-local paths from a comma-separated list of paths. */
   def nonLocalPaths(paths: String, testWindows: Boolean = false): Array[String] = {
     val windows = isWindows || testWindows
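
For reference, a few representative inputs and what the new helper is expected to return (illustrative paths; the result simply follows java.net.URI#isAbsolute, i.e. "the string parses and has a scheme", with parse failures mapped to false). Note that Utils is private[spark], so this only compiles from Spark-internal code:

import org.apache.spark.util.Utils

Utils.isAbsoluteURI("file:/tmp/test%20jar.jar")     // true  - parses with scheme "file"
Utils.isAbsoluteURI("hdfs://nn:8020/jars/app.jar")  // true  - parses with scheme "hdfs"
Utils.isAbsoluteURI("/tmp/jars/app.jar")            // false - valid URI but no scheme
Utils.isAbsoluteURI("not a uri")                    // false - URISyntaxException (spaces)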

core/src/main/scala/org/apache/spark/util/collection/Spillable.scala

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ import org.apache.spark.memory.{MemoryConsumer, MemoryMode, TaskMemoryManager}
  * has been exceeded.
  */
 private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager)
-  extends MemoryConsumer(taskMemoryManager) with Logging {
+  extends MemoryConsumer(taskMemoryManager, MemoryMode.ON_HEAP) with Logging {
   /**
    * Spills the current in-memory collection to disk, and releases the memory.
    *

core/src/test/scala/org/apache/spark/SparkContextSuite.scala

Lines changed: 40 additions & 0 deletions
@@ -1197,6 +1197,46 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     assert(sc.hadoopConfiguration.get(bufferKey).toInt === 65536,
       "spark configs have higher priority than spark.hadoop configs")
   }
+
+  test("SPARK-34225: addFile/addJar shouldn't further encode URI if a URI form string is passed") {
+    withTempDir { dir =>
+      val jar1 = File.createTempFile("testprefix", "test jar.jar", dir)
+      val jarUrl1 = jar1.toURI.toString
+      val file1 = File.createTempFile("testprefix", "test file.txt", dir)
+      val fileUrl1 = file1.toURI.toString
+      val jar2 = File.createTempFile("testprefix", "test %20jar.jar", dir)
+      val file2 = File.createTempFile("testprefix", "test %20file.txt", dir)
+
+      try {
+        sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+        sc.addJar(jarUrl1)
+        sc.addFile(fileUrl1)
+        sc.addJar(jar2.toString)
+        sc.addFile(file2.toString)
+        sc.parallelize(Array(1), 1).map { x =>
+          val gottenJar1 = new File(SparkFiles.get(jar1.getName))
+          if (!gottenJar1.exists()) {
+            throw new SparkException("file doesn't exist : " + jar1)
+          }
+          val gottenFile1 = new File(SparkFiles.get(file1.getName))
+          if (!gottenFile1.exists()) {
+            throw new SparkException("file doesn't exist : " + file1)
+          }
+          val gottenJar2 = new File(SparkFiles.get(jar2.getName))
+          if (!gottenJar2.exists()) {
+            throw new SparkException("file doesn't exist : " + jar2)
+          }
+          val gottenFile2 = new File(SparkFiles.get(file2.getName))
+          if (!gottenFile2.exists()) {
+            throw new SparkException("file doesn't exist : " + file2)
+          }
+          x
+        }.collect()
+      } finally {
+        sc.stop()
+      }
+    }
+  }
 }
 
 object SparkContextSuite {
