Skip to content

Commit 3bc2eff

Browse files
BryanCutlerMarcelo Vanzin
authored andcommitted
[SPARK-17568][CORE][DEPLOY] Add spark-submit option to override ivy settings used to resolve packages/artifacts
## What changes were proposed in this pull request? Adding option in spark-submit to allow overriding the default IvySettings used to resolve artifacts as part of the Spark Packages functionality. This will allow all artifact resolution to go through a central managed repository, such as Nexus or Artifactory, where site admins can better approve and control what is used with Spark apps. This change restructures the creation of the IvySettings object in two distinct ways. First, if the `spark.ivy.settings` option is not defined then `buildIvySettings` will create a default settings instance, as before, with defined repositories (Maven Central) included. Second, if the option is defined, the ivy settings file will be loaded from the given path and only repositories defined within will be used for artifact resolution. ## How was this patch tested? Existing tests for default behaviour, Manual tests that load a ivysettings.xml file with local and Nexus repositories defined. Added new test to load a simple Ivy settings file with a local filesystem resolver. Author: Bryan Cutler <[email protected]> Author: Ian Hummel <[email protected]> Closes #15119 from BryanCutler/spark-custom-IvySettings.
1 parent d749c06 commit 3bc2eff

File tree

4 files changed

+206
-74
lines changed

4 files changed

+206
-74
lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 103 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717

1818
package org.apache.spark.deploy
1919

20-
import java.io.{File, PrintStream}
20+
import java.io.{File, IOException, PrintStream}
2121
import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowableException}
2222
import java.net.URL
2323
import java.security.PrivilegedExceptionAction
24+
import java.text.ParseException
2425

2526
import scala.annotation.tailrec
2627
import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -283,8 +284,17 @@ object SparkSubmit extends CommandLineUtils {
283284
} else {
284285
Nil
285286
}
287+
288+
// Create the IvySettings, either load from file or build defaults
289+
val ivySettings = args.sparkProperties.get("spark.jars.ivySettings").map { ivySettingsFile =>
290+
SparkSubmitUtils.loadIvySettings(ivySettingsFile, Option(args.repositories),
291+
Option(args.ivyRepoPath))
292+
}.getOrElse {
293+
SparkSubmitUtils.buildIvySettings(Option(args.repositories), Option(args.ivyRepoPath))
294+
}
295+
286296
val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages,
287-
Option(args.repositories), Option(args.ivyRepoPath), exclusions = exclusions)
297+
ivySettings, exclusions = exclusions)
288298
if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
289299
args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
290300
if (args.isPython) {
@@ -860,30 +870,13 @@ private[spark] object SparkSubmitUtils {
860870

861871
/**
862872
* Extracts maven coordinates from a comma-delimited string
863-
* @param remoteRepos Comma-delimited string of remote repositories
864-
* @param ivySettings The Ivy settings for this session
873+
* @param defaultIvyUserDir The default user path for Ivy
865874
* @return A ChainResolver used by Ivy to search for and resolve dependencies.
866875
*/
867-
def createRepoResolvers(remoteRepos: Option[String], ivySettings: IvySettings): ChainResolver = {
876+
def createRepoResolvers(defaultIvyUserDir: File): ChainResolver = {
868877
// We need a chain resolver if we want to check multiple repositories
869878
val cr = new ChainResolver
870-
cr.setName("list")
871-
872-
val repositoryList = remoteRepos.getOrElse("")
873-
// add any other remote repositories other than maven central
874-
if (repositoryList.trim.nonEmpty) {
875-
repositoryList.split(",").zipWithIndex.foreach { case (repo, i) =>
876-
val brr: IBiblioResolver = new IBiblioResolver
877-
brr.setM2compatible(true)
878-
brr.setUsepoms(true)
879-
brr.setRoot(repo)
880-
brr.setName(s"repo-${i + 1}")
881-
cr.add(brr)
882-
// scalastyle:off println
883-
printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}")
884-
// scalastyle:on println
885-
}
886-
}
879+
cr.setName("spark-list")
887880

888881
val localM2 = new IBiblioResolver
889882
localM2.setM2compatible(true)
@@ -893,7 +886,7 @@ private[spark] object SparkSubmitUtils {
893886
cr.add(localM2)
894887

895888
val localIvy = new FileSystemResolver
896-
val localIvyRoot = new File(ivySettings.getDefaultIvyUserDir, "local")
889+
val localIvyRoot = new File(defaultIvyUserDir, "local")
897890
localIvy.setLocal(true)
898891
localIvy.setRepository(new FileRepository(localIvyRoot))
899892
val ivyPattern = Seq(localIvyRoot.getAbsolutePath, "[organisation]", "[module]", "[revision]",
@@ -974,23 +967,102 @@ private[spark] object SparkSubmitUtils {
974967
}
975968
}
976969

970+
/**
971+
* Build Ivy Settings using options with default resolvers
972+
* @param remoteRepos Comma-delimited string of remote repositories other than maven central
973+
* @param ivyPath The path to the local ivy repository
974+
* @return An IvySettings object
975+
*/
976+
def buildIvySettings(remoteRepos: Option[String], ivyPath: Option[String]): IvySettings = {
977+
val ivySettings: IvySettings = new IvySettings
978+
processIvyPathArg(ivySettings, ivyPath)
979+
980+
// create a pattern matcher
981+
ivySettings.addMatcher(new GlobPatternMatcher)
982+
// create the dependency resolvers
983+
val repoResolver = createRepoResolvers(ivySettings.getDefaultIvyUserDir)
984+
ivySettings.addResolver(repoResolver)
985+
ivySettings.setDefaultResolver(repoResolver.getName)
986+
processRemoteRepoArg(ivySettings, remoteRepos)
987+
ivySettings
988+
}
989+
990+
/**
991+
* Load Ivy settings from a given filename, using supplied resolvers
992+
* @param settingsFile Path to Ivy settings file
993+
* @param remoteRepos Comma-delimited string of remote repositories other than maven central
994+
* @param ivyPath The path to the local ivy repository
995+
* @return An IvySettings object
996+
*/
997+
def loadIvySettings(
998+
settingsFile: String,
999+
remoteRepos: Option[String],
1000+
ivyPath: Option[String]): IvySettings = {
1001+
val file = new File(settingsFile)
1002+
require(file.exists(), s"Ivy settings file $file does not exist")
1003+
require(file.isFile(), s"Ivy settings file $file is not a normal file")
1004+
val ivySettings: IvySettings = new IvySettings
1005+
try {
1006+
ivySettings.load(file)
1007+
} catch {
1008+
case e @ (_: IOException | _: ParseException) =>
1009+
throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e)
1010+
}
1011+
processIvyPathArg(ivySettings, ivyPath)
1012+
processRemoteRepoArg(ivySettings, remoteRepos)
1013+
ivySettings
1014+
}
1015+
1016+
/* Set ivy settings for location of cache, if option is supplied */
1017+
private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit = {
1018+
ivyPath.filterNot(_.trim.isEmpty).foreach { alternateIvyDir =>
1019+
ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir))
1020+
ivySettings.setDefaultCache(new File(alternateIvyDir, "cache"))
1021+
}
1022+
}
1023+
1024+
/* Add any optional additional remote repositories */
1025+
private def processRemoteRepoArg(ivySettings: IvySettings, remoteRepos: Option[String]): Unit = {
1026+
remoteRepos.filterNot(_.trim.isEmpty).map(_.split(",")).foreach { repositoryList =>
1027+
val cr = new ChainResolver
1028+
cr.setName("user-list")
1029+
1030+
// add current default resolver, if any
1031+
Option(ivySettings.getDefaultResolver).foreach(cr.add)
1032+
1033+
// add additional repositories, last resolution in chain takes precedence
1034+
repositoryList.zipWithIndex.foreach { case (repo, i) =>
1035+
val brr: IBiblioResolver = new IBiblioResolver
1036+
brr.setM2compatible(true)
1037+
brr.setUsepoms(true)
1038+
brr.setRoot(repo)
1039+
brr.setName(s"repo-${i + 1}")
1040+
cr.add(brr)
1041+
// scalastyle:off println
1042+
printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}")
1043+
// scalastyle:on println
1044+
}
1045+
1046+
ivySettings.addResolver(cr)
1047+
ivySettings.setDefaultResolver(cr.getName)
1048+
}
1049+
}
1050+
9771051
/** A nice function to use in tests as well. Values are dummy strings. */
9781052
def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance(
9791053
ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0"))
9801054

9811055
/**
9821056
* Resolves any dependencies that were supplied through maven coordinates
9831057
* @param coordinates Comma-delimited string of maven coordinates
984-
* @param remoteRepos Comma-delimited string of remote repositories other than maven central
985-
* @param ivyPath The path to the local ivy repository
1058+
* @param ivySettings An IvySettings containing resolvers to use
9861059
* @param exclusions Exclusions to apply when resolving transitive dependencies
9871060
* @return The comma-delimited path to the jars of the given maven artifacts including their
9881061
* transitive dependencies
9891062
*/
9901063
def resolveMavenCoordinates(
9911064
coordinates: String,
992-
remoteRepos: Option[String],
993-
ivyPath: Option[String],
1065+
ivySettings: IvySettings,
9941066
exclusions: Seq[String] = Nil,
9951067
isTest: Boolean = false): String = {
9961068
if (coordinates == null || coordinates.trim.isEmpty) {
@@ -1001,32 +1073,14 @@ private[spark] object SparkSubmitUtils {
10011073
// To prevent ivy from logging to system out
10021074
System.setOut(printStream)
10031075
val artifacts = extractMavenCoordinates(coordinates)
1004-
// Default configuration name for ivy
1005-
val ivyConfName = "default"
1006-
// set ivy settings for location of cache
1007-
val ivySettings: IvySettings = new IvySettings
10081076
// Directories for caching downloads through ivy and storing the jars when maven coordinates
10091077
// are supplied to spark-submit
1010-
val alternateIvyCache = ivyPath.getOrElse("")
1011-
val packagesDirectory: File =
1012-
if (alternateIvyCache == null || alternateIvyCache.trim.isEmpty) {
1013-
new File(ivySettings.getDefaultIvyUserDir, "jars")
1014-
} else {
1015-
ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache))
1016-
ivySettings.setDefaultCache(new File(alternateIvyCache, "cache"))
1017-
new File(alternateIvyCache, "jars")
1018-
}
1078+
val packagesDirectory: File = new File(ivySettings.getDefaultIvyUserDir, "jars")
10191079
// scalastyle:off println
10201080
printStream.println(
10211081
s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}")
10221082
printStream.println(s"The jars for the packages stored in: $packagesDirectory")
10231083
// scalastyle:on println
1024-
// create a pattern matcher
1025-
ivySettings.addMatcher(new GlobPatternMatcher)
1026-
// create the dependency resolvers
1027-
val repoResolver = createRepoResolvers(remoteRepos, ivySettings)
1028-
ivySettings.addResolver(repoResolver)
1029-
ivySettings.setDefaultResolver(repoResolver.getName)
10301084

10311085
val ivy = Ivy.newInstance(ivySettings)
10321086
// Set resolve options to download transitive dependencies as well
@@ -1042,6 +1096,9 @@ private[spark] object SparkSubmitUtils {
10421096
resolveOptions.setDownload(true)
10431097
}
10441098

1099+
// Default configuration name for ivy
1100+
val ivyConfName = "default"
1101+
10451102
// A Module descriptor must be specified. Entries are dummy strings
10461103
val md = getModuleDescriptor
10471104
// clear ivy resolution from previous launches. The resolution file is usually at

0 commit comments

Comments
 (0)