Skip to content

Commit 73e6e6f

Browse files
topherinternationalTopher Anderson
andauthored
Support refreshing CondaEnvironment auth in specfile mode (#669)
* Add setPackageUrls method to CondaEnvironment * SparkContext can set packageUrls * clearer precondition check on setPackageUrls * scalastyle * Try adding to JavaSparkContext and context.py * style * Revert previous changes * Lengthen Conda test timeout a bit * Switch to setting UserInfo auth independently * style * naming * Validate packageUrls and auth in createWithFile * trying a test * fix test compile * More test * test style * fix test file * make conda dir for test * DO NOT MERGE logging * fix invariant * assign from if * Revert "DO NOT MERGE logging" This reverts commit 74ec455. * packageUrls cannot have auth * improve test * fix * fix assert condition in test Co-authored-by: Topher Anderson <[email protected]>
1 parent a9e675d commit 73e6e6f

File tree

7 files changed

+77
-8
lines changed

7 files changed

+77
-8
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,10 @@ class SparkContext(config: SparkConf) extends SafeLogging {
18791879
condaEnvironmentOrFail().setChannels(urls)
18801880
}
18811881

1882+
def setPackageUrlsUserInfo(userInfo: Option[String]): Unit = {
1883+
condaEnvironmentOrFail().setPackageUrlsUserInfo(userInfo)
1884+
}
1885+
18821886
private[spark] def buildCondaInstructions(): Option[CondaSetupInstructions] = {
18831887
condaEnvironment().map(_.buildSetupInstructions)
18841888
}

core/src/main/scala/org/apache/spark/api/conda/CondaEnvironment.scala

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ final class CondaEnvironment(
4141
bootstrapMode: CondaBootstrapMode,
4242
bootstrapPackages: Seq[String],
4343
bootstrapPackageUrls: Seq[String],
44+
private var bootstrapPackageUrlsUserInfo: Option[String],
4445
bootstrapChannels: Seq[String],
4546
extraArgs: Seq[String] = Nil,
4647
envVars: Map[String, String] = Map.empty) extends Logging {
@@ -74,6 +75,10 @@ final class CondaEnvironment(
7475
channels ++= urls.iterator.map(AuthenticatedChannel.apply)
7576
}
7677

78+
def setPackageUrlsUserInfo(userInfo: Option[String]): Unit = {
79+
bootstrapPackageUrlsUserInfo = userInfo
80+
}
81+
7782
def getTransitivePackageUrls(): List[String] = {
7883
manager.listPackagesExplicit(condaEnvDir.toAbsolutePath.toString)
7984
}
@@ -115,6 +120,7 @@ final class CondaEnvironment(
115120
bootstrapMode,
116121
packages.toList,
117122
bootstrapPackageUrls,
123+
bootstrapPackageUrlsUserInfo,
118124
channels.toList,
119125
extraArgs,
120126
envVars)
@@ -177,6 +183,7 @@ object CondaEnvironment {
177183
mode: CondaBootstrapMode,
178184
packages: Seq[String],
179185
packageUrls: Seq[String],
186+
packageUrlsUserInfo: Option[String],
180187
unauthenticatedChannels: Seq[UnauthenticatedChannel],
181188
extraArgs: Seq[String],
182189
envVars: Map[String, String])
@@ -200,11 +207,13 @@ object CondaEnvironment {
200207
mode: CondaBootstrapMode,
201208
packages: Seq[String],
202209
packageUrls: Seq[String],
210+
packageUrlsUserInfo: Option[String],
203211
channels: Seq[AuthenticatedChannel],
204212
extraArgs: Seq[String],
205213
envVars: Map[String, String]): CondaSetupInstructions = {
206214
val ChannelsWithCreds(unauthed, userInfos) = unauthenticateChannels(channels)
207-
CondaSetupInstructions(mode, packages, packageUrls, unauthed, extraArgs, envVars)(userInfos)
215+
CondaSetupInstructions(
216+
mode, packages, packageUrls, packageUrlsUserInfo, unauthed, extraArgs, envVars)(userInfos)
208217
}
209218
}
210219
}

core/src/main/scala/org/apache/spark/api/conda/CondaEnvironmentManager.scala

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@
1616
*/
1717
package org.apache.spark.api.conda
1818

19+
import java.net.URI
1920
import java.nio.file.Files
2021
import java.nio.file.Path
2122
import java.nio.file.Paths
2223
import java.nio.file.attribute.PosixFilePermission
2324
import java.util.regex.Pattern
25+
import javax.ws.rs.core.UriBuilder
2426

2527
import scala.collection.JavaConverters._
2628
import scala.collection.mutable
@@ -82,14 +84,16 @@ final class CondaEnvironmentManager(condaBinaryPath: String,
8284
condaMode: CondaBootstrapMode,
8385
condaPackages: Seq[String],
8486
condaPackageUrls: Seq[String],
87+
condaPackageUrlsUserInfo: Option[String],
8588
condaChannelUrls: Seq[String],
8689
condaExtraArgs: Seq[String] = Nil,
8790
condaEnvVars: Map[String, String] = Map.empty): CondaEnvironment = {
8891
condaMode match {
8992
case CondaBootstrapMode.Solve =>
9093
create(baseDir, condaPackages, condaChannelUrls, condaExtraArgs, condaEnvVars)
9194
case CondaBootstrapMode.File =>
92-
createWithFile(baseDir, condaPackageUrls, condaExtraArgs, condaEnvVars)
95+
createWithFile(
96+
baseDir, condaPackageUrls, condaPackageUrlsUserInfo, condaExtraArgs, condaEnvVars)
9397
}
9498
}
9599

@@ -131,16 +135,21 @@ final class CondaEnvironmentManager(condaBinaryPath: String,
131135
CondaBootstrapMode.Solve,
132136
condaPackages,
133137
Nil,
138+
None,
134139
condaChannelUrls,
135140
condaExtraArgs)
136141
}
137142

138143
def createWithFile(
139144
baseDir: String,
140145
condaPackageUrls: Seq[String],
146+
condaPackageUrlsUserInfo: Option[String],
141147
condaExtraArgs: Seq[String] = Nil,
142148
condaEnvVars: Map[String, String] = Map.empty): CondaEnvironment = {
143149
require(condaPackageUrls.nonEmpty, "Expected at least one conda package url.")
150+
require(condaPackageUrls.find(packageUrl => new URI(packageUrl).getUserInfo != null).isEmpty,
151+
"Cannot pass condaPackageUrls with inlined auth; pass UserInfo " +
152+
"via spark.conda.bootstrapPackageUrlsUserInfo.")
144153
val name = "conda-env"
145154

146155
// must link in /tmp to reduce path length in case baseDir is very long...
@@ -152,9 +161,18 @@ final class CondaEnvironmentManager(condaBinaryPath: String,
152161

153162
val verbosityFlags = 0.until(verbosity).map(_ => "-v").toList
154163

164+
// Authenticate URLs if we have a UserInfo argument
165+
val finalCondaPackageUrls = if (condaPackageUrlsUserInfo.isDefined) {
166+
condaPackageUrls.map { packageUrl =>
167+
UriBuilder.fromUri(packageUrl).userInfo(condaPackageUrlsUserInfo.get).build().toString
168+
}
169+
} else {
170+
condaPackageUrls
171+
}
172+
155173
// Create spec file with URLs
156174
val specFilePath = linkedBaseDir.resolve("spec-file")
157-
Files.write(specFilePath, ("@EXPLICIT" +: condaPackageUrls).asJava)
175+
Files.write(specFilePath, ("@EXPLICIT" +: finalCondaPackageUrls).asJava)
158176

159177
// Attempt to create environment
160178
runCondaProcess(
@@ -175,6 +193,7 @@ final class CondaEnvironmentManager(condaBinaryPath: String,
175193
CondaBootstrapMode.File,
176194
Nil,
177195
condaPackageUrls,
196+
condaPackageUrlsUserInfo,
178197
Nil,
179198
condaExtraArgs)
180199
}
@@ -312,7 +331,6 @@ object CondaEnvironmentManager extends Logging {
312331
private[this] def createCondaEnvironment(
313332
instructions: CondaSetupInstructions): CondaEnvironment = {
314333
val condaPackages = instructions.packages
315-
val condaPackageUrls = instructions.packageUrls
316334
val env = SparkEnv.get
317335
val condaEnvManager = CondaEnvironmentManager.fromConf(env.conf)
318336
val envDir = {
@@ -326,7 +344,8 @@ object CondaEnvironmentManager extends Logging {
326344
envDir,
327345
instructions.mode,
328346
condaPackages,
329-
condaPackageUrls,
347+
instructions.packageUrls,
348+
instructions.packageUrlsUserInfo,
330349
instructions.channels,
331350
instructions.extraArgs)
332351
}

core/src/main/scala/org/apache/spark/deploy/CondaRunner.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ object CondaRunner {
5252
if (CondaEnvironmentManager.isConfigured(sparkConf)) {
5353
val condaBootstrapMode = CondaBootstrapMode.fromString(sparkConf.get(CONDA_BOOTSTRAP_MODE))
5454
val condaBootstrapDeps = sparkConf.get(CONDA_BOOTSTRAP_PACKAGES)
55-
val condaBootstrapDepUrls = sparkConf.get(CONDA_BOOTSTRAP_PACKAGE_URLS)
55+
val condaBootstrapPackageUrls = sparkConf.get(CONDA_BOOTSTRAP_PACKAGE_URLS)
56+
val condaBootstrapPackageUrlsUserInfo = sparkConf.get(CONDA_BOOTSTRAP_PACKAGE_URLS_USER_INFO)
5657
val condaChannelUrls = sparkConf.get(CONDA_CHANNEL_URLS)
5758
val condaExtraArgs = sparkConf.get(CONDA_EXTRA_ARGUMENTS)
5859
val condaEnvVariables = extractEnvVariables(sparkConf)
@@ -63,7 +64,8 @@ object CondaRunner {
6364
condaBaseDir,
6465
condaBootstrapMode,
6566
condaBootstrapDeps,
66-
condaBootstrapDepUrls,
67+
condaBootstrapPackageUrls,
68+
condaBootstrapPackageUrlsUserInfo,
6769
condaChannelUrls,
6870
condaExtraArgs,
6971
condaEnvVariables)

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,13 @@ package object config {
524524
.toSequence
525525
.createWithDefault(Nil)
526526

527+
private[spark] val CONDA_BOOTSTRAP_PACKAGE_URLS_USER_INFO =
528+
ConfigBuilder("spark.conda.bootstrapPackageUrlsUserInfo")
529+
.doc("Basic auth information (in 'user:pw' format) to be added to package urls that will " +
530+
"be added to the conda environment. Only relevant when main class is CondaRunner.")
531+
.stringConf
532+
.createOptional
533+
527534
private[spark] val CONDA_CHANNEL_URLS = ConfigBuilder("spark.conda.channelUrls")
528535
.doc("The URLs the Conda channels to use when resolving the conda packages. "
529536
+ "Only relevant when main class is CondaRunner.")

core/src/test/scala/org/apache/spark/api/conda/CondaEnvironmentManagerTest.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package org.apache.spark.api.conda
1818

1919
import java.nio.file.Files
2020

21+
import org.apache.spark.SparkConf
22+
import org.apache.spark.internal.config._
2123
import org.apache.spark.util.TempDirectory
2224

2325
class CondaEnvironmentManagerTest extends org.apache.spark.SparkFunSuite with TempDirectory {
@@ -43,4 +45,30 @@ class CondaEnvironmentManagerTest extends org.apache.spark.SparkFunSuite with Te
4345
"[http://us_r:<password>@yy.bar:222"
4446
assert(CondaEnvironmentManager.redactCredentials(original) == redacted)
4547
}
48+
49+
test("CondaEnvironmentManager.failOnAuthenticatedPackageUrls") {
50+
val packageUrl =
51+
"https://myuser:[email protected]/whatever/else/linux-64/package-0.0.1-py_0.tar.bz2"
52+
val userInfo = "anotheruser:theirpassword"
53+
54+
val binaryPath = tempDir.toPath.resolve("dummy-conda.bin")
55+
val condaEnvDir = tempDir.toPath.resolve("test-conda-env")
56+
Files.createFile(binaryPath)
57+
Files.createDirectory(condaEnvDir)
58+
59+
val conf = new SparkConf()
60+
conf.set(CONDA_BINARY_PATH, binaryPath.toString)
61+
conf.set(CONDA_BOOTSTRAP_MODE, "File")
62+
conf.set(CONDA_BOOTSTRAP_PACKAGE_URLS, Seq(packageUrl))
63+
conf.set(CONDA_BOOTSTRAP_PACKAGE_URLS_USER_INFO, userInfo)
64+
65+
val thrown = intercept[IllegalArgumentException] {
66+
CondaEnvironmentManager.fromConf(conf)
67+
.createWithFile(condaEnvDir.toString, Seq(packageUrl), Some(userInfo))
68+
}
69+
70+
assert(("requirement failed: Cannot pass condaPackageUrls with inlined auth; pass UserInfo " +
71+
"via spark.conda.bootstrapPackageUrlsUserInfo.")
72+
.equals(thrown.getMessage))
73+
}
4674
}

resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
455455
extraEnv = extraEnvVars,
456456
extraConf = extraConf,
457457
outFile = outFile,
458-
timeoutDuration = 4.minutes) // give it a bit longer
458+
timeoutDuration = 5.minutes) // give it a bit longer
459459
checkResult(finalState, result, outFile = outFile)
460460
}
461461

0 commit comments

Comments
 (0)