Skip to content

Commit 78ecb6d

Browse files
author
Marcelo Vanzin
committed
[SPARK-24446][YARN] Properly quote library path for YARN.
Because the way YARN executes commands via bash -c, everything needs to be quoted so that the whole command is fully contained inside a bash string and is interpreted correctly when the string is read by bash. This is a bit different than the quoting done when executing things as if typing in a bash shell. Tweaked unit tests to exercise the bad behavior, which would cause existing tests to time out without the fix. Also tested on a real cluster, verifying the shell script created by YARN to run the container. Author: Marcelo Vanzin <[email protected]> Closes #21476 from vanzin/SPARK-24446.
1 parent 6a0b77a commit 78ecb6d

File tree

3 files changed

+34
-8
lines changed

3 files changed

+34
-8
lines changed

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,8 @@ private[spark] class Client(
899899
val libraryPaths = Seq(sparkConf.get(DRIVER_LIBRARY_PATH),
900900
sys.props.get("spark.driver.libraryPath")).flatten
901901
if (libraryPaths.nonEmpty) {
902-
prefixEnv = Some(getClusterPath(sparkConf, Utils.libraryPathEnvPrefix(libraryPaths)))
902+
prefixEnv = Some(createLibraryPathPrefix(libraryPaths.mkString(File.pathSeparator),
903+
sparkConf))
903904
}
904905
if (sparkConf.get(AM_JAVA_OPTIONS).isDefined) {
905906
logWarning(s"${AM_JAVA_OPTIONS.key} will not take effect in cluster mode")
@@ -921,7 +922,7 @@ private[spark] class Client(
921922
.map(YarnSparkHadoopUtil.escapeForShell)
922923
}
923924
sparkConf.get(AM_LIBRARY_PATH).foreach { paths =>
924-
prefixEnv = Some(getClusterPath(sparkConf, Utils.libraryPathEnvPrefix(Seq(paths))))
925+
prefixEnv = Some(createLibraryPathPrefix(paths, sparkConf))
925926
}
926927
}
927928

@@ -1485,6 +1486,23 @@ private object Client extends Logging {
14851486
YarnAppReport(report.getYarnApplicationState(), report.getFinalApplicationStatus(), diagsOpt)
14861487
}
14871488

1489+
/**
1490+
* Create a properly quoted and escaped library path string to be added as a prefix to the command
1491+
* executed by YARN. This is different from normal quoting / escaping due to YARN executing the
1492+
* command through "bash -c".
1493+
*/
1494+
def createLibraryPathPrefix(libpath: String, conf: SparkConf): String = {
1495+
val cmdPrefix = if (Utils.isWindows) {
1496+
Utils.libraryPathEnvPrefix(Seq(libpath))
1497+
} else {
1498+
val envName = Utils.libraryPathEnvName
1499+
// For quotes, escape both the quote and the escape character when encoding in the command
1500+
// string.
1501+
val quoted = libpath.replace("\"", "\\\\\\\"")
1502+
envName + "=\\\"" + quoted + File.pathSeparator + "$" + envName + "\\\""
1503+
}
1504+
getClusterPath(conf, cmdPrefix)
1505+
}
14881506
}
14891507

14901508
private[spark] class YarnClusterApplication extends SparkApplication {

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,6 @@ private[yarn] class ExecutorRunnable(
131131
// Extra options for the JVM
132132
val javaOpts = ListBuffer[String]()
133133

134-
// Set the environment variable through a command prefix
135-
// to append to the existing value of the variable
136-
var prefixEnv: Option[String] = None
137-
138134
// Set the JVM memory
139135
val executorMemoryString = executorMemory + "m"
140136
javaOpts += "-Xmx" + executorMemoryString
@@ -144,8 +140,11 @@ private[yarn] class ExecutorRunnable(
144140
val subsOpt = Utils.substituteAppNExecIds(opts, appId, executorId)
145141
javaOpts ++= Utils.splitCommandString(subsOpt).map(YarnSparkHadoopUtil.escapeForShell)
146142
}
147-
sparkConf.get(EXECUTOR_LIBRARY_PATH).foreach { p =>
148-
prefixEnv = Some(Client.getClusterPath(sparkConf, Utils.libraryPathEnvPrefix(Seq(p))))
143+
144+
// Set the library path through a command prefix to append to the existing value of the
145+
// env variable.
146+
val prefixEnv = sparkConf.get(EXECUTOR_LIBRARY_PATH).map { libPath =>
147+
Client.createLibraryPathPrefix(libPath, sparkConf)
149148
}
150149

151150
javaOpts += "-Djava.io.tmpdir=" +

resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import org.scalatest.concurrent.Eventually._
3636
import org.apache.spark._
3737
import org.apache.spark.deploy.yarn.config._
3838
import org.apache.spark.internal.Logging
39+
import org.apache.spark.internal.config._
3940
import org.apache.spark.launcher._
4041
import org.apache.spark.util.Utils
4142

@@ -216,6 +217,14 @@ abstract class BaseYarnClusterSuite
216217
props.setProperty("spark.driver.extraJavaOptions", "-Dfoo=\"one two three\"")
217218
props.setProperty("spark.executor.extraJavaOptions", "-Dfoo=\"one two three\"")
218219

220+
// SPARK-24446: make sure special characters in the library path do not break containers.
221+
if (!Utils.isWindows) {
222+
val libPath = """/tmp/does not exist:$PWD/tmp:/tmp/quote":/tmp/ampersand&"""
223+
props.setProperty(AM_LIBRARY_PATH.key, libPath)
224+
props.setProperty(DRIVER_LIBRARY_PATH.key, libPath)
225+
props.setProperty(EXECUTOR_LIBRARY_PATH.key, libPath)
226+
}
227+
219228
yarnCluster.getConfig().asScala.foreach { e =>
220229
props.setProperty("spark.hadoop." + e.getKey(), e.getValue())
221230
}

0 commit comments

Comments
 (0)