
Commit 5ae3516

markgrover authored and Marcelo Vanzin committed
[SPARK-19720][CORE] Redact sensitive information from SparkSubmit console
## What changes were proposed in this pull request?

This change redacts sensitive information (based on the `spark.redaction.regex` property) from the Spark Submit console logs. Such sensitive information is already being redacted from event logs, YARN logs, etc.

## How was this patch tested?

Testing was done manually to make sure that the console logs were not printing any sensitive information. Here's some output from the console:

```
Spark properties used, including those specified through --conf and those from the properties file /etc/spark2/conf/spark-defaults.conf:
(spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
(spark.authenticate,false)
(spark.executorEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
```

```
System properties:
(spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
(spark.authenticate,false)
(spark.executorEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
```

There is a risk that, if new print statements are added to the console down the road, sensitive information may still get leaked, since there is no test that asserts on the console log output. I considered it out of the scope of this JIRA to write an integration test to make sure new leaks don't happen in the future. Running unit tests to make sure nothing else is broken by this change.

Author: Mark Grover <[email protected]>

Closes #17047 from markgrover/master_redaction.
1 parent 9cca3db commit 5ae3516
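
For readers unfamiliar with the redaction mechanism, the sketch below approximates what `Utils.redact` does (it is not the Spark source): any property whose key matches the redaction regex keeps its key but has its value replaced. The pattern `(?i)secret|password` and the replacement text are assumptions inferred from the console output quoted above.

```scala
import scala.util.matching.Regex

object RedactionSketch {
  // Assumed defaults, inferred from the commit message; the real pattern
  // comes from the spark.redaction.regex configuration property.
  val redactionPattern: Regex = "(?i)secret|password".r
  val replacement = "*********(redacted)"

  // Keep the key, hide the value, whenever the key name matches the pattern.
  def redact(kvs: Seq[(String, String)]): Seq[(String, String)] =
    kvs.map { case (k, v) =>
      if (redactionPattern.findFirstIn(k).isDefined) (k, replacement) else (k, v)
    }

  def main(args: Array[String]): Unit = {
    val props = Seq(
      "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD" -> "hunter2",
      "spark.authenticate" -> "false")
    redact(props).foreach(println)
    // (spark.executorEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
    // (spark.authenticate,false)
  }
}
```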

File tree: 3 files changed, +31 -5 lines


core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 2 additions & 1 deletion

@@ -665,7 +665,8 @@ object SparkSubmit extends CommandLineUtils {
     if (verbose) {
       printStream.println(s"Main class:\n$childMainClass")
       printStream.println(s"Arguments:\n${childArgs.mkString("\n")}")
-      printStream.println(s"System properties:\n${sysProps.mkString("\n")}")
+      // sysProps may contain sensitive information, so redact before printing
+      printStream.println(s"System properties:\n${Utils.redact(sysProps).mkString("\n")}")
       printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
       printStream.println("\n")
     }
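
A side note on the output format: `redact` returns a `Seq[(String, String)]`, and `mkString("\n")` joins each tuple's default `toString`, which is why the verbose console output quoted in the commit message shows one `(key,value)` pair per line. A small self-contained sketch, with the redaction step stubbed out rather than driven by the real `spark.redaction.regex` lookup:

```scala
// Stubbed redaction: the real code matches keys against the configured regex.
def redactStub(kvs: Seq[(String, String)]): Seq[(String, String)] =
  kvs.map { case (k, v) =>
    if (k.toLowerCase.contains("password")) (k, "*********(redacted)") else (k, v)
  }

val sysProps = Seq(
  "spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD" -> "secret-value",
  "spark.authenticate" -> "false")

// Tuple.toString renders as "(key,value)", giving the output shown above.
println(s"System properties:\n${redactStub(sysProps).mkString("\n")}")
// System properties:
// (spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD,*********(redacted))
// (spark.authenticate,false)
```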

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 9 additions & 3 deletions

@@ -84,9 +84,15 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     // scalastyle:off println
     if (verbose) SparkSubmit.printStream.println(s"Using properties file: $propertiesFile")
     Option(propertiesFile).foreach { filename =>
-      Utils.getPropertiesFromFile(filename).foreach { case (k, v) =>
+      val properties = Utils.getPropertiesFromFile(filename)
+      properties.foreach { case (k, v) =>
         defaultProperties(k) = v
-        if (verbose) SparkSubmit.printStream.println(s"Adding default property: $k=$v")
+      }
+      // Property files may contain sensitive information, so redact before printing
+      if (verbose) {
+        Utils.redact(properties).foreach { case (k, v) =>
+          SparkSubmit.printStream.println(s"Adding default property: $k=$v")
+        }
       }
     }
     // scalastyle:on println

@@ -318,7 +324,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
        |
        |Spark properties used, including those specified through
        | --conf and those from the properties file $propertiesFile:
-       |${sparkProperties.mkString(" ", "\n ", "\n")}
+       |${Utils.redact(sparkProperties).mkString(" ", "\n ", "\n")}
     """.stripMargin
   }

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 20 additions & 1 deletion

@@ -39,6 +39,7 @@ import scala.io.Source
 import scala.reflect.ClassTag
 import scala.util.Try
 import scala.util.control.{ControlThrowable, NonFatal}
+import scala.util.matching.Regex
 
 import _root_.io.netty.channel.unix.Errors.NativeIoException
 import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}

@@ -2588,13 +2589,31 @@ private[spark] object Utils extends Logging {
 
   def redact(conf: SparkConf, kvs: Seq[(String, String)]): Seq[(String, String)] = {
     val redactionPattern = conf.get(SECRET_REDACTION_PATTERN).r
+    redact(redactionPattern, kvs)
+  }
+
+  private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
     kvs.map { kv =>
       redactionPattern.findFirstIn(kv._1)
-        .map { ignore => (kv._1, REDACTION_REPLACEMENT_TEXT) }
+        .map { _ => (kv._1, REDACTION_REPLACEMENT_TEXT) }
         .getOrElse(kv)
     }
   }
 
+  /**
+   * Looks up the redaction regex from within the key value pairs and uses it to redact the rest
+   * of the key value pairs. No care is taken to make sure the redaction property itself is not
+   * redacted. So theoretically, the property itself could be configured to redact its own value
+   * when printing.
+   */
+  def redact(kvs: Map[String, String]): Seq[(String, String)] = {
+    val redactionPattern = kvs.getOrElse(
+      SECRET_REDACTION_PATTERN.key,
+      SECRET_REDACTION_PATTERN.defaultValueString
+    ).r
+    redact(redactionPattern, kvs.toArray)
+  }
+
 }
 
 private[util] object CallerContext extends Logging {
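
The new `redact(kvs: Map[String, String])` overload takes its pattern from the very map it is redacting, falling back to the configured default. The hedged sketch below mirrors that lookup with a hard-coded key and fallback (assumptions for illustration, not read from Spark's config constants), and shows the corner case the Scaladoc mentions: a pattern that matches the redaction property's own name redacts its own value.

```scala
// Sketch only: the key name "spark.redaction.regex" comes from the commit
// message; the fallback pattern here is an assumed stand-in for the default.
def redactFromProps(kvs: Map[String, String]): Seq[(String, String)] = {
  val pattern = kvs.getOrElse("spark.redaction.regex", "(?i)secret|password").r
  kvs.toSeq.map { case (k, v) =>
    if (pattern.findFirstIn(k).isDefined) (k, "*********(redacted)") else (k, v)
  }
}

val props = Map(
  "spark.redaction.regex" -> "(?i)secret|password|redaction",
  "my.service.password"   -> "hunter2")

redactFromProps(props).foreach(println)
// (spark.redaction.regex,*********(redacted))   <- pattern matched its own key
// (my.service.password,*********(redacted))
```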
