-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18535][UI][YARN] Redact sensitive information from Spark logs and UI #15971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5dd3630
b0ad319
78e4398
eed33db
84a7ef3
549881b
61a961c
49015ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -223,4 +223,13 @@ package object config { | |
| " bigger files.") | ||
| .longConf | ||
| .createWithDefault(4 * 1024 * 1024) | ||
|
|
||
| private[spark] val SECRET_REDACTION_PATTERN = | ||
| ConfigBuilder("spark.redaction.regex") | ||
| .doc("Regex to decide which Spark configuration properties and environment variables in " + | ||
| "driver and executor environments contain sensitive information. When this regex matches " + | ||
| "a property, its value is redacted from the environment UI and various logs like YARN " + ||
| "and event logs") | ||
|
||
| .stringConf | ||
| .createWithDefault("(?i)secret|password") | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -153,7 +153,9 @@ private[spark] class EventLoggingListener( | |
|
|
||
| override def onTaskEnd(event: SparkListenerTaskEnd): Unit = logEvent(event) | ||
|
|
||
| override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = logEvent(event) | ||
| override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = { | ||
| logEvent(redactEvent(event)) | ||
| } | ||
|
|
||
| // Events that trigger a flush | ||
| override def onStageCompleted(event: SparkListenerStageCompleted): Unit = { | ||
|
|
@@ -231,6 +233,19 @@ private[spark] class EventLoggingListener( | |
| } | ||
| } | ||
|
|
||
|
|
||
|
||
| private def redactEvent(event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = { | ||
|
||
| // "Spark Properties" entry will always exist because the map is always populated with it. | ||
| val props = event | ||
| .environmentDetails | ||
| .get("Spark Properties") | ||
| .get | ||
|
||
| val redactedProps = Utils.redact(sparkConf, props) | ||
| val redactedEnvironmentDetails = event.environmentDetails + | ||
| ("Spark Properties" -> redactedProps) | ||
| SparkListenerEnvironmentUpdate(redactedEnvironmentDetails) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| private[spark] object EventLoggingListener extends Logging { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,7 +55,7 @@ import org.slf4j.Logger | |
| import org.apache.spark._ | ||
| import org.apache.spark.deploy.SparkHadoopUtil | ||
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.internal.config.{DYN_ALLOCATION_INITIAL_EXECUTORS, DYN_ALLOCATION_MIN_EXECUTORS, EXECUTOR_INSTANCES} | ||
| import org.apache.spark.internal.config._ | ||
| import org.apache.spark.network.util.JavaUtils | ||
| import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} | ||
| import org.apache.spark.util.logging.RollingFileAppender | ||
|
|
@@ -2555,6 +2555,19 @@ private[spark] object Utils extends Logging { | |
| sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten | ||
| } | ||
| } | ||
|
|
||
| private[util] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" | ||
|
|
||
| def redact(conf: SparkConf, kvs: Seq[(String, String)]): Seq[(String, String)] = { | ||
| val redactionPattern = conf.get(SECRET_REDACTION_PATTERN).r | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is very expensive. How about a version that takes a list of tuples and redacts them?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What part do you think is expensive? Going through all the configuration properties and matching them with the regex?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Compiling the regex once for every item in the list being redacted, instead of doing it once for the whole list.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, good point. Let me fix this. |
||
| kvs.map { kv => | ||
| if (redactionPattern.findFirstIn(kv._1).isDefined) { | ||
| (kv._1, REDACTION_REPLACEMENT_TEXT) | ||
| } | ||
|
||
| else kv | ||
|
||
| } | ||
| } | ||
|
|
||
| } | ||
|
|
||
| private[util] object CallerContext extends Logging { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,6 +95,30 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit | |
| } | ||
| } | ||
|
|
||
| test("Event logging with password redaction") { | ||
| val secretPassword = "secret_password" | ||
| val conf = getLoggingConf(testDirPath, None).set("spark.executorEnv.HADOOP_CREDSTORE_PASSWORD", | ||
| secretPassword) | ||
| sc = new SparkContext("local-cluster[2,2,1024]", "test", conf) | ||
|
||
| assert(sc.eventLogger.isDefined) | ||
| val eventLogger = sc.eventLogger.get | ||
|
|
||
| sc.parallelize(1 to 10000).count() | ||
| sc.stop() | ||
|
|
||
| val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem) | ||
| val eventLog = Source.fromInputStream(logData).mkString | ||
| // Make sure nothing secret shows up anywhere | ||
| assert(!eventLog.contains(secretPassword), s"Secret password ($secretPassword) not redacted " + | ||
| s"from event logs:\n $eventLog") | ||
| val expected = """"spark.executorEnv.HADOOP_CREDSTORE_PASSWORD":"*********(redacted)"""" | ||
|
||
| // Make sure every occurrence of the property is accompanied by a redaction text. | ||
| val regex = """"spark.executorEnv.HADOOP_CREDSTORE_PASSWORD":"([^"]*)"""".r | ||
| val matches = regex.findAllIn(eventLog) | ||
| assert(matches.nonEmpty) | ||
| matches.foreach{ matched => assert(matched.equals(expected)) } | ||
| } | ||
|
|
||
| test("Log overwriting") { | ||
| val logUri = EventLoggingListener.getLogPath(testDir.toURI, "test", None) | ||
| val logPath = new URI(logUri).getPath | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -974,4 +974,28 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { | |
|
|
||
| assert(pValue > threshold) | ||
| } | ||
|
|
||
| test("redact sensitive information") { | ||
| val sparkConf = new SparkConf | ||
|
|
||
| // Set some secret keys | ||
| val secretKeys = Seq("" + | ||
|
||
| "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD", | ||
| "spark.my.password", | ||
| "spark.my.sECreT") | ||
| secretKeys.foreach { key => | ||
| sparkConf.set(key, "secret_password") | ||
| } | ||
| // Set a non-secret key | ||
| sparkConf.set("spark.regular.property", "not_a_secret") | ||
|
|
||
| // Redact sensitive information | ||
| val redactedConf = Utils.redact(sparkConf, sparkConf.getAll).toMap | ||
|
|
||
| // Assert that secret information got redacted while the regular property remained the same | ||
| secretKeys.foreach { key => | ||
| assert(redactedConf.get(key).get == Utils.REDACTION_REPLACEMENT_TEXT) | ||
|
||
| } | ||
| assert(redactedConf.get("spark.regular.property").get == "not_a_secret") | ||
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: no space before comma.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.