Skip to content

Commit bcb9a8c

Browse files
gerashegalovMarcelo Vanzin
authored andcommitted
[SPARK-25221][DEPLOY] Consistent trailing whitespace treatment of conf values
## What changes were proposed in this pull request? Stop trimming values of properties loaded from a file ## How was this patch tested? Added unit test demonstrating the issue hit in production. Closes #22213 from gerashegalov/gera/SPARK-25221. Authored-by: Gera Shegalov <[email protected]> Signed-off-by: Marcelo Vanzin <[email protected]>
1 parent 77579aa commit bcb9a8c

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package org.apache.spark.util
1919

2020
import java.io._
2121
import java.lang.{Byte => JByte}
22-
import java.lang.InternalError
2322
import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo, ThreadInfo}
2423
import java.lang.reflect.InvocationTargetException
2524
import java.math.{MathContext, RoundingMode}
@@ -2052,6 +2051,30 @@ private[spark] object Utils extends Logging {
20522051
}
20532052
}
20542053

2054+
/**
2055+
* Implements the same logic as JDK `java.lang.String#trim` by removing leading and trailing
2056+
* non-printable characters less or equal to '\u0020' (SPACE) but preserves natural line
2057+
* delimiters according to [[java.util.Properties]] load method. The natural line delimiters are
2058+
* removed by JDK during load. Therefore any remaining ones have been specifically provided and
2059+
* escaped by the user, and must not be ignored
2060+
*
2061+
* @param str
2062+
* @return the trimmed value of str
2063+
*/
2064+
private[util] def trimExceptCRLF(str: String): String = {
2065+
val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch =>
2066+
ch > ' ' || ch == '\r' || ch == '\n'
2067+
}
2068+
2069+
val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter)
2070+
val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter)
2071+
if (firstPos >= 0 && lastPos >= 0) {
2072+
str.substring(firstPos, lastPos + 1)
2073+
} else {
2074+
""
2075+
}
2076+
}
2077+
20552078
/** Load properties present in the given file. */
20562079
def getPropertiesFromFile(filename: String): Map[String, String] = {
20572080
val file = new File(filename)
@@ -2062,8 +2085,10 @@ private[spark] object Utils extends Logging {
20622085
try {
20632086
val properties = new Properties()
20642087
properties.load(inReader)
2065-
properties.stringPropertyNames().asScala.map(
2066-
k => (k, properties.getProperty(k).trim)).toMap
2088+
properties.stringPropertyNames().asScala
2089+
.map { k => (k, trimExceptCRLF(properties.getProperty(k))) }
2090+
.toMap
2091+
20672092
} catch {
20682093
case e: IOException =>
20692094
throw new SparkException(s"Failed when loading Spark properties from $filename", e)

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,6 +1144,53 @@ class SparkSubmitSuite
11441144
conf1.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}")
11451145
conf1.get("spark.submit.pyFiles") should (startWith("/"))
11461146
}
1147+
1148+
test("handles natural line delimiters in --properties-file and --conf uniformly") {
1149+
val delimKey = "spark.my.delimiter."
1150+
val LF = "\n"
1151+
val CR = "\r"
1152+
1153+
val lineFeedFromCommandLine = s"${delimKey}lineFeedFromCommandLine" -> LF
1154+
val leadingDelimKeyFromFile = s"${delimKey}leadingDelimKeyFromFile" -> s"${LF}blah"
1155+
val trailingDelimKeyFromFile = s"${delimKey}trailingDelimKeyFromFile" -> s"blah${CR}"
1156+
val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}"
1157+
val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f"
1158+
1159+
val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile,
1160+
nonDelimSpaceFromFile)
1161+
1162+
val props = new java.util.Properties()
1163+
val propsFile = File.createTempFile("test-spark-conf", ".properties",
1164+
Utils.createTempDir())
1165+
val propsOutputStream = new FileOutputStream(propsFile)
1166+
try {
1167+
testProps.foreach { case (k, v) => props.put(k, v) }
1168+
props.store(propsOutputStream, "test whitespace")
1169+
} finally {
1170+
propsOutputStream.close()
1171+
}
1172+
1173+
val clArgs = Seq(
1174+
"--class", "org.SomeClass",
1175+
"--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}",
1176+
"--conf", "spark.master=yarn",
1177+
"--properties-file", propsFile.getPath,
1178+
"thejar.jar")
1179+
1180+
val appArgs = new SparkSubmitArguments(clArgs)
1181+
val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
1182+
1183+
Seq(
1184+
lineFeedFromCommandLine,
1185+
leadingDelimKeyFromFile,
1186+
trailingDelimKeyFromFile,
1187+
infixDelimFromFile
1188+
).foreach { case (k, v) =>
1189+
conf.get(k) should be (v)
1190+
}
1191+
1192+
conf.get(nonDelimSpaceFromFile._1) should be ("blah")
1193+
}
11471194
}
11481195

11491196
object SparkSubmitSuite extends SparkFunSuite with TimeLimits {

core/src/test/scala/org/apache/spark/util/UtilsSuite.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,34 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
12051205
assert(Utils.stringHalfWidth("\u0967\u0968\u0969") == 3)
12061206
// scalastyle:on nonascii
12071207
}
1208+
1209+
test("trimExceptCRLF standalone") {
1210+
val crlfSet = Set("\r", "\n")
1211+
val nonPrintableButCRLF = (0 to 32).map(_.toChar.toString).toSet -- crlfSet
1212+
1213+
// identity for CRLF
1214+
crlfSet.foreach { s => Utils.trimExceptCRLF(s) === s }
1215+
1216+
// empty for other non-printables
1217+
nonPrintableButCRLF.foreach { s => assert(Utils.trimExceptCRLF(s) === "") }
1218+
1219+
// identity for a printable string
1220+
assert(Utils.trimExceptCRLF("a") === "a")
1221+
1222+
// identity for strings with CRLF
1223+
crlfSet.foreach { s =>
1224+
assert(Utils.trimExceptCRLF(s"${s}a") === s"${s}a")
1225+
assert(Utils.trimExceptCRLF(s"a${s}") === s"a${s}")
1226+
assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b")
1227+
}
1228+
1229+
// trim nonPrintableButCRLF except when inside a string
1230+
nonPrintableButCRLF.foreach { s =>
1231+
assert(Utils.trimExceptCRLF(s"${s}a") === "a")
1232+
assert(Utils.trimExceptCRLF(s"a${s}") === "a")
1233+
assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b")
1234+
}
1235+
}
12081236
}
12091237

12101238
private class SimpleExtension

0 commit comments

Comments
 (0)