Commit 60a41dc

Preserve natural line delimiters

1 parent 1d4b0d4 commit 60a41dc

2 files changed: +44 -10 lines

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 23 additions & 2 deletions
@@ -19,7 +19,6 @@ package org.apache.spark.util
 
 import java.io._
 import java.lang.{Byte => JByte}
-import java.lang.InternalError
 import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo, ThreadInfo}
 import java.lang.reflect.InvocationTargetException
 import java.math.{MathContext, RoundingMode}

@@ -2052,6 +2051,28 @@ private[spark] object Utils extends Logging {
     }
   }
 
+  private[this] val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch =>
+    ch > ' ' || ch == '\r' || ch == '\n'
+  }
+
+  /**
+   * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing
+   * non-printable characters less than or equal to '\u0020' (SPACE), but preserves the natural
+   * line delimiters recognized by the [[java.util.Properties]] load method. The JDK strips
+   * natural line delimiters during load, so any that remain were explicitly provided and
+   * escaped by the user and must not be ignored.
+   *
+   * @param str the string to trim
+   * @return the trimmed value of str
+   */
+  def trimExceptCRLF(str: String): String = {
+    val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter)
+    val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter)
+    if (firstPos >= 0 && lastPos >= 0) {
+      str.substring(firstPos, lastPos + 1)
+    } else {
+      ""
+    }
+  }
+
   /** Load properties present in the given file. */
   def getPropertiesFromFile(filename: String): Map[String, String] = {
     val file = new File(filename)

@@ -2063,7 +2084,7 @@
       val properties = new Properties()
       properties.load(inReader)
       properties.stringPropertyNames().asScala
-        .map(k => (k, properties.getProperty(k)))
+        .map(k => (k, trimExceptCRLF(properties.getProperty(k))))
         .toMap
 
     } catch {
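
For context, a minimal standalone sketch of how the predicate above behaves: characters at or below '\u0020' are trimmed from both ends unless they are '\r' or '\n', which matches java.lang.String#trim except for the two natural line delimiters. This sketch is not part of the commit; the object name and the assertions are illustrative assumptions only.

// Standalone sketch of the trimming behaviour added above (assumed example, not commit code).
object TrimExceptCRLFSketch {
  // Same predicate as in the diff: keep anything above SPACE, plus CR and LF.
  private val nonSpaceOrNaturalLineDelimiter: Char => Boolean =
    ch => ch > ' ' || ch == '\r' || ch == '\n'

  def trimExceptCRLF(str: String): String = {
    val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter)
    val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter)
    if (firstPos >= 0 && lastPos >= 0) str.substring(firstPos, lastPos + 1) else ""
  }

  def main(args: Array[String]): Unit = {
    assert(trimExceptCRLF(" blah\f") == "blah")    // ordinary whitespace and \f are trimmed
    assert(trimExceptCRLF("\nblah") == "\nblah")   // a leading LF survives
    assert(trimExceptCRLF("blah\r") == "blah\r")   // a trailing CR survives
    assert(trimExceptCRLF(" \t ") == "")           // all-whitespace input becomes empty
  }
}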

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 21 additions & 8 deletions
@@ -1145,31 +1145,44 @@ class SparkSubmitSuite
     conf1.get("spark.submit.pyFiles") should (startWith("/"))
   }
 
-  test("handles white space in --properties-file and --conf uniformly") {
-    val delimKey = "spark.my.delimiter"
-    val delimKeyFromFile = s"${delimKey}FromFile"
-    val newLine = "\n"
+  test("handles natural line delimiters in --properties-file and --conf uniformly") {
+    val delimKey = "spark.my.delimiter."
+    val LF = "\n"
+    val CR = "\r"
+
+    val leadingDelimKeyFromFile = s"${delimKey}leadingDelimKeyFromFile" -> s"${LF}blah"
+    val trailingDelimKeyFromFile = s"${delimKey}trailingDelimKeyFromFile" -> s"blah${CR}"
+    val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}"
+    val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f"
+
+    val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile,
+      nonDelimSpaceFromFile)
+
     val props = new java.util.Properties()
     val propsFile = File.createTempFile("test-spark-conf", ".properties", Utils.createTempDir())
     val propsOutputStream = new FileOutputStream(propsFile)
     try {
-      props.put(delimKeyFromFile, newLine)
+      testProps.foreach { case (k, v) => props.put(k, v) }
       props.store(propsOutputStream, "test whitespace")
     } finally {
       propsOutputStream.close()
     }
 
     val clArgs = Seq(
       "--class", "org.SomeClass",
-      "--conf", s"${delimKey}=$newLine",
+      "--conf", s"${delimKey}=$LF",
       "--conf", "spark.master=yarn",
       "--properties-file", propsFile.getPath,
       "thejar.jar")
 
     val appArgs = new SparkSubmitArguments(clArgs)
     val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
-    conf.get(delimKey) should be (newLine)
-    conf.get(delimKeyFromFile) should be (newLine)
+
+    Seq((delimKey -> LF), leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile)
+      .foreach {
+        case (k, v) => conf.get(k) should be (v)
+      }
+
+    conf.get(nonDelimSpaceFromFile._1) should be ("blah")
   }
 }
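
The test only needs escaped delimiters because java.util.Properties consumes natural line terminators while loading, so a '\r' or '\n' can only reach a value if it was escaped by the user (or by Properties.store on write). A small sketch of that round trip, using only standard JDK calls; the object name and literals are illustrative assumptions, not part of the commit:

import java.io.{StringReader, StringWriter}
import java.util.Properties

object PropertiesDelimiterSketch {
  def main(args: Array[String]): Unit = {
    // A backslash-continued value: the natural line terminator is consumed by load()
    // and never appears in the loaded value.
    val continued = "spark.my.key=foo\\\n    bar\n"
    val p1 = new Properties()
    p1.load(new StringReader(continued))
    assert(p1.getProperty("spark.my.key") == "foobar")

    // An explicit line feed inside a value: store() escapes it as the two characters
    // '\' and 'n', and load() turns it back into a real LF, which trimExceptCRLF preserves.
    val p2 = new Properties()
    p2.setProperty("spark.my.delimiter", "\nblah")
    val out = new StringWriter()
    p2.store(out, "escaped delimiter round trip")

    val p3 = new Properties()
    p3.load(new StringReader(out.toString))
    assert(p3.getProperty("spark.my.delimiter") == "\nblah")
  }
}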
