
Commit 44dcf00

[SPARK-34326][CORE][SQL] Fix UTs added in SPARK-31793 depending on the length of temp path
### What changes were proposed in this pull request?

This PR proposes to fix the UTs added in SPARK-31793 so that everything contributing to the length limit is properly accounted for.

### Why are the changes needed?

The test `DataSourceScanExecRedactionSuite.SPARK-31793: FileSourceScanExec metadata should contain limited file paths` fails conditionally, depending on the length of the temp directory.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Modified UTs, which document the previously missing points and also exercise them.

Closes #31449 from HeartSaVioR/SPARK-34326-v2.

Authored-by: Jungtaek Lim (HeartSaVioR) <[email protected]>
Signed-off-by: Jungtaek Lim <[email protected]>
1 parent 8e28218 commit 44dcf00
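
To see how the failure depends on the path length: the assertion removed in the `DataSourceScanExecRedactionSuite` diff below bounds the location metadata length by `Math.max(paths.head.length, 100) * 2`. The sketch below plays that bound through for two hypothetical partition-path lengths; the 100-character append threshold, the `InMemoryFileIndex` prefix, and the helper name are assumptions for illustration, not values taken from this commit.

```scala
// Hypothetical arithmetic behind the removed assertion
//   assert(location.get.length < Math.max(paths.head.length, 100) * 2)
// assuming a 100-character append threshold, an "InMemoryFileIndex" class-name
// prefix, and more partition paths available than ever fit (as with the test's
// 10 partitions). All of these are assumptions for illustration.
def truncatedLength(
    pathLen: Int,
    threshold: Int = 100,
    prefixLen: Int = "InMemoryFileIndex".length): Int = {
  var len = 1                   // opening '['
  var appended = 0
  while (len < threshold) {     // keep appending while still under the threshold
    if (appended > 0) len += 2  // ", " separator
    len += pathLen
    appended += 1
  }
  prefixLen + len + 1           // class-name prefix and closing ']'
}

// Short partition paths: comfortably below the old bound of max(pathLen, 100) * 2.
assert(truncatedLength(pathLen = 48) < math.max(48, 100) * 2)   // 167 < 200

// Paths just under the threshold: two of them fit before the threshold trips, and
// the prefix pushes the total past the bound, so the old assertion could fail
// depending on how long the temp directory happened to be.
assert(truncatedLength(pathLen = 95) >= math.max(95, 100) * 2)  // 211 >= 200
```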

2 files changed: 27 additions & 5 deletions

core/src/test/scala/org/apache/spark/util/UtilsSuite.scala

Lines changed: 6 additions & 0 deletions
@@ -1308,6 +1308,12 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
     assert(Utils.buildLocationMetadata(paths, 10) == "[path0, path1]")
     assert(Utils.buildLocationMetadata(paths, 15) == "[path0, path1, path2]")
     assert(Utils.buildLocationMetadata(paths, 25) == "[path0, path1, path2, path3]")
+
+    // edge case: non-path characters, including '[' and the ", " separator, also count toward the limit
+    // 1. the second path is not added because of the leading '['
+    assert(Utils.buildLocationMetadata(paths, 6) == "[path0]")
+    // 2. the third path is not added because of the ", " separator
+    assert(Utils.buildLocationMetadata(paths, 13) == "[path0, path1]")
   }
 
   test("checkHost supports both IPV4 and IPV6") {

sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala

Lines changed: 21 additions & 5 deletions
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution
 import java.io.File
 
 import scala.collection.mutable
+import scala.util.Random
 
 import org.apache.hadoop.fs.Path
 
@@ -122,24 +123,39 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
   test("SPARK-31793: FileSourceScanExec metadata should contain limited file paths") {
     withTempPath { path =>
       val dir = path.getCanonicalPath
+
+      // create a sub-directory with a long name so that each root path always exceeds the limit;
+      // this ensures the path-truncation case is always exercised
+      val dataDirName = Random.alphanumeric.take(100).toList.mkString
+      val dataDir = new File(path, dataDirName)
+      dataDir.mkdir()
+
       val partitionCol = "partitionCol"
       spark.range(10)
         .select("id", "id")
         .toDF("value", partitionCol)
         .write
         .partitionBy(partitionCol)
-        .orc(dir)
-      val paths = (0 to 9).map(i => new File(dir, s"$partitionCol=$i").getCanonicalPath)
+        .orc(dataDir.getCanonicalPath)
+      val paths = (0 to 9).map(i => new File(dataDir, s"$partitionCol=$i").getCanonicalPath)
       val plan = spark.read.orc(paths: _*).queryExecution.executedPlan
       val location = plan collectFirst {
         case f: FileSourceScanExec => f.metadata("Location")
       }
       assert(location.isDefined)
       // The location metadata should at least contain one path
       assert(location.get.contains(paths.head))
-      // If the temp path length is larger than 100, the metadata length should not exceed
-      // twice of the length; otherwise, the metadata length should be controlled within 200.
-      assert(location.get.length < Math.max(paths.head.length, 100) * 2)
+
+      // The location metadata should wrap the paths in brackets
+      assert(location.get.indexOf('[') > -1)
+      assert(location.get.indexOf(']') > -1)
+
+      // extract the paths from the location metadata (removing class name, brackets, separators)
+      val pathsInLocation = location.get.substring(
+        location.get.indexOf('[') + 1, location.get.indexOf(']')).split(", ").toSeq
+
+      // only one path should be present
+      assert(pathsInLocation.size == 1)
     }
   }
 }
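
For the rewritten test, the reason exactly one path is expected can be seen with a small standalone sketch: once the random 100-character directory name is part of every partition path, each path alone already exceeds the append threshold, so only the first one lands between the brackets. The base path, class-name prefix, and 100-character threshold below are assumptions for illustration.

```scala
import scala.util.Random

// Every partition path contains the 100-character random directory name, so each
// one is longer than the assumed 100-character append threshold on its own.
val dataDirName = Random.alphanumeric.take(100).mkString
val paths = (0 to 9).map(i => s"/tmp/spark-test/$dataDirName/partitionCol=$i")

// Length-limited listing as in the sketch after the UtilsSuite diff above:
// the first append already crosses the threshold, so nothing else is added.
val sb = new StringBuilder("[")
var i = 0
while (i < paths.length && sb.length < 100) {
  if (i > 0) sb.append(", ")
  sb.append(paths(i))
  i += 1
}
val location = "InMemoryFileIndex" + sb.append("]").toString

// The same extraction the test performs: keep what sits between '[' and ']'
// and split on the ", " path separator.
val pathsInLocation = location.substring(
  location.indexOf('[') + 1, location.indexOf(']')).split(", ").toSeq
assert(pathsInLocation.size == 1)          // only the first (over-long) path fits
assert(pathsInLocation.head == paths.head)
```

Because these checks only look at the structure of the metadata value, they hold regardless of how long the temp directory path is.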
