-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-19761][SQL]create InMemoryFileIndex with an empty rootPaths when set PARALLEL_PARTITION_DISCOVERY_THRESHOLD to zero failed #17093
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
96898a2
a3ac29b
ec0afac
13b70f0
1cb997c
0d2334d
74d08a5
e1a9072
3c079ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -300,7 +300,7 @@ object PartitioningAwareFileIndex extends Logging { | |
| sparkSession: SparkSession): Seq[(Path, Seq[FileStatus])] = { | ||
|
|
||
| // Short-circuits parallel listing when serial listing is likely to be faster. | ||
| if (paths.size < sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold) { | ||
| if (paths.size <= sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `<=` instead of `<` makes the meaning of the conf clearer to understand. |
||
| return paths.map { path => | ||
| (path, listLeafFiles(path, hadoopConf, filter, Some(sparkSession))) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} | |
|
|
||
| import org.apache.spark.metrics.source.HiveCatalogMetrics | ||
| import org.apache.spark.sql.catalyst.util._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.test.SharedSQLContext | ||
|
|
||
| class FileIndexSuite extends SharedSQLContext { | ||
|
|
@@ -179,6 +180,21 @@ class FileIndexSuite extends SharedSQLContext { | |
| } | ||
| } | ||
|
|
||
| test("InMemoryFileIndex with empty rootPaths when PARALLEL_PARTITION_DISCOVERY_THRESHOLD" + | ||
| "is a nonpositive number") { | ||
| withSQLConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD.key -> "0") { | ||
| new InMemoryFileIndex(spark, Seq.empty, Map.empty, None) | ||
| } | ||
|
|
||
| val e = intercept[IllegalArgumentException] { | ||
| withSQLConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD.key -> "-1") { | ||
| new InMemoryFileIndex(spark, Seq.empty, Map.empty, None) | ||
| } | ||
| }.getMessage | ||
| assert(e.contains("The maximum number of paths allowed for listing files at " + | ||
| "driver side must not be negative")) | ||
|
||
| } | ||
|
|
||
| test("refresh for InMemoryFileIndex with FileStatusCache") { | ||
| withTempDir { dir => | ||
| val fileStatusCache = FileStatusCache.getOrCreate(spark) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why don't we just make sure
`parallelPartitionDiscoveryThreshold` is greater than 0? We can add a condition (via `checkValue`) in `SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I didn't notice there is a
`checkValue` func; let me fix it. Thanks!