Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ case class FileSourceScanExec(
createBucketedReadRDD(relation.bucketSpec.get, readFile, dynamicallySelectedPartitions,
relation)
} else {
createNonBucketedReadRDD(readFile, dynamicallySelectedPartitions, relation)
createReadRDD(readFile, dynamicallySelectedPartitions, relation)
}
sendDriverMetrics()
readRDD
Expand Down Expand Up @@ -518,7 +518,7 @@ case class FileSourceScanExec(

/**
* Create an RDD for bucketed reads.
* The non-bucketed variant of this function is [[createNonBucketedReadRDD]].
* The non-bucketed variant of this function is [[createReadRDD]].
*
* The algorithm is pretty simple: each RDD partition being returned should include all the files
* with the same bucket id from all the given Hive partitions.
Expand Down Expand Up @@ -580,7 +580,7 @@ case class FileSourceScanExec(
* @param selectedPartitions Hive-style partition that are part of the read.
* @param fsRelation [[HadoopFsRelation]] associated with the read.
*/
private def createNonBucketedReadRDD(
private def createReadRDD(
readFile: (PartitionedFile) => Iterator[InternalRow],
selectedPartitions: Array[PartitionDirectory],
fsRelation: HadoopFsRelation): RDD[InternalRow] = {
Expand All @@ -594,14 +594,8 @@ case class FileSourceScanExec(
val bucketingEnabled = fsRelation.sparkSession.sessionState.conf.bucketingEnabled
val shouldProcess: Path => Boolean = optionalBucketSet match {
case Some(bucketSet) if bucketingEnabled =>
filePath => {
BucketingUtils.getBucketId(filePath.getName) match {
case Some(id) => bucketSet.get(id)
case None =>
// Do not prune the file if bucket file name is invalid
true
}
}
// Do not prune the file if bucket file name is invalid
filePath => BucketingUtils.getBucketId(filePath.getName).forall(bucketSet.get)
case _ =>
_ => true
}
Expand Down