diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 38e63d425bb21..6fa4167384925 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -410,7 +410,7 @@ case class FileSourceScanExec( createBucketedReadRDD(relation.bucketSpec.get, readFile, dynamicallySelectedPartitions, relation) } else { - createNonBucketedReadRDD(readFile, dynamicallySelectedPartitions, relation) + createReadRDD(readFile, dynamicallySelectedPartitions, relation) } sendDriverMetrics() readRDD @@ -518,7 +518,7 @@ case class FileSourceScanExec( /** * Create an RDD for bucketed reads. - * The non-bucketed variant of this function is [[createNonBucketedReadRDD]]. + * The non-bucketed variant of this function is [[createReadRDD]]. * * The algorithm is pretty simple: each RDD partition being returned should include all the files * with the same bucket id from all the given Hive partitions. @@ -580,7 +580,7 @@ case class FileSourceScanExec( * @param selectedPartitions Hive-style partition that are part of the read. * @param fsRelation [[HadoopFsRelation]] associated with the read. */ - private def createNonBucketedReadRDD( + private def createReadRDD( readFile: (PartitionedFile) => Iterator[InternalRow], selectedPartitions: Array[PartitionDirectory], fsRelation: HadoopFsRelation): RDD[InternalRow] = { @@ -594,14 +594,8 @@ case class FileSourceScanExec( val bucketingEnabled = fsRelation.sparkSession.sessionState.conf.bucketingEnabled val shouldProcess: Path => Boolean = optionalBucketSet match { case Some(bucketSet) if bucketingEnabled => - filePath => { - BucketingUtils.getBucketId(filePath.getName) match { - case Some(id) => bucketSet.get(id) - case None => - // Do not prune the file if bucket file name is invalid - true - } - } + // Do not prune the file if bucket file name is invalid + filePath => BucketingUtils.getBucketId(filePath.getName).forall(bucketSet.get) case _ => _ => true }