 
 package org.apache.spark.sql.sources
 
+import scala.util.Try
+
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
 
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
 import org.apache.spark.sql.types.{StructField, StructType}
-import org.apache.spark.sql._
 
 /**
  * ::DeveloperApi::
@@ -87,7 +89,7 @@ trait SchemaRelationProvider {
  * ::DeveloperApi::
  * Implemented by objects that produce relations for a specific kind of data source
  * with a given schema and partitioned columns. When Spark SQL is given a DDL operation with a
- * USING clause specified (to specify the implemented SchemaRelationProvider), a user defined
+ * USING clause specified (to specify the implemented [[FSBasedRelationProvider]]), a user defined
  * schema, and an optional list of partition columns, this interface is used to pass in the
  * parameters specified by a user.
  *
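
To illustrate the DDL path the paragraph above describes, here is a minimal sketch: a `USING` clause naming the provider, a user-defined schema, and user-supplied options. The provider class name and path are hypothetical placeholders, not part of this patch.

```scala
// Hypothetical provider class and path, shown only to illustrate how the user-defined
// schema and the OPTIONS clause end up as the schema/parameters arguments of createRelation.
sqlContext.sql(
  """CREATE TEMPORARY TABLE events (id INT, ts TIMESTAMP)
    |USING com.example.spark.MyFSBasedSource
    |OPTIONS (path '/data/events')
  """.stripMargin)
```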
@@ -114,7 +116,7 @@ trait FSBasedRelationProvider {
       sqlContext: SQLContext,
       schema: Option[StructType],
       partitionColumns: Option[StructType],
-      parameters: Map[String, String]): BaseRelation
+      parameters: Map[String, String]): FSBasedRelation
 }
 
 @DeveloperApi
@@ -282,12 +284,13 @@ abstract class OutputWriter {
    * Closes the [[OutputWriter]]. Invoked on the executor side after all rows are persisted, before
    * the task output is committed.
    */
-  def close(): Unit = ()
+  def close(): Unit
 }
 
 /**
  * ::Experimental::
- * A [[BaseRelation]] that abstracts file system based data sources.
+ * A [[BaseRelation]] that provides much of the common code required for formats that store their
+ * data to an HDFS compatible filesystem.
  *
  * For the read path, similar to [[PrunedFilteredScan]], it can eliminate unneeded columns and
  * filter using selected predicates before producing an RDD containing all matching tuples as
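
Since `close()` above loses its no-op default and becomes abstract, every concrete writer now has to release its resources explicitly. A minimal sketch, assuming `write(row: Row)` and `close()` are the only abstract members of `OutputWriter`; the text format and stream field are hypothetical.

```scala
// Hypothetical writer emitting comma-separated lines; close() must now be implemented.
class SimpleTextOutputWriter(stream: java.io.DataOutputStream) extends OutputWriter {
  override def write(row: Row): Unit = {
    stream.writeBytes(row.mkString(",") + "\n")
  }

  override def close(): Unit = {
    stream.close()  // release the underlying stream before the task output is committed
  }
}
```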
@@ -338,16 +341,13 @@ abstract class FSBasedRelation private[sql](
   private var _partitionSpec: PartitionSpec = maybePartitionSpec.map { spec =>
     spec.copy(partitionColumns = spec.partitionColumns.asNullable)
   }.getOrElse {
-    if (partitionDiscoverEnabled()) {
+    if (sqlContext.conf.partitionDiscoveryEnabled()) {
       discoverPartitions()
     } else {
       PartitionSpec(StructType(Nil), Array.empty[Partition])
     }
   }
 
-  private def partitionDiscoverEnabled() =
-    sqlContext.conf.getConf(SQLConf.PARTITION_DISCOVERY_ENABLED, "true").toBoolean
-
   private[sql] def partitionSpec: PartitionSpec = _partitionSpec
 
   /**
@@ -356,7 +356,7 @@ abstract class FSBasedRelation private[sql](
   def partitionColumns: StructType = partitionSpec.partitionColumns
 
   private[sql] def refresh(): Unit = {
-    if (partitionDiscoverEnabled()) {
+    if (sqlContext.conf.partitionDiscoveryEnabled()) {
       _partitionSpec = discoverPartitions()
     }
   }
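
With the ad-hoc `partitionDiscoverEnabled()` helper gone, both the constructor-time spec and `refresh()` consult the same `SQLConf` flag. A hedged usage sketch; the key string is assumed to be the one behind `SQLConf.PARTITION_DISCOVERY_ENABLED`.

```scala
// Assumed key string for SQLConf.PARTITION_DISCOVERY_ENABLED; when disabled, relations
// fall back to an empty PartitionSpec instead of calling discoverPartitions().
sqlContext.setConf("spark.sql.sources.partitionDiscovery.enabled", "false")
```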
@@ -365,11 +365,10 @@ abstract class FSBasedRelation private[sql](
     val basePaths = paths.map(new Path(_))
     val leafDirs = basePaths.flatMap { path =>
       val fs = path.getFileSystem(hadoopConf)
-      if (fs.exists(path)) {
-        SparkHadoopUtil.get.listLeafDirStatuses(fs, fs.makeQualified(path))
-      } else {
-        Seq.empty[FileStatus]
-      }
+      Try(fs.getFileStatus(path.makeQualified(fs.getUri, fs.getWorkingDirectory)))
+        .filter(_.isDir)
+        .map(SparkHadoopUtil.get.listLeafDirStatuses(fs, _))
+        .getOrElse(Seq.empty[FileStatus])
     }.map(_.getPath)
 
     if (leafDirs.nonEmpty) {
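
The new `Try` chain above collapses the separate `exists()` check and the listing into a single `getFileStatus()` round trip, and also skips base paths that turn out to be plain files. A standalone sketch of the same pattern using only the Hadoop `FileSystem` API; the helper name is hypothetical.

```scala
import scala.util.Try

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path}

// Returns the status of `pathString` only if it exists and is a directory; a missing path
// surfaces as a FileNotFoundException inside Try and becomes None instead of an error.
def statusIfDirectory(pathString: String, hadoopConf: Configuration): Option[FileStatus] = {
  val path = new Path(pathString)
  val fs = path.getFileSystem(hadoopConf)
  Try(fs.getFileStatus(path.makeQualified(fs.getUri, fs.getWorkingDirectory)))
    .filter(_.isDir)   // isDir is the pre-Hadoop-2 name; newer APIs also offer isDirectory
    .toOption
}
```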