DataSource.scala
@@ -376,25 +376,7 @@ case class DataSource(

       val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format)

-      val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions &&
-          catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) {
-        val defaultTableSize = sparkSession.sessionState.conf.defaultSizeInBytes
-        new CatalogFileIndex(
-          sparkSession,
-          catalogTable.get,
-          catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(defaultTableSize))
-      } else {
-        new InMemoryFileIndex(sparkSession, globbedPaths, options, Some(partitionSchema))
-      }
-
-      HadoopFsRelation(
-        fileCatalog,
-        partitionSchema = partitionSchema,
-        dataSchema = dataSchema.asNullable,
-        bucketSpec = bucketSpec,
-        format,
-        caseInsensitiveOptions)(sparkSession)
-
+      createHadoopRelation(format, globbedPaths)
     case _ =>
       throw new AnalysisException(
         s"$className is not a valid Spark SQL Data Source.")
@@ -403,6 +385,35 @@ case class DataSource(
     relation
   }

+  /**
+   * Creates a Hadoop relation based on the given format and globbed file paths.
+   * @param format the [[FileFormat]] of the data source files
+   * @param globPaths file paths already resolved by the Hadoop library
+   * @return the resolved [[BaseRelation]]
+   */
+  def createHadoopRelation(
+      format: FileFormat,
+      globPaths: Array[Path]): BaseRelation = {
Review comment (Member): Let's make this inlined.

+    val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format)
Review comment (Member): You call getOrInferFileFormatSchema twice; the first call happens before createHadoopRelation is invoked.

Author reply: @viirya I will fix this; it looks like a merge issue. @maropu I will add tests.

+    val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions &&
+        catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) {
+      val defaultTableSize = sparkSession.sessionState.conf.defaultSizeInBytes
+      new CatalogFileIndex(
+        sparkSession,
+        catalogTable.get,
+        catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(defaultTableSize))
+    } else {
+      new InMemoryFileIndex(sparkSession, globPaths, options, Some(partitionSchema))
+    }
+
+    HadoopFsRelation(
+      fileCatalog,
+      partitionSchema = partitionSchema,
+      dataSchema = dataSchema.asNullable,
+      bucketSpec = bucketSpec,
+      format,
+      caseInsensitiveOptions)(sparkSession)
+  }

  /**
   * Writes the given [[DataFrame]] out in this [[FileFormat]].
   */
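
A rough standalone sketch of the resolve-once idea behind the new entry point, using plain Hadoop filesystem APIs (the /tmp pattern is hypothetical): expand the glob one time, then hand the concrete Path objects onward, the way resolveRelation now hands globbedPaths to createHadoopRelation.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ResolveOnceDemo extends App {
  val fs = FileSystem.getLocal(new Configuration())

  // One round of glob expansion produces concrete FileStatus entries.
  val statuses = Option(fs.globStatus(new Path("/tmp/csv/*.csv")))
    .getOrElse(Array.empty)

  // Downstream code should consume these resolved paths directly rather
  // than converting them back to strings and re-expanding them.
  val paths: Array[Path] = statuses.map(_.getPath)
  paths.foreach(println)
}
```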
CSVFileFormat.scala
@@ -56,8 +56,7 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
     require(files.nonEmpty, "Cannot infer schema from an empty set of files")

     val csvOptions = new CSVOptions(options, sparkSession.sessionState.conf.sessionLocalTimeZone)
-    val paths = files.map(_.getPath.toString)
-    val lines: Dataset[String] = createBaseDataset(sparkSession, csvOptions, paths)
+    val lines: Dataset[String] = createBaseDataset(sparkSession, csvOptions, files)
     val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
     Some(CSVInferSchema.infer(lines, caseSensitive, csvOptions))
   }
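
For orientation, this inferSchema path is what serves schema inference through the public reader API. A minimal usage sketch, assuming a local Spark session and a hypothetical file path:

```scala
import org.apache.spark.sql.SparkSession

object InferSchemaDemo extends App {
  val spark = SparkSession.builder()
    .master("local[1]")
    .appName("csv-infer-schema")
    .getOrCreate()

  // Triggers CSVFileFormat.inferSchema, which reads the file's lines via
  // createBaseDataset and feeds them to CSVInferSchema.infer.
  val df = spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv("/tmp/csv/people.csv")  // hypothetical path

  df.printSchema()
  spark.stop()
}
```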
@@ -128,14 +127,16 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
   private def createBaseDataset(
       sparkSession: SparkSession,
       options: CSVOptions,
-      inputPaths: Seq[String]): Dataset[String] = {
+      inputPaths: Seq[FileStatus]): Dataset[String] = {
     if (Charset.forName(options.charset) == StandardCharsets.UTF_8) {
+      // Fix for SPARK-19340: call createHadoopRelation instead of
+      // resolveRelation, to avoid re-running glob pattern resolution on
+      // file paths that have already been resolved.
       sparkSession.baseRelationToDataFrame(
         DataSource.apply(
           sparkSession,
-          paths = inputPaths,
+          paths = inputPaths.map(_.getPath.toString),
           className = classOf[TextFileFormat].getName
-        ).resolveRelation(checkFilesExist = false))
+        ).createHadoopRelation(new TextFileFormat, inputPaths.map(_.getPath).toArray))
         .select("value").as[String](Encoders.STRING)
     } else {
       val charset = options.charset
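
To see the failure mode this guards against, a rough reproduction sketch using plain Hadoop APIs (the directory and file name are hypothetical; that glob metacharacters in file names are the symptom behind SPARK-19340 is an assumption based on the comment above): re-globbing an already-resolved path misreads literal metacharacters as pattern syntax.

```scala
import java.io.File
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ReGlobDemo extends App {
  // Create a file whose name contains glob metacharacters.
  val dir = new File("/tmp/reglob-demo")
  dir.mkdirs()
  val f = new File(dir, "data[1].csv")
  f.createNewFile()

  val fs = FileSystem.getLocal(new Configuration())

  // Treating the already-resolved name as a pattern again: "[1]" is read as
  // a character class matching the single character '1', so the literal
  // file "data[1].csv" is not matched.
  val reGlobbed = Option(fs.globStatus(new Path(f.getAbsolutePath)))
    .getOrElse(Array.empty)
  println(s"re-globbed matches: ${reGlobbed.length}")  // 0: the file is missed

  // Asking for the status of the concrete path directly still works.
  println(fs.getFileStatus(new Path(f.getAbsolutePath)).getPath)
}
```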