@@ -20,7 +20,10 @@ package org.apache.spark.sql.sources
 import scala.language.{existentials, implicitConversions}
 import scala.util.matching.Regex
 
+import org.apache.hadoop.fs.Path
+
 import org.apache.spark.Logging
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql.catalyst.AbstractSparkSQLParser
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row}
@@ -157,7 +160,7 @@ private[sql] class DDLParser(
   protected lazy val className: Parser[String] = repsep(ident, ".") ^^ { case s => s.mkString(".")}
 
   override implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
-    s"identifier matching regex ${regex}", {
+    s"identifier matching regex $regex", {
       case lexical.Identifier(str) if regex.unapplySeq(str).isDefined => str
       case lexical.Keyword(str) if regex.unapplySeq(str).isDefined => str
     }
@@ -230,11 +233,18 @@ private[sql] object ResolvedDataSource {
             Some(partitionColumnsSchema(schema, partitionColumns))
           }
 
+        val caseInsensitiveOptions = new CaseInsensitiveMap(options)
+        val paths = {
+          val patternPath = new Path(caseInsensitiveOptions("path"))
+          SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray
+        }
+
         dataSource.createRelation(
           sqlContext,
+          paths,
           Some(schema),
           maybePartitionsSchema,
-          new CaseInsensitiveMap(options))
+          caseInsensitiveOptions)
       case dataSource: org.apache.spark.sql.sources.RelationProvider =>
         throw new AnalysisException(s"$className does not allow user-specified schemas.")
       case _ =>
@@ -245,7 +255,12 @@ private[sql] object ResolvedDataSource {
       case dataSource: RelationProvider =>
         dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options))
       case dataSource: FSBasedRelationProvider =>
-        dataSource.createRelation(sqlContext, None, None, new CaseInsensitiveMap(options))
+        val caseInsensitiveOptions = new CaseInsensitiveMap(options)
+        val paths = {
+          val patternPath = new Path(caseInsensitiveOptions("path"))
+          SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray
+        }
+        dataSource.createRelation(sqlContext, paths, None, None, caseInsensitiveOptions)
       case dataSource: org.apache.spark.sql.sources.SchemaRelationProvider =>
         throw new AnalysisException(
           s"A schema needs to be specified when using $className.")
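
Note on the two read-path hunks above: the user-supplied "path" option is expanded with Hadoop glob semantics before the relation is constructed, so a single pattern can resolve to several input directories. A minimal sketch of that expansion, assuming a configured Hadoop environment and a hypothetical /data/events layout:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.deploy.SparkHadoopUtil

    // Hypothetical glob pattern, as it would arrive via the "path" option.
    val pattern = new Path("/data/events/date=2015-05-*")
    // globPath resolves the pattern against its file system and returns every
    // matching path; these strings are what createRelation receives above.
    val paths: Array[String] =
      SparkHadoopUtil.get.globPath(pattern).map(_.toString).toArray
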
@@ -280,11 +295,22 @@ private[sql] object ResolvedDataSource {
       case dataSource: CreatableRelationProvider =>
         dataSource.createRelation(sqlContext, mode, options, data)
       case dataSource: FSBasedRelationProvider =>
+        // Don't glob path for the write path.  The contracts here are:
+        //  1. Only one output path can be specified on the write path;
+        //  2. Output path must be a legal HDFS style file system path;
+        //  3. It's OK that the output path doesn't exist yet;
+        val caseInsensitiveOptions = new CaseInsensitiveMap(options)
+        val outputPath = {
+          val path = new Path(caseInsensitiveOptions("path"))
+          val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
+          path.makeQualified(fs.getUri, fs.getWorkingDirectory)
+        }
         val r = dataSource.createRelation(
           sqlContext,
+          Array(outputPath.toString),
           Some(data.schema),
           Some(partitionColumnsSchema(data.schema, partitionColumns)),
-          options)
+          caseInsensitiveOptions)
         sqlContext.executePlan(
           InsertIntoFSBasedRelation(
             r,
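
Note on the write-path hunk above: the "path" option is deliberately not globbed; the single output path is only qualified against its file system, so relative or scheme-less locations resolve predictably. A small sketch of that qualification, assuming a plain Hadoop Configuration stands in for sqlContext.sparkContext.hadoopConfiguration and the output location is hypothetical:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path

    val conf = new Configuration()
    val path = new Path("output/parquet")  // hypothetical; need not exist yet
    val fs = path.getFileSystem(conf)
    // Qualifies the path with the file system URI and working directory, e.g.
    // turning "output/parquet" into "hdfs://nn:8020/user/alice/output/parquet".
    val qualified = path.makeQualified(fs.getUri, fs.getWorkingDirectory)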