@@ -19,10 +19,9 @@ package org.apache.spark.sql.sources
1919
2020import scala .language .implicitConversions
2121import scala .util .parsing .combinator .syntactical .StandardTokenParsers
22- import scala .util .parsing .combinator .{ RegexParsers , PackratParsers }
22+ import scala .util .parsing .combinator .PackratParsers
2323
2424import org .apache .spark .Logging
25- import org .apache .spark .annotation .DeveloperApi
2625import org .apache .spark .sql .SQLContext
2726import org .apache .spark .sql .catalyst .types ._
2827import org .apache .spark .sql .execution .RunnableCommand
@@ -44,18 +43,43 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
4443 }
4544 }
4645
/**
 * Parses a textual data type description into a [[DataType]].
 *
 * The `dataType` grammar is wrapped in `phrase`, so the whole input has to be consumed for the
 * parse to count as a success.
 *
 * @param input the data type text to parse
 * @return the data type produced by the `dataType` grammar
 * @throws RuntimeException raised through `sys.error` when the parse does not succeed; the
 *                          message carries the unsuccessful parse result
 */
def parseType(input: String): DataType = {
  val parsed = phrase(dataType)(new lexical.Scanner(input))
  parsed match {
    case Success(result, _) => result
    case noSuccess => sys.error(s"Unsupported dataType: $noSuccess")
  }
}
53+
/** Wrapper marking a string as a keyword of this grammar. */
protected case class Keyword(str: String)

/**
 * Turns a [[Keyword]] into a parser by building one string parser per variant returned by
 * `lexical.allCaseVersions` and combining them into a single alternation.
 */
protected implicit def asParser(k: Keyword): Parser[String] = {
  val variantParsers = lexical.allCaseVersions(k.str).map(variant => variant: Parser[String])
  variantParsers.reduce(_ | _)
}
5158
// Keywords used by the `createTable` rule.
protected val CREATE = Keyword("CREATE")
protected val TEMPORARY = Keyword("TEMPORARY")
protected val TABLE = Keyword("TABLE")
protected val USING = Keyword("USING")
protected val OPTIONS = Keyword("OPTIONS")

// Data types. Each keyword below is referenced by one of the data type rules
// (primitiveType, fixedDecimalType, arrayType, mapType, structType).
protected val STRING = Keyword("STRING")
protected val FLOAT = Keyword("FLOAT")
protected val INT = Keyword("INT")
protected val TINYINT = Keyword("TINYINT")
protected val SMALLINT = Keyword("SMALLINT")
protected val DOUBLE = Keyword("DOUBLE")
protected val BIGINT = Keyword("BIGINT")
protected val BINARY = Keyword("BINARY")
protected val BOOLEAN = Keyword("BOOLEAN")
protected val DECIMAL = Keyword("DECIMAL")
protected val DATE = Keyword("DATE")
protected val TIMESTAMP = Keyword("TIMESTAMP")
protected val VARCHAR = Keyword("VARCHAR")
protected val ARRAY = Keyword("ARRAY")
protected val MAP = Keyword("MAP")
protected val STRUCT = Keyword("STRUCT")
82+
5983 // Use reflection to find the reserved words defined in this class.
6084 protected val reservedWords =
6185 this .getClass
@@ -77,20 +101,15 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
77101 * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")`
78102 */
protected lazy val createTable: Parser[LogicalPlan] =
  (
    CREATE ~ TEMPORARY ~ TABLE ~> ident
    ~ tableCols.? ~ (USING ~> className) ~ (OPTIONS ~> options) ^^ {
      case tableName ~ columns ~ provider ~ opts =>
        // The column list is optional: when it is omitted an empty column sequence is passed on.
        // `getOrElse` replaces the previous isEmpty / get pair so the Option is never unwrapped
        // with the partial `.get`.
        val tblColumns = columns.getOrElse(Seq.empty[StructField])
        CreateTableUsing(tableName, tblColumns, provider, opts)
    }
  )
91112
92- protected lazy val metastoreTypes = new MetastoreTypes
93-
/** Parses a parenthesised, comma-separated list of `column` definitions (the list may be empty). */
protected lazy val tableCols: Parser[Seq[StructField]] =
  ("(" ~> repsep(column, ",")) <~ ")"
95114
96115 protected lazy val options : Parser [Map [String , String ]] =
@@ -101,96 +120,62 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
/** Parses one key/value entry: an identifier followed by a string literal. */
protected lazy val pair: Parser[(String, String)] =
  ident ~ stringLit ^^ { case key ~ value => key -> value }
102121
/**
 * Parses a single column definition: an identifier followed by a data type.
 *
 * The column name is passed through `cleanIdentifier` before the [[StructField]] is built.
 */
protected lazy val column: Parser[StructField] =
  (ident ~ dataType) ^^ {
    case rawName ~ columnType =>
      val columnName = cleanIdentifier(rawName)
      StructField(columnName, columnType)
  }
113- }
114126
115- /**
116- * :: DeveloperApi ::
117- * Provides a parser for data types.
118- */
119- @ DeveloperApi
120- private [sql] class MetastoreTypes extends RegexParsers {
/**
 * Parses the non-nested data types.
 *
 * Each keyword maps to a fixed [[DataType]]. `fixedDecimalType` is listed ahead of the bare
 * DECIMAL keyword so the form with precision/scale is tried first. For VARCHAR the length
 * literal is parsed and then discarded: the result is always `StringType`.
 */
protected lazy val primitiveType: Parser[DataType] =
  ( STRING ^^^ StringType
  | BINARY ^^^ BinaryType
  | BOOLEAN ^^^ BooleanType
  | TINYINT ^^^ ByteType
  | SMALLINT ^^^ ShortType
  | INT ^^^ IntegerType
  | BIGINT ^^^ LongType
  | FLOAT ^^^ FloatType
  | DOUBLE ^^^ DoubleType
  | fixedDecimalType                    // decimal with precision/scale
  | DECIMAL ^^^ DecimalType.Unlimited   // decimal with no precision/scale
  | DATE ^^^ DateType
  | TIMESTAMP ^^^ TimestampType
  | VARCHAR ~ "(" ~ numericLit ~ ")" ^^^ StringType
  )
136142
/**
 * Parses `DECIMAL(precision, scale)` into a [[DecimalType]].
 *
 * Both numeric literals are converted with `toInt` before the type is constructed.
 */
protected lazy val fixedDecimalType: Parser[DataType] = {
  val precisionPart = DECIMAL ~ "(" ~> numericLit
  val scalePart = "," ~> numericLit <~ ")"
  precisionPart ~ scalePart ^^ {
    case precisionLit ~ scaleLit =>
      DecimalType(precisionLit.toInt, scaleLit.toInt)
  }
}
142147
/** Parses `ARRAY<elementType>` into an [[ArrayType]] of the nested data type. */
protected lazy val arrayType: Parser[DataType] =
  (ARRAY ~> "<" ~> dataType <~ ">") ^^ { elementType =>
    ArrayType(elementType)
  }
147152
/** Parses `MAP<keyType, valueType>` into a [[MapType]] built from the two nested data types. */
protected lazy val mapType: Parser[DataType] =
  (MAP ~> "<" ~> dataType) ~ ("," ~> dataType <~ ">") ^^ {
    case keyType ~ valueType => MapType(keyType, valueType)
  }
152157
/**
 * Parses one struct member written as `name : dataType`.
 *
 * The name is passed through `cleanIdentifier`, and the resulting field is always nullable.
 */
protected lazy val structField: Parser[StructField] =
  (ident <~ ":") ~ dataType ^^ {
    case rawName ~ fieldType =>
      StructField(cleanIdentifier(rawName), fieldType, nullable = true)
  }
157162
/** Parses `STRUCT<field, field, ...>` into a [[StructType]] holding the parsed fields. */
protected lazy val structType: Parser[DataType] =
  (STRUCT ~> "<" ~> repsep(structField, ",") <~ ">") ^^ { members =>
    new StructType(members)
  }
162167
/**
 * Top-level rule of the data type grammar.
 *
 * Alternatives are tried in order: array, map, struct, then the primitive types.
 */
private[sql] lazy val dataType: Parser[DataType] =
  ( arrayType
  | mapType
  | structType
  | primitiveType
  )
173+
/** Matches an identifier wrapped in backticks and captures the text between them. */
protected val escapedIdentifier = "`([^`]+)`".r

/**
 * Strips the surrounding backticks from an identifier when it is fully backtick-quoted.
 *
 * @param ident the identifier text as produced by the parser
 * @return the text between the backticks when the whole identifier matches
 *         `escapedIdentifier`; otherwise `ident` unchanged
 */
protected def cleanIdentifier(ident: String): String = ident match {
  case escapedIdentifier(unquoted) => unquoted
  case _ => ident
}
195180}
196181
0 commit comments