diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index e5ca7e9d10d59..6488ad9cd34c9 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -127,7 +127,7 @@ By default `spark.sql.ansi.enabled` is false.
Below is a list of all the keywords in Spark SQL.
-|Keyword|Spark SQL<br/>ANSI Mode|Spark SQL<br/>Default Mode|SQL-2011|
+|Keyword|Spark SQL<br/>ANSI Mode|Spark SQL<br/>Default Mode|SQL-2016|
|-------|----------------------|-------------------------|--------|
|ADD|non-reserved|non-reserved|non-reserved|
|AFTER|non-reserved|non-reserved|non-reserved|
@@ -149,7 +149,7 @@ Below is a list of all the keywords in Spark SQL.
|BUCKETS|non-reserved|non-reserved|non-reserved|
|BY|non-reserved|non-reserved|reserved|
|CACHE|non-reserved|non-reserved|non-reserved|
-|CASCADE|non-reserved|non-reserved|reserved|
+|CASCADE|non-reserved|non-reserved|non-reserved|
|CASE|reserved|non-reserved|reserved|
|CAST|reserved|non-reserved|reserved|
|CHANGE|non-reserved|non-reserved|non-reserved|
@@ -193,7 +193,7 @@ Below is a list of all the keywords in Spark SQL.
|DIRECTORY|non-reserved|non-reserved|non-reserved|
|DISTINCT|reserved|non-reserved|reserved|
|DISTRIBUTE|non-reserved|non-reserved|non-reserved|
-|DIV|non-reserved|non-reserved|non-reserved|
+|DIV|non-reserved|non-reserved|not a keyword|
|DROP|non-reserved|non-reserved|reserved|
|ELSE|reserved|non-reserved|reserved|
|END|reserved|non-reserved|reserved|
@@ -228,7 +228,7 @@ Below is a list of all the keywords in Spark SQL.
|GROUPING|non-reserved|non-reserved|reserved|
|HAVING|reserved|non-reserved|reserved|
|HOUR|reserved|non-reserved|reserved|
-|IF|non-reserved|non-reserved|reserved|
+|IF|non-reserved|non-reserved|not a keyword|
|IGNORE|non-reserved|non-reserved|non-reserved|
|IMPORT|non-reserved|non-reserved|non-reserved|
|IN|reserved|non-reserved|reserved|
@@ -302,12 +302,14 @@ Below is a list of all the keywords in Spark SQL.
|PROPERTIES|non-reserved|non-reserved|non-reserved|
|PURGE|non-reserved|non-reserved|non-reserved|
|QUERY|non-reserved|non-reserved|non-reserved|
+|RANGE|non-reserved|non-reserved|reserved|
|RECORDREADER|non-reserved|non-reserved|non-reserved|
|RECORDWRITER|non-reserved|non-reserved|non-reserved|
|RECOVER|non-reserved|non-reserved|non-reserved|
|REDUCE|non-reserved|non-reserved|non-reserved|
|REFERENCES|reserved|non-reserved|reserved|
|REFRESH|non-reserved|non-reserved|non-reserved|
+|REGEXP|non-reserved|non-reserved|not a keyword|
|RENAME|non-reserved|non-reserved|non-reserved|
|REPAIR|non-reserved|non-reserved|non-reserved|
|REPLACE|non-reserved|non-reserved|non-reserved|
@@ -323,6 +325,7 @@ Below is a list of all the keywords in Spark SQL.
|ROW|non-reserved|non-reserved|reserved|
|ROWS|non-reserved|non-reserved|reserved|
|SCHEMA|non-reserved|non-reserved|non-reserved|
+|SCHEMAS|non-reserved|non-reserved|not a keyword|
|SECOND|reserved|non-reserved|reserved|
|SELECT|reserved|non-reserved|reserved|
|SEMI|non-reserved|strict-non-reserved|non-reserved|
@@ -348,6 +351,7 @@ Below is a list of all the keywords in Spark SQL.
|TABLES|non-reserved|non-reserved|non-reserved|
|TABLESAMPLE|non-reserved|non-reserved|reserved|
|TBLPROPERTIES|non-reserved|non-reserved|non-reserved|
+|TEMP|non-reserved|non-reserved|not a keyword|
|TEMPORARY|non-reserved|non-reserved|non-reserved|
|TERMINATED|non-reserved|non-reserved|non-reserved|
|THEN|reserved|non-reserved|reserved|
@@ -360,6 +364,7 @@ Below is a list of all the keywords in Spark SQL.
|TRIM|non-reserved|non-reserved|non-reserved|
|TRUE|non-reserved|non-reserved|reserved|
|TRUNCATE|non-reserved|non-reserved|reserved|
+|TYPE|non-reserved|non-reserved|non-reserved|
|UNARCHIVE|non-reserved|non-reserved|non-reserved|
|UNBOUNDED|non-reserved|non-reserved|non-reserved|
|UNCACHE|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index b383e037e1ed8..66dde85af467d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -1531,6 +1531,7 @@ DIRECTORIES: 'DIRECTORIES';
DIRECTORY: 'DIRECTORY';
DISTINCT: 'DISTINCT';
DISTRIBUTE: 'DISTRIBUTE';
+DIV: 'DIV';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
@@ -1738,7 +1739,6 @@ MINUS: '-';
ASTERISK: '*';
SLASH: '/';
PERCENT: '%';
-DIV: 'DIV';
TILDE: '~';
AMPERSAND: '&';
PIPE: '|';
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
new file mode 100644
index 0000000000000..3d41d02b23df5
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import java.io.File
+import java.nio.file.Files
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.catalyst.util.fileToString
+
+trait SQLKeywordUtils extends SQLHelper {
+
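+ // The lines of the ANTLR grammar file (`SqlBase.g4`) that defines Spark's SQL syntax.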
+ val sqlSyntaxDefs = {
+ val sqlBasePath = {
+ java.nio.file.Paths.get(sparkHome, "sql", "catalyst", "src", "main", "antlr4", "org",
+ "apache", "spark", "sql", "catalyst", "parser", "SqlBase.g4").toFile
+ }
+ fileToString(sqlBasePath).split("\n")
+ }
+
+ // Each element is an array of four strings: the keyword name, whether it is reserved
+ // in Spark's ANSI mode, whether it is reserved in Spark's default mode, and whether
+ // it is reserved in the SQL standard.
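+ // e.g., Array("CASCADE", "non-reserved", "non-reserved", "non-reserved").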
+ val keywordsInDoc: Array[Array[String]] = {
+ val docPath = {
+ java.nio.file.Paths.get(sparkHome, "docs", "sql-ref-ansi-compliance.md").toFile
+ }
+ fileToString(docPath).split("\n")
+ .dropWhile(!_.startsWith("|Keyword|")).drop(2).takeWhile(_.startsWith("|"))
+ .map(_.stripPrefix("|").split("\\|").map(_.trim))
+ }
+
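+ // Collects symbols from the lines of `SqlBase.g4` between `startTag` and `endTag`
+ // by applying the partial function `f` to each line in the tagged region.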
+ private def parseAntlrGrammars[T](startTag: String, endTag: String)
+ (f: PartialFunction[String, Seq[T]]): Set[T] = {
+ val keywords = new mutable.ArrayBuffer[T]
+ val default = (_: String) => Nil
+ var startTagFound = false
+ var parseFinished = false
+ val lineIter = sqlSyntaxDefs.toIterator
+ while (!parseFinished && lineIter.hasNext) {
+ val line = lineIter.next()
+ if (line.trim.startsWith(startTag)) {
+ startTagFound = true
+ } else if (line.trim.startsWith(endTag)) {
+ parseFinished = true
+ } else if (startTagFound) {
+ f.applyOrElse(line, default).foreach { symbol =>
+ keywords += symbol
+ }
+ }
+ }
+ assert(keywords.nonEmpty && startTagFound && parseFinished, "cannot extract keywords from " +
+ s"the `SqlBase.g4` file, so please check if the start/end tags (`$startTag` and `$endTag`) " +
+ "are placed correctly in the file.")
+ keywords.toSet
+ }
+
+ // If a symbol's string differs from its literal (e.g., `SETMINUS: 'MINUS';`),
+ // we need to map the symbol to its actual literal strings.
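+ // For example, `DATABASES: 'DATABASES' | 'SCHEMAS';` is mapped to the entry
+ // ("DATABASES", Seq("DATABASES", "SCHEMAS")).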
+ val symbolsToExpandIntoDifferentLiterals = {
+ val kwDef = """([A-Z_]+):(.+);""".r
+ val keywords = parseAntlrGrammars(
+ "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
+ case kwDef(symbol, literalDef) =>
+ val splitDefs = literalDef.split("""\|""")
+ val hasMultipleLiterals = splitDefs.length > 1
+ // The case where a symbol has multiple literal definitions,
+ // e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
+ if (hasMultipleLiterals) {
+ // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
+ val litDef = """([A-Z_]+)""".r
+ val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
+ case litDef(lit) => Some(lit)
+ case _ => None
+ }
+ (symbol, literals) :: Nil
+ } else {
+ val literal = literalDef.replaceAll("'", "").trim
+ // The case where a symbol string and its literal string are different,
+ // e.g., `SETMINUS: 'MINUS';`.
+ if (symbol != literal) {
+ (symbol, literal :: Nil) :: Nil
+ } else {
+ Nil
+ }
+ }
+ }
+ keywords.toMap
+ }
+
+ // All the SQL keywords defined in `SqlBase.g4`
+ val allCandidateKeywords: Set[String] = {
+ val kwDef = """([A-Z_]+):.+;""".r
+ parseAntlrGrammars(
+ "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
+ // Parses a pattern, e.g., `AFTER: 'AFTER';`
+ case kwDef(symbol) =>
+ if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
+ symbolsToExpandIntoDifferentLiterals(symbol)
+ } else {
+ symbol :: Nil
+ }
+ }
+ }
+
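+ // The keywords explicitly listed as non-reserved in ANSI mode, i.e., the symbols
+ // between the `//--ANSI-NON-RESERVED-START` and `//--ANSI-NON-RESERVED-END` tags.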
+ val nonReservedKeywordsInAnsiMode: Set[String] = {
+ val kwDef = """\s*[\|:]\s*([A-Z_]+)\s*""".r
+ parseAntlrGrammars("//--ANSI-NON-RESERVED-START", "//--ANSI-NON-RESERVED-END") {
+ // Parses a pattern, e.g., ` | AFTER`
+ case kwDef(symbol) =>
+ if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
+ symbolsToExpandIntoDifferentLiterals(symbol)
+ } else {
+ symbol :: Nil
+ }
+ }
+ }
+
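+ // In ANSI mode, any candidate keyword not explicitly marked non-reserved is reserved.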
+ val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
+}
+
+class SQLKeywordSuite extends SparkFunSuite with SQLKeywordUtils {
+ test("all keywords are documented") {
+ val documentedKeywords = keywordsInDoc.map(_.head).toSet
+ if (allCandidateKeywords != documentedKeywords) {
+ val undocumented = (allCandidateKeywords -- documentedKeywords).toSeq.sorted
+ fail("Some keywords are not documented: " + undocumented.mkString(", "))
+ }
+ }
+
+ test("Spark keywords are documented correctly") {
+ val reservedKeywordsInDoc = keywordsInDoc.filter(_.apply(1) == "reserved").map(_.head).toSet
+ if (reservedKeywordsInAnsiMode != reservedKeywordsInDoc) {
+ val misImplemented = (reservedKeywordsInDoc -- reservedKeywordsInAnsiMode).toSeq.sorted
+ fail("Some keywords are documented as reserved but actually not: " +
+ misImplemented.mkString(", "))
+ }
+ }
+
+ test("SQL 2016 keywords are documented correctly") {
+ withTempDir { dir =>
+ val tmpFile = new File(dir, "tmp")
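+ // The test resource lists the reserved keywords of the SQL 2016 standard,
+ // one keyword per line, with comment lines prefixed by `--`.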
+ val is = Thread.currentThread().getContextClassLoader
+ .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
+ Files.copy(is, tmpFile.toPath)
+ val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
+ .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
+ val documented = keywordsInDoc.filter(_.last == "reserved").map(_.head).toSet
+ val misDocumented = (documented -- reservedKeywordsInSql2016).toSeq.sorted
+ assert(misDocumented.isEmpty, "Some keywords are documented as reserved in SQL 2016 " +
+ "but not actually reserved in the standard: " + misDocumented.mkString(", "))
+ }
+ }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index a721e17aef02d..f037ce7b9e793 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -16,19 +16,11 @@
*/
package org.apache.spark.sql.catalyst.parser
-import java.io.File
-import java.nio.file.Files
-
-import scala.collection.JavaConverters._
-import scala.collection.mutable
-
import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.fileToString
+import org.apache.spark.sql.catalyst.{SQLKeywordUtils, TableIdentifier}
import org.apache.spark.sql.internal.SQLConf
-class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
+class TableIdentifierParserSuite extends SparkFunSuite with SQLKeywordUtils {
import CatalystSqlParser._
// Add "$elem$", "$value$" & "$key$"
@@ -292,121 +284,6 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"where",
"with")
- private val sqlSyntaxDefs = {
- val sqlBasePath = {
- java.nio.file.Paths.get(sparkHome, "sql", "catalyst", "src", "main", "antlr4", "org",
- "apache", "spark", "sql", "catalyst", "parser", "SqlBase.g4").toFile
- }
- fileToString(sqlBasePath).split("\n")
- }
-
- private def parseAntlrGrammars[T](startTag: String, endTag: String)
- (f: PartialFunction[String, Seq[T]]): Set[T] = {
- val keywords = new mutable.ArrayBuffer[T]
- val default = (_: String) => Nil
- var startTagFound = false
- var parseFinished = false
- val lineIter = sqlSyntaxDefs.toIterator
- while (!parseFinished && lineIter.hasNext) {
- val line = lineIter.next()
- if (line.trim.startsWith(startTag)) {
- startTagFound = true
- } else if (line.trim.startsWith(endTag)) {
- parseFinished = true
- } else if (startTagFound) {
- f.applyOrElse(line, default).foreach { symbol =>
- keywords += symbol
- }
- }
- }
- assert(keywords.nonEmpty && startTagFound && parseFinished, "cannot extract keywords from " +
- s"the `SqlBase.g4` file, so please check if the start/end tags (`$startTag` and `$endTag`) " +
- "are placed correctly in the file.")
- keywords.toSet
- }
-
- // If a symbol does not have the same string with its literal (e.g., `SETMINUS: 'MINUS';`),
- // we need to map a symbol to actual literal strings.
- val symbolsToExpandIntoDifferentLiterals = {
- val kwDef = """([A-Z_]+):(.+);""".r
- val keywords = parseAntlrGrammars(
- "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
- case kwDef(symbol, literalDef) =>
- val splitDefs = literalDef.split("""\|""")
- val hasMultipleLiterals = splitDefs.length > 1
- // The case where a symbol has multiple literal definitions,
- // e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
- if (hasMultipleLiterals) {
- // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
- val litDef = """([A-Z_]+)""".r
- val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
- case litDef(lit) => Some(lit)
- case _ => None
- }
- (symbol, literals) :: Nil
- } else {
- val literal = literalDef.replaceAll("'", "").trim
- // The case where a symbol string and its literal string are different,
- // e.g., `SETMINUS: 'MINUS';`.
- if (symbol != literal) {
- (symbol, literal :: Nil) :: Nil
- } else {
- Nil
- }
- }
- }
- keywords.toMap
- }
-
- // All the SQL keywords defined in `SqlBase.g4`
- val allCandidateKeywords = {
- val kwDef = """([A-Z_]+):.+;""".r
- val keywords = parseAntlrGrammars(
- "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
- // Parses a pattern, e.g., `AFTER: 'AFTER';`
- case kwDef(symbol) =>
- if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
- symbolsToExpandIntoDifferentLiterals(symbol)
- } else {
- symbol :: Nil
- }
- }
- keywords
- }
-
- val nonReservedKeywordsInAnsiMode = {
- val kwDef = """\s*[\|:]\s*([A-Z_]+)\s*""".r
- parseAntlrGrammars("//--ANSI-NON-RESERVED-START", "//--ANSI-NON-RESERVED-END") {
- // Parses a pattern, e.g., ` | AFTER`
- case kwDef(symbol) =>
- if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
- symbolsToExpandIntoDifferentLiterals(symbol)
- } else {
- symbol :: Nil
- }
- }
- }
-
- val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
-
- test("check # of reserved keywords") {
- val numReservedKeywords = 74
- assert(reservedKeywordsInAnsiMode.size == numReservedKeywords,
- s"The expected number of reserved keywords is $numReservedKeywords, but " +
- s"${reservedKeywordsInAnsiMode.size} found.")
- }
-
- test("reserved keywords in Spark are also reserved in SQL 2016") {
- withTempDir { dir =>
- val tmpFile = new File(dir, "tmp")
- val is = Thread.currentThread().getContextClassLoader
- .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
- Files.copy(is, tmpFile.toPath)
- val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
- .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
- assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty)
- }
- }
test("table identifier") {
// Regular names.