diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index e5ca7e9d10d59..6488ad9cd34c9 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -127,7 +127,7 @@ By default `spark.sql.ansi.enabled` is false.
 
 Below is a list of all the keywords in Spark SQL.
 
-|Keyword|Spark SQL<br/>ANSI Mode|Spark SQL<br/>Default Mode|SQL-2011|
+|Keyword|Spark SQL<br/>ANSI Mode|Spark SQL<br/>Default Mode|SQL-2016|
 |-------|----------------------|-------------------------|--------|
 |ADD|non-reserved|non-reserved|non-reserved|
 |AFTER|non-reserved|non-reserved|non-reserved|
@@ -149,7 +149,7 @@ Below is a list of all the keywords in Spark SQL.
 |BUCKETS|non-reserved|non-reserved|non-reserved|
 |BY|non-reserved|non-reserved|reserved|
 |CACHE|non-reserved|non-reserved|non-reserved|
-|CASCADE|non-reserved|non-reserved|reserved|
+|CASCADE|non-reserved|non-reserved|non-reserved|
 |CASE|reserved|non-reserved|reserved|
 |CAST|reserved|non-reserved|reserved|
 |CHANGE|non-reserved|non-reserved|non-reserved|
@@ -193,7 +193,7 @@ Below is a list of all the keywords in Spark SQL.
 |DIRECTORY|non-reserved|non-reserved|non-reserved|
 |DISTINCT|reserved|non-reserved|reserved|
 |DISTRIBUTE|non-reserved|non-reserved|non-reserved|
-|DIV|non-reserved|non-reserved|non-reserved|
+|DIV|non-reserved|non-reserved|not a keyword|
 |DROP|non-reserved|non-reserved|reserved|
 |ELSE|reserved|non-reserved|reserved|
 |END|reserved|non-reserved|reserved|
@@ -228,7 +228,7 @@ Below is a list of all the keywords in Spark SQL.
 |GROUPING|non-reserved|non-reserved|reserved|
 |HAVING|reserved|non-reserved|reserved|
 |HOUR|reserved|non-reserved|reserved|
-|IF|non-reserved|non-reserved|reserved|
+|IF|non-reserved|non-reserved|not a keyword|
 |IGNORE|non-reserved|non-reserved|non-reserved|
 |IMPORT|non-reserved|non-reserved|non-reserved|
 |IN|reserved|non-reserved|reserved|
@@ -302,12 +302,14 @@ Below is a list of all the keywords in Spark SQL.
 |PROPERTIES|non-reserved|non-reserved|non-reserved|
 |PURGE|non-reserved|non-reserved|non-reserved|
 |QUERY|non-reserved|non-reserved|non-reserved|
+|RANGE|non-reserved|non-reserved|reserved|
 |RECORDREADER|non-reserved|non-reserved|non-reserved|
 |RECORDWRITER|non-reserved|non-reserved|non-reserved|
 |RECOVER|non-reserved|non-reserved|non-reserved|
 |REDUCE|non-reserved|non-reserved|non-reserved|
 |REFERENCES|reserved|non-reserved|reserved|
 |REFRESH|non-reserved|non-reserved|non-reserved|
+|REGEXP|non-reserved|non-reserved|not a keyword|
 |RENAME|non-reserved|non-reserved|non-reserved|
 |REPAIR|non-reserved|non-reserved|non-reserved|
 |REPLACE|non-reserved|non-reserved|non-reserved|
@@ -323,6 +325,7 @@ Below is a list of all the keywords in Spark SQL.
 |ROW|non-reserved|non-reserved|reserved|
 |ROWS|non-reserved|non-reserved|reserved|
 |SCHEMA|non-reserved|non-reserved|non-reserved|
+|SCHEMAS|non-reserved|non-reserved|not a keyword|
 |SECOND|reserved|non-reserved|reserved|
 |SELECT|reserved|non-reserved|reserved|
 |SEMI|non-reserved|strict-non-reserved|non-reserved|
@@ -348,6 +351,7 @@ Below is a list of all the keywords in Spark SQL.
 |TABLES|non-reserved|non-reserved|non-reserved|
 |TABLESAMPLE|non-reserved|non-reserved|reserved|
 |TBLPROPERTIES|non-reserved|non-reserved|non-reserved|
+|TEMP|non-reserved|non-reserved|not a keyword|
 |TEMPORARY|non-reserved|non-reserved|non-reserved|
 |TERMINATED|non-reserved|non-reserved|non-reserved|
 |THEN|reserved|non-reserved|reserved|
@@ -360,6 +364,7 @@ Below is a list of all the keywords in Spark SQL.
 |TRIM|non-reserved|non-reserved|non-reserved|
 |TRUE|non-reserved|non-reserved|reserved|
 |TRUNCATE|non-reserved|non-reserved|reserved|
+|TYPE|non-reserved|non-reserved|non-reserved|
 |UNARCHIVE|non-reserved|non-reserved|non-reserved|
 |UNBOUNDED|non-reserved|non-reserved|non-reserved|
 |UNCACHE|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index b383e037e1ed8..66dde85af467d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -1531,6 +1531,7 @@ DIRECTORIES: 'DIRECTORIES';
 DIRECTORY: 'DIRECTORY';
 DISTINCT: 'DISTINCT';
 DISTRIBUTE: 'DISTRIBUTE';
+DIV: 'DIV';
 DROP: 'DROP';
 ELSE: 'ELSE';
 END: 'END';
@@ -1738,7 +1739,6 @@ MINUS: '-';
 ASTERISK: '*';
 SLASH: '/';
 PERCENT: '%';
-DIV: 'DIV';
 TILDE: '~';
 AMPERSAND: '&';
 PIPE: '|';
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
new file mode 100644
index 0000000000000..3d41d02b23df5
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import java.io.File
+import java.nio.file.Files
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.catalyst.util.fileToString
+
+trait SQLKeywordUtils extends SQLHelper {
+
+  val sqlSyntaxDefs = {
+    val sqlBasePath = {
+      java.nio.file.Paths.get(sparkHome, "sql", "catalyst", "src", "main", "antlr4", "org",
+        "apache", "spark", "sql", "catalyst", "parser", "SqlBase.g4").toFile
+    }
+    fileToString(sqlBasePath).split("\n")
+  }
+
+  // Each element is an array of 4 strings: the keyword name, and whether it is reserved
+  // in Spark ANSI mode, in Spark's default (non-ANSI) mode, and in the SQL standard.
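+  // For example, the doc row "|ADD|non-reserved|non-reserved|non-reserved|" below parses
+  // into Array("ADD", "non-reserved", "non-reserved", "non-reserved").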
+  val keywordsInDoc: Array[Array[String]] = {
+    val docPath = {
+      java.nio.file.Paths.get(sparkHome, "docs", "sql-ref-ansi-compliance.md").toFile
+    }
+    fileToString(docPath).split("\n")
+      .dropWhile(!_.startsWith("|Keyword|")).drop(2).takeWhile(_.startsWith("|"))
+      .map(_.stripPrefix("|").split("\\|").map(_.trim))
+  }
+
+  private def parseAntlrGrammars[T](startTag: String, endTag: String)
+      (f: PartialFunction[String, Seq[T]]): Set[T] = {
+    val keywords = new mutable.ArrayBuffer[T]
+    val default = (_: String) => Nil
+    var startTagFound = false
+    var parseFinished = false
+    val lineIter = sqlSyntaxDefs.toIterator
+    while (!parseFinished && lineIter.hasNext) {
+      val line = lineIter.next()
+      if (line.trim.startsWith(startTag)) {
+        startTagFound = true
+      } else if (line.trim.startsWith(endTag)) {
+        parseFinished = true
+      } else if (startTagFound) {
+        f.applyOrElse(line, default).foreach { symbol =>
+          keywords += symbol
+        }
+      }
+    }
+    assert(keywords.nonEmpty && startTagFound && parseFinished, "cannot extract keywords from " +
+      s"the `SqlBase.g4` file, so please check if the start/end tags (`$startTag` and `$endTag`) " +
+      "are placed correctly in the file.")
+    keywords.toSet
+  }
+
+  // If a symbol does not have the same string as its literal (e.g., `SETMINUS: 'MINUS';`),
+  // we need to map the symbol to its actual literal strings.
+  val symbolsToExpandIntoDifferentLiterals = {
+    val kwDef = """([A-Z_]+):(.+);""".r
+    val keywords = parseAntlrGrammars(
+      "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
+      case kwDef(symbol, literalDef) =>
+        val splitDefs = literalDef.split("""\|""")
+        val hasMultipleLiterals = splitDefs.length > 1
+        // The case where a symbol has multiple literal definitions,
+        // e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
+        if (hasMultipleLiterals) {
+          // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
+          val litDef = """([A-Z_]+)""".r
+          val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
+            case litDef(lit) => Some(lit)
+            case _ => None
+          }
+          (symbol, literals) :: Nil
+        } else {
+          val literal = literalDef.replaceAll("'", "").trim
+          // The case where a symbol string and its literal string are different,
+          // e.g., `SETMINUS: 'MINUS';`.
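+          // A concrete case: for `SETMINUS: 'MINUS';`, symbol is "SETMINUS" and literal
+          // is "MINUS", so the pair ("SETMINUS", List("MINUS")) is emitted below.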
+          if (symbol != literal) {
+            (symbol, literal :: Nil) :: Nil
+          } else {
+            Nil
+          }
+        }
+    }
+    keywords.toMap
+  }
+
+  // All the SQL keywords defined in `SqlBase.g4`
+  val allCandidateKeywords: Set[String] = {
+    val kwDef = """([A-Z_]+):.+;""".r
+    parseAntlrGrammars(
+      "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
+      // Parses a pattern, e.g., `AFTER: 'AFTER';`
+      case kwDef(symbol) =>
+        if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
+          symbolsToExpandIntoDifferentLiterals(symbol)
+        } else {
+          symbol :: Nil
+        }
+    }
+  }
+
+  val nonReservedKeywordsInAnsiMode: Set[String] = {
+    val kwDef = """\s*[\|:]\s*([A-Z_]+)\s*""".r
+    parseAntlrGrammars("//--ANSI-NON-RESERVED-START", "//--ANSI-NON-RESERVED-END") {
+      // Parses a pattern, e.g., ` | AFTER`
+      case kwDef(symbol) =>
+        if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
+          symbolsToExpandIntoDifferentLiterals(symbol)
+        } else {
+          symbol :: Nil
+        }
+    }
+  }
+
+  val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
+}
+
+class SQLKeywordSuite extends SparkFunSuite with SQLKeywordUtils {
+  test("all keywords are documented") {
+    val documentedKeywords = keywordsInDoc.map(_.head).toSet
+    if (allCandidateKeywords != documentedKeywords) {
+      val undocumented = (allCandidateKeywords -- documentedKeywords).toSeq.sorted
+      fail("Some keywords are not documented: " + undocumented.mkString(", "))
+    }
+  }
+
+  test("Spark keywords are documented correctly") {
+    val reservedKeywordsInDoc = keywordsInDoc.filter(_.apply(1) == "reserved").map(_.head).toSet
+    if (reservedKeywordsInAnsiMode != reservedKeywordsInDoc) {
+      val misImplemented = (reservedKeywordsInDoc -- reservedKeywordsInAnsiMode).toSeq.sorted
+      fail("Some keywords are documented as reserved but are not actually reserved: " +
+        misImplemented.mkString(", "))
+    }
+  }
+
+  test("SQL 2016 keywords are documented correctly") {
+    withTempDir { dir =>
+      val tmpFile = new File(dir, "tmp")
+      val is = Thread.currentThread().getContextClassLoader
+        .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
+      Files.copy(is, tmpFile.toPath)
+      val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
+        .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
+      val documented = keywordsInDoc.filter(_.last == "reserved").map(_.head).toSet
+      assert((documented -- reservedKeywordsInSql2016).isEmpty)
+    }
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index a721e17aef02d..f037ce7b9e793 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -16,19 +16,11 @@
  */
 package org.apache.spark.sql.catalyst.parser
 
-import java.io.File
-import java.nio.file.Files
-
-import scala.collection.JavaConverters._
-import scala.collection.mutable
-
 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.fileToString
+import org.apache.spark.sql.catalyst.{SQLKeywordUtils, TableIdentifier}
 import org.apache.spark.sql.internal.SQLConf
 
-class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
+class TableIdentifierParserSuite extends SparkFunSuite with SQLKeywordUtils {
   import CatalystSqlParser._
 
   // Add "$elem$", "$value$" & "$key$"
"$elem$", "$value$" & "$key$" @@ -292,121 +284,6 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { "where", "with") - private val sqlSyntaxDefs = { - val sqlBasePath = { - java.nio.file.Paths.get(sparkHome, "sql", "catalyst", "src", "main", "antlr4", "org", - "apache", "spark", "sql", "catalyst", "parser", "SqlBase.g4").toFile - } - fileToString(sqlBasePath).split("\n") - } - - private def parseAntlrGrammars[T](startTag: String, endTag: String) - (f: PartialFunction[String, Seq[T]]): Set[T] = { - val keywords = new mutable.ArrayBuffer[T] - val default = (_: String) => Nil - var startTagFound = false - var parseFinished = false - val lineIter = sqlSyntaxDefs.toIterator - while (!parseFinished && lineIter.hasNext) { - val line = lineIter.next() - if (line.trim.startsWith(startTag)) { - startTagFound = true - } else if (line.trim.startsWith(endTag)) { - parseFinished = true - } else if (startTagFound) { - f.applyOrElse(line, default).foreach { symbol => - keywords += symbol - } - } - } - assert(keywords.nonEmpty && startTagFound && parseFinished, "cannot extract keywords from " + - s"the `SqlBase.g4` file, so please check if the start/end tags (`$startTag` and `$endTag`) " + - "are placed correctly in the file.") - keywords.toSet - } - - // If a symbol does not have the same string with its literal (e.g., `SETMINUS: 'MINUS';`), - // we need to map a symbol to actual literal strings. - val symbolsToExpandIntoDifferentLiterals = { - val kwDef = """([A-Z_]+):(.+);""".r - val keywords = parseAntlrGrammars( - "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") { - case kwDef(symbol, literalDef) => - val splitDefs = literalDef.split("""\|""") - val hasMultipleLiterals = splitDefs.length > 1 - // The case where a symbol has multiple literal definitions, - // e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`. - if (hasMultipleLiterals) { - // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';` - val litDef = """([A-Z_]+)""".r - val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap { - case litDef(lit) => Some(lit) - case _ => None - } - (symbol, literals) :: Nil - } else { - val literal = literalDef.replaceAll("'", "").trim - // The case where a symbol string and its literal string are different, - // e.g., `SETMINUS: 'MINUS';`. 
-          if (symbol != literal) {
-            (symbol, literal :: Nil) :: Nil
-          } else {
-            Nil
-          }
-        }
-    }
-    keywords.toMap
-  }
-
-  // All the SQL keywords defined in `SqlBase.g4`
-  val allCandidateKeywords = {
-    val kwDef = """([A-Z_]+):.+;""".r
-    val keywords = parseAntlrGrammars(
-      "//--SPARK-KEYWORD-LIST-START", "//--SPARK-KEYWORD-LIST-END") {
-      // Parses a pattern, e.g., `AFTER: 'AFTER';`
-      case kwDef(symbol) =>
-        if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
-          symbolsToExpandIntoDifferentLiterals(symbol)
-        } else {
-          symbol :: Nil
-        }
-    }
-    keywords
-  }
-
-  val nonReservedKeywordsInAnsiMode = {
-    val kwDef = """\s*[\|:]\s*([A-Z_]+)\s*""".r
-    parseAntlrGrammars("//--ANSI-NON-RESERVED-START", "//--ANSI-NON-RESERVED-END") {
-      // Parses a pattern, e.g., ` | AFTER`
-      case kwDef(symbol) =>
-        if (symbolsToExpandIntoDifferentLiterals.contains(symbol)) {
-          symbolsToExpandIntoDifferentLiterals(symbol)
-        } else {
-          symbol :: Nil
-        }
-    }
-  }
-
-  val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
-
-  test("check # of reserved keywords") {
-    val numReservedKeywords = 74
-    assert(reservedKeywordsInAnsiMode.size == numReservedKeywords,
-      s"The expected number of reserved keywords is $numReservedKeywords, but " +
-        s"${reservedKeywordsInAnsiMode.size} found.")
-  }
-
-  test("reserved keywords in Spark are also reserved in SQL 2016") {
-    withTempDir { dir =>
-      val tmpFile = new File(dir, "tmp")
-      val is = Thread.currentThread().getContextClassLoader
-        .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
-      Files.copy(is, tmpFile.toPath)
-      val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
-        .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
-      assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty)
-    }
-  }
 
   test("table identifier") {
     // Regular names.
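
Note on the behavior the documentation table above describes: with `spark.sql.ansi.enabled=true`, keywords marked "reserved" cannot be used as identifiers (for example, as column aliases), while in the default mode every keyword stays usable as an identifier. A minimal sketch of that difference, assuming a local SparkSession (the demo object and its name are illustrative, not part of this patch):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.catalyst.parser.ParseException

    object ReservedKeywordDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[1]").getOrCreate()

        // Default mode: SELECT is non-reserved, so it is a legal column alias.
        spark.sql("SET spark.sql.ansi.enabled=false")
        spark.sql("SELECT 1 AS select").show()

        // ANSI mode: SELECT is reserved, so the same alias now fails to parse.
        spark.sql("SET spark.sql.ansi.enabled=true")
        try {
          spark.sql("SELECT 1 AS select").show()
        } catch {
          case e: ParseException => println("expected parse error: " + e.getMessage)
        }

        spark.stop()
      }
    }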