-
Notifications
You must be signed in to change notification settings - Fork 16.7k
chore: 100% test coverage for SQL parsing #33568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| # .coveragerc to control coverage.py | ||
| [run] | ||
| branch = True | ||
| source = superset | ||
| # omit = bad_file.py | ||
|
|
||
| [paths] | ||
| source = | ||
| superset/ | ||
| */site-packages/ | ||
|
|
||
| [report] | ||
| # Regexes for lines to exclude from consideration | ||
| exclude_lines = | ||
| # Have to re-enable the standard pragma | ||
| pragma: no cover | ||
|
|
||
| # Don't complain about missing debug-only code: | ||
| def __repr__ | ||
| if self\.debug | ||
|
|
||
| # Don't complain if tests don't hit defensive assertion code: | ||
| raise AssertionError | ||
| raise NotImplementedError | ||
|
|
||
| # Don't complain if non-runnable code isn't run: | ||
| if 0: | ||
| if __name__ == .__main__.: | ||
|
|
||
| # Ignore importlib backport | ||
| from importlib | ||
|
|
||
| if TYPE_CHECKING: | ||
|
|
||
| #fail_under = 100 | ||
| show_missing = True |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,7 +48,7 @@ def _negate_range( | |
| self, | ||
| this: exp.Expression | None = None, | ||
| ) -> exp.Expression | None: | ||
| if not this: | ||
| if not this: # pragma: no cover | ||
| return this | ||
|
|
||
| return self.expression(exp.Not, this=self.expression(exp.Paren, this=this)) | ||
|
|
@@ -109,42 +109,15 @@ def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None: | |
| expressions = self._parse_wrapped_csv(self._parse_expression) | ||
| offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) | ||
|
|
||
| alias = self._parse_table_alias() if with_alias else None | ||
|
|
||
| if alias: | ||
| if self.dialect.UNNEST_COLUMN_ONLY: | ||
| if alias.args.get("columns"): | ||
| self.raise_error("Unexpected extra column alias in unnest.") | ||
|
|
||
| alias.set("columns", [alias.this]) | ||
| alias.set("this", None) | ||
|
|
||
| columns = alias.args.get("columns") or [] | ||
| if offset and len(expressions) < len(columns): | ||
| offset = columns.pop() | ||
|
|
||
| if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): | ||
| self._match(TokenType.ALIAS) | ||
| offset = self._parse_id_var( | ||
| any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS | ||
| ) or exp.to_identifier("offset") | ||
|
|
||
| return self.expression( | ||
| exp.Unnest, | ||
| expressions=expressions, | ||
| alias=alias, | ||
| offset=offset, | ||
| ) | ||
|
|
||
| class Generator(Firebolt.Generator): | ||
| def join_sql(self, expression: exp.Join) -> str: | ||
| if not self.SEMI_ANTI_JOIN_WITH_SIDE and expression.kind in ( | ||
| "SEMI", | ||
| "ANTI", | ||
| ): | ||
| side = None | ||
| else: | ||
| side = expression.side | ||
|
Comment on lines
-141
to
-147
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No |
||
| side = expression.side | ||
|
|
||
| op_sql = " ".join( | ||
| op | ||
|
|
@@ -168,9 +141,6 @@ def join_sql(self, expression: exp.Join) -> str: | |
| this = expression.this | ||
| this_sql = self.sql(this) | ||
|
|
||
| if exprs := self.expressions(expression): | ||
| this_sql = f"{this_sql},{self.seg(exprs)}" | ||
|
Comment on lines
-171
to
-172
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added tests covering all the syntax in the docs and never hit this line. Looked at |
||
|
|
||
| if on_sql: | ||
| on_sql = self.indent(on_sql, skip_first=True) | ||
| space = self.seg(" " * self.pad) if self.pretty else " " | ||
|
|
@@ -189,7 +159,6 @@ def join_sql(self, expression: exp.Join) -> str: | |
|
|
||
| return f", {this_sql}" | ||
|
|
||
| if op_sql != "STRAIGHT_JOIN": | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No |
||
| op_sql = f"{op_sql} JOIN" if op_sql else "JOIN" | ||
| op_sql = f"{op_sql} JOIN" if op_sql else "JOIN" | ||
|
|
||
| return f"{self.seg(op_sql)} {this_sql}{match_cond}{on_sql}" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -551,7 +551,7 @@ def _parse(cls, script: str, engine: str) -> list[exp.Expression]: | |
| last_statement = statements.pop() | ||
| target = statements[-1] | ||
| for node in statements[-1].walk(): | ||
| if hasattr(node, "comments"): | ||
| if hasattr(node, "comments"): # pragma: no cover | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There were a couple lines that I couldn't hit with tests, but I thought it was safer to keep them, so I added the pragma. |
||
| target = node | ||
|
|
||
| target.comments = target.comments or [] | ||
|
|
@@ -565,47 +565,9 @@ def split_script( | |
| script: str, | ||
| engine: str, | ||
| ) -> list[SQLStatement]: | ||
| if dialect := SQLGLOT_DIALECTS.get(engine): | ||
| try: | ||
| return [ | ||
| cls(ast.sql(), engine, ast) | ||
| for ast in cls._parse(script, engine) | ||
| if ast | ||
| ] | ||
| except ValueError: | ||
| # `ast.sql()` might raise an error on some cases (eg, `SHOW TABLES | ||
| # FROM`). In this case, we rely on the tokenizer to generate the | ||
| # statements. | ||
| pass | ||
|
|
||
| # When we don't have a sqlglot dialect we can't rely on `ast.sql()` to correctly | ||
| # generate the SQL of each statement, so we tokenize the script and split it | ||
| # based on the location of semi-colons. | ||
| statements = [] | ||
| start = 0 | ||
| remainder = script | ||
|
|
||
| try: | ||
| tokens = sqlglot.tokenize(script, dialect) | ||
| except sqlglot.errors.TokenError as ex: | ||
| raise SupersetParseError( | ||
| script, | ||
| engine, | ||
| message="Unable to tokenize script", | ||
| ) from ex | ||
|
|
||
| for token in tokens: | ||
| if token.token_type == sqlglot.TokenType.SEMICOLON: | ||
| statement, start = script[start : token.start], token.end + 1 | ||
| ast = cls._parse(statement, engine)[0] | ||
| statements.append(cls(statement.strip(), engine, ast)) | ||
| remainder = script[start:] | ||
|
|
||
| if remainder.strip(): | ||
| ast = cls._parse(remainder, engine)[0] | ||
| statements.append(cls(remainder.strip(), engine, ast)) | ||
|
|
||
| return statements | ||
| return [ | ||
| cls(ast=ast, engine=engine) for ast in cls._parse(script, engine) if ast | ||
| ] | ||
|
|
||
| @classmethod | ||
| def _parse_statement( | ||
|
|
@@ -618,7 +580,11 @@ def _parse_statement( | |
| """ | ||
| statements = cls.split_script(statement, engine) | ||
| if len(statements) != 1: | ||
| raise SupersetParseError("SQLStatement should have exactly one statement") | ||
| raise SupersetParseError( | ||
| statement, | ||
| engine, | ||
| message="SQLStatement should have exactly one statement", | ||
| ) | ||
|
|
||
| return statements[0]._parsed # pylint: disable=protected-access | ||
|
|
||
|
|
@@ -657,10 +623,13 @@ def is_mutating(self) -> bool: | |
| exp.Create, | ||
| exp.Drop, | ||
| exp.TruncateTable, | ||
| exp.Alter, | ||
| ), | ||
| ): | ||
| return True | ||
|
|
||
| # depending on the dialect (Oracle, MS SQL) the `ALTER` is parsed as a | ||
| # command, not an expression | ||
| if isinstance(node, exp.Command) and node.name == "ALTER": | ||
| return True | ||
|
|
||
|
|
@@ -821,9 +790,16 @@ def has_subquery(self) -> bool: | |
| """ | ||
| Check if the statement has a subquery. | ||
|
|
||
| :return: True if the statement has a subquery at the top level. | ||
| :return: True if the statement has a subquery. | ||
| """ | ||
| return bool(self._parsed.find(exp.Subquery)) | ||
| return bool(self._parsed.find(exp.Subquery)) or ( | ||
| isinstance(self._parsed, exp.Select) | ||
| and any( | ||
| isinstance(expression, exp.Select) | ||
| for expression in self._parsed.walk() | ||
| if expression != self._parsed | ||
| ) | ||
| ) | ||
|
|
||
| def parse_predicate(self, predicate: str) -> exp.Expression: | ||
| """ | ||
|
|
@@ -933,11 +909,8 @@ def tokenize_kql(kql: str) -> list[tuple[KQLTokenType, str]]: | |
| ) | ||
| buffer = ch | ||
| elif ch == "`" and script[i - 2 : i] == "``": | ||
| if buffer: | ||
| tokens.extend(classify_non_string_kql(buffer)) | ||
| buffer = "" | ||
| state = KQLSplitState.INSIDE_MULTILINE_STRING | ||
| buffer = "`" | ||
| buffer = "```" | ||
| else: | ||
| buffer += ch | ||
| else: | ||
|
|
@@ -1042,11 +1015,19 @@ def _parse_statement( | |
| engine: str, | ||
| ) -> str: | ||
| if engine != "kustokql": | ||
| raise SupersetParseError(f"Invalid engine: {engine}") | ||
| raise SupersetParseError( | ||
| statement, | ||
| engine, | ||
| message=f"Invalid engine: {engine}", | ||
| ) | ||
|
|
||
| statements = split_kql(statement) | ||
| if len(statements) != 1: | ||
| raise SupersetParseError("SQLStatement should have exactly one statement") | ||
| raise SupersetParseError( | ||
| statement, | ||
| engine, | ||
| message="KustoKQLStatement should have exactly one statement", | ||
| ) | ||
|
|
||
| return statements[0].strip() | ||
|
|
||
|
|
@@ -1122,7 +1103,7 @@ def check_functions_present(self, functions: set[str]) -> bool: | |
| :return: True if any of the functions are present | ||
| """ | ||
| logger.warning("Kusto KQL doesn't support checking for functions present.") | ||
| return True | ||
| return False | ||
|
|
||
| def get_limit_value(self) -> int | None: | ||
| """ | ||
|
|
@@ -1150,7 +1131,11 @@ def set_limit_value( | |
| Add a limit to the statement. | ||
| """ | ||
| if method != LimitMethod.FORCE_LIMIT: | ||
| raise SupersetParseError("Kusto KQL only supports the FORCE_LIMIT method.") | ||
| raise SupersetParseError( | ||
| self._parsed, | ||
| self.engine, | ||
| message="Kusto KQL only supports the FORCE_LIMIT method.", | ||
| ) | ||
|
|
||
| tokens = tokenize_kql(self._parsed) | ||
| found_limit_token = False | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no aliased
UNNESTin Firebolt, notWITH/OFFSETafter it [docs].