apache · betodealmeida · Jun 5, 2025 · May 22, 2025 · betodealmeida · May 30, 2025
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,36 @@
+# .coveragerc to control coverage.py
+[run]
+branch = True
+source = superset
+# omit = bad_file.py
+
+[paths]
+source =
+    superset/
+    */site-packages/
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
+
+    # Ignore importlib backport
+    from importlib
+
+    if TYPE_CHECKING:
+
+#fail_under = 100
+show_missing = True
@@ -45,6 +45,13 @@ jobs:
           SUPERSET_SECRET_KEY: not-a-secret
         run: |
           pytest --durations-min=0.5 --cov-report= --cov=superset ./tests/common ./tests/unit_tests --cache-clear --maxfail=50
+      - name: Python 100% coverage unit tests
+        if: steps.check.outputs.python
+        env:
+          SUPERSET_TESTENV: true
+          SUPERSET_SECRET_KEY: not-a-secret
+        run: |
+          pytest --durations-min=0.5 --cov-report= --cov=superset/sql/ ./tests/unit_tests/sql/ --cache-clear --cov-fail-under=100
       - name: Upload code coverage
         uses: codecov/codecov-action@v5
         with:

diff --git a/superset/exceptions.py b/superset/exceptions.py
@@ -334,6 +334,9 @@ def __init__(  # pylint: disable=too-many-arguments
         )
         super().__init__(error)
 
+    def __str__(self) -> str:
+        return self.error.message
+
 
 class OAuth2RedirectError(SupersetErrorException):
     """

diff --git a/superset/sql/dialects/firebolt.py b/superset/sql/dialects/firebolt.py
@@ -48,7 +48,7 @@ def _negate_range(
             self,
             this: exp.Expression | None = None,
         ) -> exp.Expression | None:
-            if not this:
+            if not this:  # pragma: no cover
                 return this
 
             return self.expression(exp.Not, this=self.expression(exp.Paren, this=this))
@@ -109,42 +109,15 @@ def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None:
             expressions = self._parse_wrapped_csv(self._parse_expression)
             offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
 
-            alias = self._parse_table_alias() if with_alias else None
-
-            if alias:
-                if self.dialect.UNNEST_COLUMN_ONLY:
-                    if alias.args.get("columns"):
-                        self.raise_error("Unexpected extra column alias in unnest.")
-
-                    alias.set("columns", [alias.this])
-                    alias.set("this", None)
-
-                columns = alias.args.get("columns") or []
-                if offset and len(expressions) < len(columns):
-                    offset = columns.pop()
-
-            if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
-                self._match(TokenType.ALIAS)
-                offset = self._parse_id_var(
-                    any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
-                ) or exp.to_identifier("offset")
-
             return self.expression(
                 exp.Unnest,
                 expressions=expressions,
-                alias=alias,
                 offset=offset,
             )
 
     class Generator(Firebolt.Generator):
         def join_sql(self, expression: exp.Join) -> str:
-            if not self.SEMI_ANTI_JOIN_WITH_SIDE and expression.kind in (
-                "SEMI",
-                "ANTI",
-            ):
-                side = None
-            else:
-                side = expression.side
+            side = expression.side
 
             op_sql = " ".join(
                 op
@@ -168,9 +141,6 @@ def join_sql(self, expression: exp.Join) -> str:
             this = expression.this
             this_sql = self.sql(this)
 
-            if exprs := self.expressions(expression):
-                this_sql = f"{this_sql},{self.seg(exprs)}"
-
             if on_sql:
                 on_sql = self.indent(on_sql, skip_first=True)
                 space = self.seg(" " * self.pad) if self.pretty else " "
@@ -189,7 +159,6 @@ def join_sql(self, expression: exp.Join) -> str:
 
                 return f", {this_sql}"
 
-            if op_sql != "STRAIGHT_JOIN":
-                op_sql = f"{op_sql} JOIN" if op_sql else "JOIN"
+            op_sql = f"{op_sql} JOIN" if op_sql else "JOIN"
 
             return f"{self.seg(op_sql)} {this_sql}{match_cond}{on_sql}"
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
@@ -551,7 +551,7 @@ def _parse(cls, script: str, engine: str) -> list[exp.Expression]:
             last_statement = statements.pop()
             target = statements[-1]
             for node in statements[-1].walk():
-                if hasattr(node, "comments"):
+                if hasattr(node, "comments"):  # pragma: no cover
                     target = node
 
             target.comments = target.comments or []
@@ -565,47 +565,9 @@ def split_script(
         script: str,
         engine: str,
     ) -> list[SQLStatement]:
-        if dialect := SQLGLOT_DIALECTS.get(engine):
-            try:
-                return [
-                    cls(ast.sql(), engine, ast)
-                    for ast in cls._parse(script, engine)
-                    if ast
-                ]
-            except ValueError:
-                # `ast.sql()` might raise an error on some cases (eg, `SHOW TABLES
-                # FROM`). In this case, we rely on the tokenizer to generate the
-                # statements.
-                pass
-
-        # When we don't have a sqlglot dialect we can't rely on `ast.sql()` to correctly
-        # generate the SQL of each statement, so we tokenize the script and split it
-        # based on the location of semi-colons.
-        statements = []
-        start = 0
-        remainder = script
-
-        try:
-            tokens = sqlglot.tokenize(script, dialect)
-        except sqlglot.errors.TokenError as ex:
-            raise SupersetParseError(
-                script,
-                engine,
-                message="Unable to tokenize script",
-            ) from ex
-
-        for token in tokens:
-            if token.token_type == sqlglot.TokenType.SEMICOLON:
-                statement, start = script[start : token.start], token.end + 1
-                ast = cls._parse(statement, engine)[0]
-                statements.append(cls(statement.strip(), engine, ast))
-                remainder = script[start:]
-
-        if remainder.strip():
-            ast = cls._parse(remainder, engine)[0]
-            statements.append(cls(remainder.strip(), engine, ast))
-
-        return statements
+        return [
+            cls(ast=ast, engine=engine) for ast in cls._parse(script, engine) if ast
+        ]
 
     @classmethod
     def _parse_statement(
@@ -618,7 +580,11 @@ def _parse_statement(
         """
         statements = cls.split_script(statement, engine)
         if len(statements) != 1:
-            raise SupersetParseError("SQLStatement should have exactly one statement")
+            raise SupersetParseError(
+                statement,
+                engine,
+                message="SQLStatement should have exactly one statement",
+            )
 
         return statements[0]._parsed  # pylint: disable=protected-access
 
@@ -657,10 +623,13 @@ def is_mutating(self) -> bool:
                     exp.Create,
                     exp.Drop,
                     exp.TruncateTable,
+                    exp.Alter,
                 ),
             ):
                 return True
 
+            # depending on the dialect (Oracle, MS SQL) the `ALTER` is parsed as a
+            # command, not an expression
             if isinstance(node, exp.Command) and node.name == "ALTER":
                 return True
 
@@ -821,9 +790,16 @@ def has_subquery(self) -> bool:
         """
         Check if the statement has a subquery.
 
-        :return: True if the statement has a subquery at the top level.
+        :return: True if the statement has a subquery.
         """
-        return bool(self._parsed.find(exp.Subquery))
+        return bool(self._parsed.find(exp.Subquery)) or (
+            isinstance(self._parsed, exp.Select)
+            and any(
+                isinstance(expression, exp.Select)
+                for expression in self._parsed.walk()
+                if expression != self._parsed
+            )
+        )
 
     def parse_predicate(self, predicate: str) -> exp.Expression:
         """
@@ -933,11 +909,8 @@ def tokenize_kql(kql: str) -> list[tuple[KQLTokenType, str]]:
                 )
                 buffer = ch
             elif ch == "`" and script[i - 2 : i] == "``":
-                if buffer:
-                    tokens.extend(classify_non_string_kql(buffer))
-                    buffer = ""
                 state = KQLSplitState.INSIDE_MULTILINE_STRING
-                buffer = "`"
+                buffer = "```"
             else:
                 buffer += ch
         else:
@@ -1042,11 +1015,19 @@ def _parse_statement(
         engine: str,
     ) -> str:
         if engine != "kustokql":
-            raise SupersetParseError(f"Invalid engine: {engine}")
+            raise SupersetParseError(
+                statement,
+                engine,
+                message=f"Invalid engine: {engine}",
+            )
 
         statements = split_kql(statement)
         if len(statements) != 1:
-            raise SupersetParseError("SQLStatement should have exactly one statement")
+            raise SupersetParseError(
+                statement,
+                engine,
+                message="KustoKQLStatement should have exactly one statement",
+            )
 
         return statements[0].strip()
 
@@ -1122,7 +1103,7 @@ def check_functions_present(self, functions: set[str]) -> bool:
         :return: True if any of the functions are present
         """
         logger.warning("Kusto KQL doesn't support checking for functions present.")
-        return True
+        return False
 
     def get_limit_value(self) -> int | None:
         """
@@ -1150,7 +1131,11 @@ def set_limit_value(
         Add a limit to the statement.
         """
         if method != LimitMethod.FORCE_LIMIT:
-            raise SupersetParseError("Kusto KQL only supports the FORCE_LIMIT method.")
+            raise SupersetParseError(
+                self._parsed,
+                self.engine,
+                message="Kusto KQL only supports the FORCE_LIMIT method.",
+            )
 
         tokens = tokenize_kql(self._parsed)
         found_limit_token = False

diff --git a/tests/unit_tests/sql/dialects/__init__.py b/tests/unit_tests/sql/dialects/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.