From d836b6ed6ffec7add20c6aace73ad69fcf2175a5 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 21:16:41 +0300 Subject: [PATCH 01/18] fix: quote spaced column names --- superset/models/helpers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 87f354a6f30a..32256b8146be 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -886,6 +886,12 @@ def _process_select_expression( be properly parsed and validated. """ if expression: + # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot + # from misinterpreting them as "column AS alias" syntax + if " " in expression and not any(char in expression for char in ['"', "'", "`", "[", "("]): + # This appears to be a simple column name with spaces, quote it + expression = self.database.quote_identifier(expression) + expression = f"SELECT {expression}" if processed := self._process_sql_expression( From 0a0ff3d1783ff4059fc03a9e502df9a64596cbdd Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 21:32:53 +0300 Subject: [PATCH 02/18] fix: format --- superset/models/helpers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 32256b8146be..7a2a9a3fc833 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -888,10 +888,12 @@ def _process_select_expression( if expression: # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot # from misinterpreting them as "column AS alias" syntax - if " " in expression and not any(char in expression for char in ['"', "'", "`", "[", "("]): + if " " in expression and not any( + char in expression for char in ['"', "'", "`", "[", "("] + ): # This appears to be a simple column name with spaces, quote it expression = self.database.quote_identifier(expression) - + expression = f"SELECT {expression}" if processed := self._process_sql_expression( From 0b656ca0ccebb7605e386a1a1a44cd4ee16dfc06 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 22:02:13 +0300 Subject: [PATCH 03/18] fix: only quote column names --- superset/models/helpers.py | 49 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 7a2a9a3fc833..c22100ace024 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -888,8 +888,53 @@ def _process_select_expression( if expression: # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot # from misinterpreting them as "column AS alias" syntax - if " " in expression and not any( - char in expression for char in ['"', "'", "`", "[", "("] + # Only quote if it appears to be a simple identifier (no SQL operators or functions) + if ( + " " in expression + and not any( + char in expression + for char in [ + '"', + "'", + "`", + "[", + "(", + ")", + "*", + "+", + "-", + "/", + "=", + "<", + ">", + ",", + ] + ) + and not any( + keyword in expression.upper() + for keyword in [ + "SELECT", + "FROM", + "WHERE", + "AND", + "OR", + "AS", + "CASE", + "WHEN", + "THEN", + "ELSE", + "END", + "CAST", + "CONVERT", + "FUNCTION", + "SUM", + "COUNT", + "AVG", + "MAX", + "MIN", + "DISTINCT", + ] + ) ): # This appears to be a simple column name with spaces, quote it expression = self.database.quote_identifier(expression) From 1f3662299f54ea1b360378ad29b78e430d4bb275 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 22:46:20 +0300 Subject: [PATCH 04/18] test: add a test case --- tests/unit_tests/models/helpers_test.py | 109 ++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 8154e5013649..18cba475d7f9 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -474,9 +474,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert has_single_quotes or has_double_quotes, ( - "Others literal should be quoted with either single or double quotes" - ) + assert ( + has_single_quotes or has_double_quotes + ), "Others literal should be quoted with either single or double quotes" # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1118,10 +1118,101 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith("SELECT"), ( - f"Result still has SELECT prefix: {result}" - ) + assert not result.upper().startswith( + "SELECT" + ), f"Result still has SELECT prefix: {result}" # The result should contain the core expression (case-insensitive check) - assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( - f"Expected '{expected}' to be in result '{result}' for input '{expression}'" - ) + assert ( + expected.replace(" ", "").lower() in result.replace(" ", "").lower() + ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + + +def test_process_select_expression_column_names_with_spaces( + database: Database, +) -> None: + """ + Test for issue #35493: Column names with spaces should be quoted + to prevent SQLGlot from misinterpreting them as "column AS alias" syntax. + """ + from superset.connectors.sqla.models import SqlaTable + + table = SqlaTable( + table_name="test_table", + database=database, + ) + + # Test 1: Simple column name with spaces - should be quoted and not misinterpreted + result = table._process_select_expression( + expression="Test Column", + database_id=database.id, + engine="sqlite", + schema="", + template_processor=None, + ) + + # The result should be a quoted identifier, not "Test AS Column" + assert result is not None + assert ( + "AS" not in result or result.count("AS") <= 1 + ) # Allow one AS if it's part of a proper alias + # Should contain the full column name in some quoted form + assert ( + "Test Column" in result + or '"Test Column"' in result + or "'Test Column'" in result + ) + + # Test 2: Complex expression with spaces - should NOT be pre-quoted (let SQLGlot handle it) + result = table._process_select_expression( + expression="col1 * 10", + database_id=database.id, + engine="sqlite", + schema="", + template_processor=None, + ) + + # Should process the expression without breaking it + assert result is not None + assert "*" in result # The multiplication should be preserved + + # Test 3: Expression with SQL keywords - should NOT be pre-quoted + result = table._process_select_expression( + expression="UPPER(name)", + database_id=database.id, + engine="sqlite", + schema="", + template_processor=None, + ) + + # Should process the function call correctly + assert result is not None + assert "UPPER" in result + assert "name" in result + + # Test 4: Already quoted column - should NOT be quoted again + result = table._process_select_expression( + expression='"Already Quoted Column"', + database_id=database.id, + engine="sqlite", + schema="", + template_processor=None, + ) + + # Should preserve the original quoting + assert result is not None + assert '"Already Quoted Column"' in result + + # Test 5: Column name with spaces that should trigger the fix + # This simulates the exact issue from #35493 + result = table._process_select_expression( + expression="Customer Name", + database_id=database.id, + engine="sqlite", + schema="", + template_processor=None, + ) + + # The key test: should NOT result in "Customer AS Name" + assert result is not None + assert result != "Customer AS Name" + assert "Customer Name" in result or '"Customer Name"' in result From b8cb88e5f2268552f1f83f6d36d164f1d7ab1fbe Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 22:46:57 +0300 Subject: [PATCH 05/18] fix: format --- superset/models/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index c22100ace024..bdebcccf7579 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -888,7 +888,8 @@ def _process_select_expression( if expression: # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot # from misinterpreting them as "column AS alias" syntax - # Only quote if it appears to be a simple identifier (no SQL operators or functions) + # Only quote if it appears to be a simple identifier + # (no SQL operators or functions) if ( " " in expression and not any( From 0e637e0d8d2a2a5a8d5cbd71c8f11142e28097ad Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 23:00:19 +0300 Subject: [PATCH 06/18] refactor: should quote identifier function --- superset/models/helpers.py | 74 +++++++++++++------------------------- 1 file changed, 24 insertions(+), 50 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index bdebcccf7579..7832be129842 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -43,6 +43,7 @@ import pandas as pd import pytz import sqlalchemy as sa +import sqlglot import yaml from flask import current_app as app, g from flask_appbuilder import Model @@ -871,6 +872,28 @@ def _process_sql_expression( # pylint: disable=too-many-arguments raise QueryObjectValidationError(ex.message) from ex return expression + def _should_quote_identifier(self, expr: str) -> bool: + """ + Determine if an expression should be quoted because it's an identifier with spaces. + + Uses SQLGlot parsing to detect if the expression gets misinterpreted as an alias + when it should be treated as a single identifier. + """ + if " " not in expr: + return False + try: + parsed = sqlglot.parse_one(expr) + # If SQLGlot interprets it as an alias (column AS alias), we should quote it + # because it was meant to be a single column name with spaces + alias = parsed.find(sqlglot.expressions.Alias) + if alias: + # Check if this looks like an unintended alias parsing + # (i.e., no explicit AS keyword in the original expression) + return "AS" not in expr.upper() + return False + except (sqlglot.errors.ParseError, AttributeError): + return False + def _process_select_expression( self, expression: Optional[str], @@ -888,56 +911,7 @@ def _process_select_expression( if expression: # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot # from misinterpreting them as "column AS alias" syntax - # Only quote if it appears to be a simple identifier - # (no SQL operators or functions) - if ( - " " in expression - and not any( - char in expression - for char in [ - '"', - "'", - "`", - "[", - "(", - ")", - "*", - "+", - "-", - "/", - "=", - "<", - ">", - ",", - ] - ) - and not any( - keyword in expression.upper() - for keyword in [ - "SELECT", - "FROM", - "WHERE", - "AND", - "OR", - "AS", - "CASE", - "WHEN", - "THEN", - "ELSE", - "END", - "CAST", - "CONVERT", - "FUNCTION", - "SUM", - "COUNT", - "AVG", - "MAX", - "MIN", - "DISTINCT", - ] - ) - ): - # This appears to be a simple column name with spaces, quote it + if self._should_quote_identifier(expression): expression = self.database.quote_identifier(expression) expression = f"SELECT {expression}" From c5fb7c30543c126342ae53d4ffd6cc6b2f8a18b2 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 30 Oct 2025 23:52:45 +0300 Subject: [PATCH 07/18] fix: ci --- superset/models/helpers.py | 5 +++-- tests/unit_tests/models/helpers_test.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 7832be129842..f3634bc50699 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -874,8 +874,9 @@ def _process_sql_expression( # pylint: disable=too-many-arguments def _should_quote_identifier(self, expr: str) -> bool: """ - Determine if an expression should be quoted because it's an identifier with spaces. - + Determine if an expression should be quoted because it's an + identifier with spaces. + Uses SQLGlot parsing to detect if the expression gets misinterpreted as an alias when it should be treated as a single identifier. """ diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 18cba475d7f9..5a4d57e7d7e3 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -1162,7 +1162,8 @@ def test_process_select_expression_column_names_with_spaces( or "'Test Column'" in result ) - # Test 2: Complex expression with spaces - should NOT be pre-quoted (let SQLGlot handle it) + # Test 2: Complex expression with spaces - should NOT be pre-quoted + # (let SQLGlot handle it) result = table._process_select_expression( expression="col1 * 10", database_id=database.id, From f66f76c78b586e95eb260f19c3157317e2e6dd02 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Fri, 31 Oct 2025 10:15:27 +0300 Subject: [PATCH 08/18] fix: ci --- tests/unit_tests/models/helpers_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 5a4d57e7d7e3..0a3393a20734 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -474,9 +474,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert ( - has_single_quotes or has_double_quotes - ), "Others literal should be quoted with either single or double quotes" + assert has_single_quotes or has_double_quotes, ( + "Others literal should be quoted with either single or double quotes" + ) # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1118,13 +1118,13 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith( - "SELECT" - ), f"Result still has SELECT prefix: {result}" + assert not result.upper().startswith("SELECT"), ( + f"Result still has SELECT prefix: {result}" + ) # The result should contain the core expression (case-insensitive check) - assert ( - expected.replace(" ", "").lower() in result.replace(" ", "").lower() - ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( + f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + ) def test_process_select_expression_column_names_with_spaces( From f64c610c785af6525db7d8c6dd9764e6fb66beb3 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 3 Nov 2025 13:58:45 +0300 Subject: [PATCH 09/18] chore: remove previous fix --- superset/models/helpers.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index f3634bc50699..87f354a6f30a 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -43,7 +43,6 @@ import pandas as pd import pytz import sqlalchemy as sa -import sqlglot import yaml from flask import current_app as app, g from flask_appbuilder import Model @@ -872,29 +871,6 @@ def _process_sql_expression( # pylint: disable=too-many-arguments raise QueryObjectValidationError(ex.message) from ex return expression - def _should_quote_identifier(self, expr: str) -> bool: - """ - Determine if an expression should be quoted because it's an - identifier with spaces. - - Uses SQLGlot parsing to detect if the expression gets misinterpreted as an alias - when it should be treated as a single identifier. - """ - if " " not in expr: - return False - try: - parsed = sqlglot.parse_one(expr) - # If SQLGlot interprets it as an alias (column AS alias), we should quote it - # because it was meant to be a single column name with spaces - alias = parsed.find(sqlglot.expressions.Alias) - if alias: - # Check if this looks like an unintended alias parsing - # (i.e., no explicit AS keyword in the original expression) - return "AS" not in expr.upper() - return False - except (sqlglot.errors.ParseError, AttributeError): - return False - def _process_select_expression( self, expression: Optional[str], @@ -910,11 +886,6 @@ def _process_select_expression( be properly parsed and validated. """ if expression: - # Fix for issue #35493: Quote column names with spaces to prevent SQLGlot - # from misinterpreting them as "column AS alias" syntax - if self._should_quote_identifier(expression): - expression = self.database.quote_identifier(expression) - expression = f"SELECT {expression}" if processed := self._process_sql_expression( From 0360a37e0a9db0bfe30d95f582b905dd242792f8 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 3 Nov 2025 20:59:47 +0300 Subject: [PATCH 10/18] fix: decide on columns earlier in the flow --- .../superset-ui-core/src/query/normalizeTimeColumn.ts | 1 + .../packages/superset-ui-core/src/query/types/Column.ts | 5 +++++ superset/connectors/sqla/models.py | 8 +++++++- superset/superset_typing.py | 2 ++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/superset-frontend/packages/superset-ui-core/src/query/normalizeTimeColumn.ts b/superset-frontend/packages/superset-ui-core/src/query/normalizeTimeColumn.ts index 33c27f233763..b5bde0c88dbf 100644 --- a/superset-frontend/packages/superset-ui-core/src/query/normalizeTimeColumn.ts +++ b/superset-frontend/packages/superset-ui-core/src/query/normalizeTimeColumn.ts @@ -67,6 +67,7 @@ export function normalizeTimeColumn( sqlExpression: formData.x_axis, label: formData.x_axis, expressionType: 'SQL', + isColumnReference: true, }; } diff --git a/superset-frontend/packages/superset-ui-core/src/query/types/Column.ts b/superset-frontend/packages/superset-ui-core/src/query/types/Column.ts index a12990dc7485..0fe1317e3dea 100644 --- a/superset-frontend/packages/superset-ui-core/src/query/types/Column.ts +++ b/superset-frontend/packages/superset-ui-core/src/query/types/Column.ts @@ -27,6 +27,7 @@ export interface AdhocColumn { optionName?: string; sqlExpression: string; expressionType: 'SQL'; + isColumnReference?: boolean; columnType?: 'BASE_AXIS' | 'SERIES'; timeGrain?: string; datasourceWarning?: boolean; @@ -74,6 +75,10 @@ export function isAdhocColumn(column?: any): column is AdhocColumn { ); } +export function isAdhocColumnReference(column?: any): column is AdhocColumn { + return isAdhocColumn(column) && column?.isColumnReference === true; +} + export function isQueryFormColumn(column: any): column is QueryFormColumn { return isPhysicalColumn(column) || isAdhocColumn(column); } diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 0ef9572c5072..a4c12dd00cc8 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1502,8 +1502,14 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals """ label = utils.get_column_name(col) try: + sql_expression = col["sqlExpression"] + + # For column references, conditionally quote identifiers that need it + if col.get("isColumnReference"): + sql_expression = self.database.quote_identifier(sql_expression) + expression = self._process_select_expression( - expression=col["sqlExpression"], + expression=sql_expression, database_id=self.database_id, engine=self.database.backend, schema=self.schema, diff --git a/superset/superset_typing.py b/superset/superset_typing.py index 02a4a32f2498..7d00b977d23d 100644 --- a/superset/superset_typing.py +++ b/superset/superset_typing.py @@ -59,6 +59,8 @@ class AdhocColumn(TypedDict, total=False): hasCustomLabel: Optional[bool] label: str sqlExpression: str + expressionType: Optional[Literal["SQL"]] + isColumnReference: Optional[bool] columnType: Optional[Literal["BASE_AXIS", "SERIES"]] timeGrain: Optional[str] From e8c6cc6d80142f2953ea1e08984bc2d0eb816c69 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 3 Nov 2025 22:27:02 +0300 Subject: [PATCH 11/18] fix: update frontend tests --- .../test/query/normalizeTimeColumn.test.ts | 1 + .../test/Timeseries/buildQuery.test.ts | 59 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/superset-frontend/packages/superset-ui-core/test/query/normalizeTimeColumn.test.ts b/superset-frontend/packages/superset-ui-core/test/query/normalizeTimeColumn.test.ts index e00fa683c028..b1d0eebb8c39 100644 --- a/superset-frontend/packages/superset-ui-core/test/query/normalizeTimeColumn.test.ts +++ b/superset-frontend/packages/superset-ui-core/test/query/normalizeTimeColumn.test.ts @@ -86,6 +86,7 @@ test('should support different columns for x-axis and granularity', () => { { timeGrain: 'P1Y', columnType: 'BASE_AXIS', + isColumnReference: true, sqlExpression: 'time_column_in_x_axis', label: 'time_column_in_x_axis', expressionType: 'SQL', diff --git a/superset-frontend/plugins/plugin-chart-echarts/test/Timeseries/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-echarts/test/Timeseries/buildQuery.test.ts index b97163336ce0..ffabe67df149 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/test/Timeseries/buildQuery.test.ts +++ b/superset-frontend/plugins/plugin-chart-echarts/test/Timeseries/buildQuery.test.ts @@ -101,36 +101,35 @@ describe('queryObject conversion', () => { it('should convert queryObject', () => { const { queries } = buildQuery({ ...formData, x_axis: 'time_column' }); - expect(queries[0]).toEqual( - expect.objectContaining({ - granularity: 'time_column', - time_range: '1 year ago : 2013', - extras: { having: '', where: '', time_grain_sqla: 'P1Y' }, - columns: [ - { - columnType: 'BASE_AXIS', - expressionType: 'SQL', - label: 'time_column', - sqlExpression: 'time_column', - timeGrain: 'P1Y', - }, - 'col1', - ], - series_columns: ['col1'], - metrics: ['count(*)'], - post_processing: [ - { - operation: 'pivot', - options: { - aggregates: { 'count(*)': { operator: 'mean' } }, - columns: ['col1'], - drop_missing_columns: true, - index: ['time_column'], - }, + expect(queries[0]).toMatchObject({ + granularity: 'time_column', + time_range: '1 year ago : 2013', + extras: { having: '', where: '', time_grain_sqla: 'P1Y' }, + columns: [ + { + columnType: 'BASE_AXIS', + expressionType: 'SQL', + label: 'time_column', + sqlExpression: 'time_column', + timeGrain: 'P1Y', + isColumnReference: true, + }, + 'col1', + ], + series_columns: ['col1'], + metrics: ['count(*)'], + post_processing: [ + { + operation: 'pivot', + options: { + aggregates: { 'count(*)': { operator: 'mean' } }, + columns: ['col1'], + drop_missing_columns: true, + index: ['time_column'], }, - { operation: 'flatten' }, - ], - }), - ); + }, + { operation: 'flatten' }, + ], + }); }); }); From 979d76767df3626b431567d91f0a1ce3845da579 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 3 Nov 2025 23:47:54 +0300 Subject: [PATCH 12/18] fix: update backend test --- tests/unit_tests/models/helpers_test.py | 126 +++++++++--------------- 1 file changed, 48 insertions(+), 78 deletions(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 0a3393a20734..c462c530268c 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -474,9 +474,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert has_single_quotes or has_double_quotes, ( - "Others literal should be quoted with either single or double quotes" - ) + assert ( + has_single_quotes or has_double_quotes + ), "Others literal should be quoted with either single or double quotes" # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1118,21 +1118,21 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith("SELECT"), ( - f"Result still has SELECT prefix: {result}" - ) + assert not result.upper().startswith( + "SELECT" + ), f"Result still has SELECT prefix: {result}" # The result should contain the core expression (case-insensitive check) - assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( - f"Expected '{expected}' to be in result '{result}' for input '{expression}'" - ) + assert ( + expected.replace(" ", "").lower() in result.replace(" ", "").lower() + ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" -def test_process_select_expression_column_names_with_spaces( - database: Database, -) -> None: +def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: """ - Test for issue #35493: Column names with spaces should be quoted - to prevent SQLGlot from misinterpreting them as "column AS alias" syntax. + Test that adhoc_column_to_sqla properly quotes column identifiers when isColumnReference is true. + + This tests the fix for column names with spaces being properly quoted + before being processed by SQLGlot to prevent "column AS alias" misinterpretation. """ from superset.connectors.sqla.models import SqlaTable @@ -1141,79 +1141,49 @@ def test_process_select_expression_column_names_with_spaces( database=database, ) - # Test 1: Simple column name with spaces - should be quoted and not misinterpreted - result = table._process_select_expression( - expression="Test Column", - database_id=database.id, - engine="sqlite", - schema="", - template_processor=None, - ) + # Test 1: Column reference with spaces should be quoted + col_with_spaces = { + "sqlExpression": "Customer Name", + "label": "Customer Name", + "isColumnReference": True, + } - # The result should be a quoted identifier, not "Test AS Column" + result = table.adhoc_column_to_sqla(col_with_spaces) + + # Should contain the quoted column name assert result is not None - assert ( - "AS" not in result or result.count("AS") <= 1 - ) # Allow one AS if it's part of a proper alias - # Should contain the full column name in some quoted form - assert ( - "Test Column" in result - or '"Test Column"' in result - or "'Test Column'" in result - ) + result_str = str(result) - # Test 2: Complex expression with spaces - should NOT be pre-quoted - # (let SQLGlot handle it) - result = table._process_select_expression( - expression="col1 * 10", - database_id=database.id, - engine="sqlite", - schema="", - template_processor=None, - ) + assert '"Customer Name"' in result_str - # Should process the expression without breaking it - assert result is not None - assert "*" in result # The multiplication should be preserved - # Test 3: Expression with SQL keywords - should NOT be pre-quoted - result = table._process_select_expression( - expression="UPPER(name)", - database_id=database.id, - engine="sqlite", - schema="", - template_processor=None, - ) +def test_adhoc_column_to_sqla_column_reference_already_quoted( + database: Database, +) -> None: + """ + Test that adhoc_column_to_sqla handles already quoted column names correctly. - # Should process the function call correctly - assert result is not None - assert "UPPER" in result - assert "name" in result + When isColumnReference is true but the column is already quoted, + it should not be double-quoted. + """ + from superset.connectors.sqla.models import SqlaTable - # Test 4: Already quoted column - should NOT be quoted again - result = table._process_select_expression( - expression='"Already Quoted Column"', - database_id=database.id, - engine="sqlite", - schema="", - template_processor=None, + table = SqlaTable( + table_name="test_table", + database=database, ) - # Should preserve the original quoting - assert result is not None - assert '"Already Quoted Column"' in result + # Test already quoted column + already_quoted_col = { + "sqlExpression": '"Already Quoted"', + "label": "Already Quoted", + "isColumnReference": True, + } - # Test 5: Column name with spaces that should trigger the fix - # This simulates the exact issue from #35493 - result = table._process_select_expression( - expression="Customer Name", - database_id=database.id, - engine="sqlite", - schema="", - template_processor=None, - ) + result = table.adhoc_column_to_sqla(already_quoted_col) - # The key test: should NOT result in "Customer AS Name" + # Should not be double-quoted assert result is not None - assert result != "Customer AS Name" - assert "Customer Name" in result or '"Customer Name"' in result + result_str = str(result) + assert '""' not in result_str # No double quotes + assert '"Already Quoted"' in result_str From 8bc862cb2ebffcdc4c31e4ae83ec85103297596b Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 3 Nov 2025 23:52:05 +0300 Subject: [PATCH 13/18] chore: remove extra type --- superset/superset_typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/superset_typing.py b/superset/superset_typing.py index 7d00b977d23d..c3460d80823d 100644 --- a/superset/superset_typing.py +++ b/superset/superset_typing.py @@ -59,7 +59,6 @@ class AdhocColumn(TypedDict, total=False): hasCustomLabel: Optional[bool] label: str sqlExpression: str - expressionType: Optional[Literal["SQL"]] isColumnReference: Optional[bool] columnType: Optional[Literal["BASE_AXIS", "SERIES"]] timeGrain: Optional[str] From f3a215d809e971cdb93d4a497a150881fb6d5996 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Tue, 4 Nov 2025 10:35:08 +0300 Subject: [PATCH 14/18] fix: lint --- tests/unit_tests/models/helpers_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index c462c530268c..e0c43916cdfb 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -1129,7 +1129,8 @@ def test_process_select_expression_end_to_end(database: Database) -> None: def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: """ - Test that adhoc_column_to_sqla properly quotes column identifiers when isColumnReference is true. + Test that adhoc_column_to_sqla + properly quotes column identifiers when isColumnReference is true. This tests the fix for column names with spaces being properly quoted before being processed by SQLGlot to prevent "column AS alias" misinterpretation. From 6b23794a010f76057d516d09cf2e12db59b2cfd1 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Tue, 4 Nov 2025 11:22:32 +0300 Subject: [PATCH 15/18] fix: ci --- superset/connectors/sqla/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index a4c12dd00cc8..99e05918d51e 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1506,7 +1506,11 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals # For column references, conditionally quote identifiers that need it if col.get("isColumnReference"): - sql_expression = self.database.quote_identifier(sql_expression) + # Check if already quoted to avoid double-quoting + if not ( + sql_expression.startswith('"') and sql_expression.endswith('"') + ): + sql_expression = self.database.quote_identifier(sql_expression) expression = self._process_select_expression( expression=sql_expression, From cebc0f43f15d65d3e80b0278a0e5f4e63e6767f1 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Tue, 4 Nov 2025 11:48:15 +0300 Subject: [PATCH 16/18] fix: ci --- tests/unit_tests/models/helpers_test.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index e0c43916cdfb..0c3d89ef1234 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -29,6 +29,8 @@ from sqlalchemy.orm.session import Session from sqlalchemy.pool import StaticPool +from superset.superset_typing import AdhocColumn + if TYPE_CHECKING: from superset.models.core import Database @@ -474,9 +476,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert ( - has_single_quotes or has_double_quotes - ), "Others literal should be quoted with either single or double quotes" + assert has_single_quotes or has_double_quotes, ( + "Others literal should be quoted with either single or double quotes" + ) # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1118,13 +1120,13 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith( - "SELECT" - ), f"Result still has SELECT prefix: {result}" + assert not result.upper().startswith("SELECT"), ( + f"Result still has SELECT prefix: {result}" + ) # The result should contain the core expression (case-insensitive check) - assert ( - expected.replace(" ", "").lower() in result.replace(" ", "").lower() - ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( + f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + ) def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: @@ -1143,7 +1145,7 @@ def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: ) # Test 1: Column reference with spaces should be quoted - col_with_spaces = { + col_with_spaces: AdhocColumn = { "sqlExpression": "Customer Name", "label": "Customer Name", "isColumnReference": True, @@ -1175,7 +1177,7 @@ def test_adhoc_column_to_sqla_column_reference_already_quoted( ) # Test already quoted column - already_quoted_col = { + already_quoted_col: AdhocColumn = { "sqlExpression": '"Already Quoted"', "label": "Already Quoted", "isColumnReference": True, From 7b28a5d2c1500267820aa95b5b139e887efb8766 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Fri, 7 Nov 2025 20:13:18 +0300 Subject: [PATCH 17/18] fix: remove redundant check --- superset/connectors/sqla/models.py | 6 +-- tests/unit_tests/models/helpers_test.py | 50 +++++-------------------- 2 files changed, 10 insertions(+), 46 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 99e05918d51e..a4c12dd00cc8 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1506,11 +1506,7 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals # For column references, conditionally quote identifiers that need it if col.get("isColumnReference"): - # Check if already quoted to avoid double-quoting - if not ( - sql_expression.startswith('"') and sql_expression.endswith('"') - ): - sql_expression = self.database.quote_identifier(sql_expression) + sql_expression = self.database.quote_identifier(sql_expression) expression = self._process_select_expression( expression=sql_expression, diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 0c3d89ef1234..4f0dd10bae2d 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -476,9 +476,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert has_single_quotes or has_double_quotes, ( - "Others literal should be quoted with either single or double quotes" - ) + assert ( + has_single_quotes or has_double_quotes + ), "Others literal should be quoted with either single or double quotes" # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1120,13 +1120,13 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith("SELECT"), ( - f"Result still has SELECT prefix: {result}" - ) + assert not result.upper().startswith( + "SELECT" + ), f"Result still has SELECT prefix: {result}" # The result should contain the core expression (case-insensitive check) - assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( - f"Expected '{expected}' to be in result '{result}' for input '{expression}'" - ) + assert ( + expected.replace(" ", "").lower() in result.replace(" ", "").lower() + ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: @@ -1158,35 +1158,3 @@ def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: result_str = str(result) assert '"Customer Name"' in result_str - - -def test_adhoc_column_to_sqla_column_reference_already_quoted( - database: Database, -) -> None: - """ - Test that adhoc_column_to_sqla handles already quoted column names correctly. - - When isColumnReference is true but the column is already quoted, - it should not be double-quoted. - """ - from superset.connectors.sqla.models import SqlaTable - - table = SqlaTable( - table_name="test_table", - database=database, - ) - - # Test already quoted column - already_quoted_col: AdhocColumn = { - "sqlExpression": '"Already Quoted"', - "label": "Already Quoted", - "isColumnReference": True, - } - - result = table.adhoc_column_to_sqla(already_quoted_col) - - # Should not be double-quoted - assert result is not None - result_str = str(result) - assert '""' not in result_str # No double quotes - assert '"Already Quoted"' in result_str From a9d62a21474f5c3545eb7f0ad257f9985afb1068 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Mon, 10 Nov 2025 11:05:57 +0300 Subject: [PATCH 18/18] fix: format --- tests/unit_tests/models/helpers_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index 4f0dd10bae2d..6f434c4f64ea 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -476,9 +476,9 @@ def condition_factory(col_name: str, expr): has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql - assert ( - has_single_quotes or has_double_quotes - ), "Others literal should be quoted with either single or double quotes" + assert has_single_quotes or has_double_quotes, ( + "Others literal should be quoted with either single or double quotes" + ) # Verify the structure of the generated SQL assert "CASE WHEN" in select_sql @@ -1120,13 +1120,13 @@ def test_process_select_expression_end_to_end(database: Database) -> None: # sqlglot may normalize the SQL slightly, so we check the result exists # and doesn't contain the SELECT prefix assert result is not None, f"Failed to process: {expression}" - assert not result.upper().startswith( - "SELECT" - ), f"Result still has SELECT prefix: {result}" + assert not result.upper().startswith("SELECT"), ( + f"Result still has SELECT prefix: {result}" + ) # The result should contain the core expression (case-insensitive check) - assert ( - expected.replace(" ", "").lower() in result.replace(" ", "").lower() - ), f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + assert expected.replace(" ", "").lower() in result.replace(" ", "").lower(), ( + f"Expected '{expected}' to be in result '{result}' for input '{expression}'" + ) def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: