diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 3afe9fabeae..73a60de1ede 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1509,33 +1509,48 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals :rtype: sqlalchemy.sql.column """ label = utils.get_column_name(col) - try: - sql_expression = col["sqlExpression"] - - # For column references, conditionally quote identifiers that need it - if col.get("isColumnReference"): - sql_expression = self.database.quote_identifier(sql_expression) - - expression = self._process_select_expression( - expression=sql_expression, - database_id=self.database_id, - engine=self.database.backend, - schema=self.schema, - template_processor=template_processor, - ) - except SupersetSecurityException as ex: - raise QueryObjectValidationError(ex.message) from ex + sql_expression = col["sqlExpression"] time_grain = col.get("timeGrain") has_timegrain = col.get("columnType") == "BASE_AXIS" and time_grain is_dttm = False pdf = None - if col_in_metadata := self.get_column(expression): + is_column_reference = col.get("isColumnReference") + + # First, check if this is a column reference that exists in metadata + col_in_metadata = None + if is_column_reference: + col_in_metadata = self.get_column(sql_expression) + + if col_in_metadata: + # Column exists in metadata - use it directly sqla_column = col_in_metadata.get_sqla_col( template_processor=template_processor ) is_dttm = col_in_metadata.is_temporal pdf = col_in_metadata.python_date_format else: + # Column doesn't exist in metadata or is not a reference - treat as ad-hoc + # expression Note: If isColumnReference=true but column not found, we still + # quote it as a fallback for backwards compatibility, though this indicates + # the frontend sent incorrect metadata + try: + # For column references, conditionally quote identifiers that need it + expression_to_process = sql_expression + if is_column_reference: + expression_to_process = self.database.quote_identifier( + sql_expression + ) + + expression = self._process_select_expression( + expression=expression_to_process, + database_id=self.database_id, + engine=self.database.backend, + schema=self.schema, + template_processor=template_processor, + ) + except SupersetSecurityException as ex: + raise QueryObjectValidationError(ex.message) from ex + sqla_column = literal_column(expression) if has_timegrain or force_type_check: try: diff --git a/superset/utils/log.py b/superset/utils/log.py index 81ea1bab0bf..51e91e1a37f 100644 --- a/superset/utils/log.py +++ b/superset/utils/log.py @@ -408,8 +408,19 @@ def log( # pylint: disable=too-many-arguments,too-many-locals db.session.bulk_save_objects(logs) db.session.commit() # pylint: disable=consider-using-transaction except SQLAlchemyError as ex: + # Log errors but don't raise - logging failures should not break the + # application. Common in tests where the session may be in prepared state or + # db is locked logging.error("DBEventLogger failed to log event(s)") logging.exception(ex) + # Rollback to clean up the session state + try: + db.session.rollback() + except Exception: # pylint: disable=broad-except + # If rollback also fails, just continue - don't let issues crash the app + logging.error( + "DBEventLogger failed to rollback the session after failure" + ) class StdOutEventLogger(AbstractEventLogger): diff --git a/tests/integration_tests/query_context_tests.py b/tests/integration_tests/query_context_tests.py index 21694580aa9..dedc9b97820 100644 --- a/tests/integration_tests/query_context_tests.py +++ b/tests/integration_tests/query_context_tests.py @@ -864,12 +864,14 @@ def test_time_column_with_time_grain(app_context, physical_dataset): "label": "I_AM_AN_ORIGINAL_COLUMN", "sqlExpression": "col5", "timeGrain": "P1Y", + "isColumnReference": True, } adhoc_column: AdhocColumn = { "label": "I_AM_A_TRUNC_COLUMN", "sqlExpression": "col6", "columnType": "BASE_AXIS", "timeGrain": "P1Y", + "isColumnReference": True, } qc = QueryContextFactory().create( datasource={ @@ -1039,6 +1041,7 @@ def test_time_grain_and_time_offset_with_base_axis(app_context, physical_dataset "sqlExpression": "col6", "columnType": "BASE_AXIS", "timeGrain": "P3M", + "isColumnReference": True, } qc = QueryContextFactory().create( datasource={ @@ -1152,6 +1155,7 @@ def test_time_offset_with_temporal_range_filter(app_context, physical_dataset): "sqlExpression": "col6", "columnType": "BASE_AXIS", "timeGrain": "P3M", + "isColumnReference": True, } ], "metrics": [ diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index dfbcd9a278e..d1b5721822b 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -1395,20 +1395,24 @@ def test_reapply_query_filters_with_empty_filters(database: Database) -> None: def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: """ - Test that adhoc_column_to_sqla - properly quotes column identifiers when isColumnReference is true. + Test that adhoc_column_to_sqla properly handles column references + by looking up the column in metadata instead of quoting and processing through + SQLGlot. - This tests the fix for column names with spaces being properly quoted - before being processed by SQLGlot to prevent "column AS alias" misinterpretation. + This tests the fix for column names with spaces being properly handled + without going through SQLGlot which could misinterpret "column AS alias" patterns. """ - from superset.connectors.sqla.models import SqlaTable + from superset.connectors.sqla.models import SqlaTable, TableColumn table = SqlaTable( table_name="test_table", database=database, + columns=[ + TableColumn(column_name="Customer Name", type="TEXT"), + ], ) - # Test 1: Column reference with spaces should be quoted + # Test: Column reference with spaces should be found in metadata col_with_spaces: AdhocColumn = { "sqlExpression": "Customer Name", "label": "Customer Name", @@ -1417,8 +1421,223 @@ def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None: result = table.adhoc_column_to_sqla(col_with_spaces) - # Should contain the quoted column name + # Should return a valid SQLAlchemy column + assert result is not None + result_str = str(result) + + # The column name should be present (may or may not be quoted depending on dialect) + assert "Customer Name" in result_str or '"Customer Name"' in result_str + + +def test_adhoc_column_to_sqla_preserves_column_type_for_time_grain( + database: Database, +) -> None: + """ + Test that adhoc_column_to_sqla preserves column type info in column references. + + This tests the fix where column references now look up metadata first, preserving + type information needed for time grain operations. Previously, quoting the column + name before metadata lookup would cause the column to not be found, resulting in + NULL type and failing to apply time grain transformations properly. + + The test verifies that: + 1. Column metadata is found by looking up the unquoted column name + 2. The column type (DATE) is preserved when creating the SQLAlchemy column + 3. The get_timestamp_expr method is properly called with the column type info + """ + from superset.connectors.sqla.models import SqlaTable, TableColumn + + # Create a table with a temporal column + table = SqlaTable( + table_name="test_table", + database=database, + columns=[ + TableColumn( + column_name="local_date", + type="DATE", + is_dttm=True, + ) + ], + ) + + # Test with a DATE column reference with time grain + date_col: AdhocColumn = { + "sqlExpression": "local_date", + "label": "local_date", + "isColumnReference": True, + "timeGrain": "P1D", # Daily time grain + "columnType": "BASE_AXIS", + } + + # Should not raise ColumnNotFoundException + result = table.adhoc_column_to_sqla(date_col) + assert result is not None result_str = str(result) - assert '"Customer Name"' in result_str + # Verify the column name is present (may be quoted depending on dialect) + assert "local_date" in result_str + + +def test_adhoc_column_to_sqla_with_temporal_column_types(database: Database) -> None: + """ + Test that adhoc_column_to_sqla correctly handles different temporal column types. + + This verifies that for different temporal types (DATE, DATETIME, TIMESTAMP), + the column metadata is properly found and the column type is preserved, + allowing time grain operations to work correctly. + """ + from superset.connectors.sqla.models import SqlaTable, TableColumn + + # Test different temporal types + temporal_types = ["DATE", "DATETIME", "TIMESTAMP"] + + for type_name in temporal_types: + table = SqlaTable( + table_name="test_table", + database=database, + columns=[ + TableColumn( + column_name="time_col", + type=type_name, + is_dttm=True, + ) + ], + ) + + time_col: AdhocColumn = { + "sqlExpression": "time_col", + "label": "time_col", + "isColumnReference": True, + "timeGrain": "P1D", + "columnType": "BASE_AXIS", + } + + result = table.adhoc_column_to_sqla(time_col) + + assert result is not None + result_str = str(result) + + # Verify the column name is present + assert "time_col" in result_str + + +def test_adhoc_column_with_spaces_generates_quoted_sql(database: Database) -> None: + """ + Test that column names with spaces are properly quoted in the generated SQL. + + This verifies that even though we look up columns using unquoted names, + the final SQL still properly quotes column names that need quoting (like those with + spaces). + """ + + from superset.connectors.sqla.models import SqlaTable, TableColumn + + table = SqlaTable( + table_name="test_table", + database=database, + columns=[ + TableColumn(column_name="Customer Name", type="TEXT"), + TableColumn(column_name="Order Total", type="NUMERIC"), + ], + ) + + # Test column reference with spaces + col_with_spaces: AdhocColumn = { + "sqlExpression": "Customer Name", + "label": "Customer Name", + "isColumnReference": True, + } + + result = table.adhoc_column_to_sqla(col_with_spaces) + + # Compile the column to SQL to see how it's rendered + with database.get_sqla_engine() as engine: + sql = str( + result.compile( + dialect=engine.dialect, compile_kwargs={"literal_binds": True} + ) + ) + + # The SQL should quote the column name (SQLite uses double quotes) + # Column names with spaces MUST be quoted in SQL + assert '"Customer Name"' in sql, f"Expected quoted column name in SQL: {sql}" + + # Also test that it works in a query context + col_numeric: AdhocColumn = { + "sqlExpression": "Order Total", + "label": "Order Total", + "isColumnReference": True, + } + + result_numeric = table.adhoc_column_to_sqla(col_numeric) + + with database.get_sqla_engine() as engine: + sql_numeric = str( + result_numeric.compile( + dialect=engine.dialect, compile_kwargs={"literal_binds": True} + ) + ) + + assert '"Order Total"' in sql_numeric, ( + f"Expected quoted column name in SQL: {sql_numeric}" + ) + + +def test_adhoc_column_with_spaces_in_full_query(database: Database) -> None: + """ + Test that column names with spaces work correctly in a full SELECT query. + + This demonstrates that the fix properly handles column names with spaces + throughout the entire query generation process, with proper quoting in the final + SQL. + """ + import sqlalchemy as sa + + from superset.connectors.sqla.models import SqlaTable, TableColumn + + table = SqlaTable( + table_name="test_table", + database=database, + columns=[ + TableColumn(column_name="Customer Name", type="TEXT"), + TableColumn(column_name="Order Total", type="NUMERIC"), + ], + ) + + # Create adhoc columns for both columns with spaces + customer_col: AdhocColumn = { + "sqlExpression": "Customer Name", + "label": "Customer Name", + "isColumnReference": True, + } + + order_col: AdhocColumn = { + "sqlExpression": "Order Total", + "label": "Order Total", + "isColumnReference": True, + } + + # Get SQLAlchemy columns + customer_sqla = table.adhoc_column_to_sqla(customer_col) + order_sqla = table.adhoc_column_to_sqla(order_col) + + # Build a full query + tbl = table.get_sqla_table() + query = sa.select(customer_sqla, order_sqla).select_from(tbl) + + # Compile to SQL + with database.get_sqla_engine() as engine: + sql = str( + query.compile( + dialect=engine.dialect, compile_kwargs={"literal_binds": True} + ) + ) + + # Verify both column names are quoted in the final SQL + assert '"Customer Name"' in sql, f"Customer Name not properly quoted in SQL: {sql}" + assert '"Order Total"' in sql, f"Order Total not properly quoted in SQL: {sql}" + + # Verify SELECT and FROM clauses are present + assert "SELECT" in sql + assert "FROM" in sql