Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 32 additions & 17 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,33 +1509,48 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals
:rtype: sqlalchemy.sql.column
"""
label = utils.get_column_name(col)
try:
sql_expression = col["sqlExpression"]

# For column references, conditionally quote identifiers that need it
if col.get("isColumnReference"):
sql_expression = self.database.quote_identifier(sql_expression)

expression = self._process_select_expression(
expression=sql_expression,
database_id=self.database_id,
engine=self.database.backend,
schema=self.schema,
template_processor=template_processor,
)
except SupersetSecurityException as ex:
raise QueryObjectValidationError(ex.message) from ex
sql_expression = col["sqlExpression"]
time_grain = col.get("timeGrain")
has_timegrain = col.get("columnType") == "BASE_AXIS" and time_grain
is_dttm = False
pdf = None
if col_in_metadata := self.get_column(expression):
is_column_reference = col.get("isColumnReference")

# First, check if this is a column reference that exists in metadata
col_in_metadata = None
if is_column_reference:
col_in_metadata = self.get_column(sql_expression)
Comment on lines +1517 to +1522
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do older saved charts have isColumnReference? If not will this it break for them?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They don't, but we have a fallback for old charts without the attribute.


if col_in_metadata:
# Column exists in metadata - use it directly
sqla_column = col_in_metadata.get_sqla_col(
template_processor=template_processor
)
is_dttm = col_in_metadata.is_temporal
pdf = col_in_metadata.python_date_format
else:
# Column doesn't exist in metadata or is not a reference - treat as ad-hoc
# expression Note: If isColumnReference=true but column not found, we still
# quote it as a fallback for backwards compatibility, though this indicates
# the frontend sent incorrect metadata
try:
# For column references, conditionally quote identifiers that need it
expression_to_process = sql_expression
if is_column_reference:
expression_to_process = self.database.quote_identifier(
sql_expression
)

expression = self._process_select_expression(
expression=expression_to_process,
database_id=self.database_id,
engine=self.database.backend,
schema=self.schema,
template_processor=template_processor,
)
except SupersetSecurityException as ex:
raise QueryObjectValidationError(ex.message) from ex

sqla_column = literal_column(expression)
if has_timegrain or force_type_check:
try:
Expand Down
11 changes: 11 additions & 0 deletions superset/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,19 @@ def log( # pylint: disable=too-many-arguments,too-many-locals
db.session.bulk_save_objects(logs)
db.session.commit() # pylint: disable=consider-using-transaction
except SQLAlchemyError as ex:
# Log errors but don't raise - logging failures should not break the
# application. Common in tests where the session may be in prepared state or
# db is locked
logging.error("DBEventLogger failed to log event(s)")
logging.exception(ex)
# Rollback to clean up the session state
try:
db.session.rollback()
except Exception: # pylint: disable=broad-except
# If rollback also fails, just continue - don't let issues crash the app
logging.error(
"DBEventLogger failed to rollback the session after failure"
)


class StdOutEventLogger(AbstractEventLogger):
Expand Down
4 changes: 4 additions & 0 deletions tests/integration_tests/query_context_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,14 @@ def test_time_column_with_time_grain(app_context, physical_dataset):
"label": "I_AM_AN_ORIGINAL_COLUMN",
"sqlExpression": "col5",
"timeGrain": "P1Y",
"isColumnReference": True,
}
adhoc_column: AdhocColumn = {
"label": "I_AM_A_TRUNC_COLUMN",
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P1Y",
"isColumnReference": True,
}
qc = QueryContextFactory().create(
datasource={
Expand Down Expand Up @@ -1039,6 +1041,7 @@ def test_time_grain_and_time_offset_with_base_axis(app_context, physical_dataset
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P3M",
"isColumnReference": True,
}
qc = QueryContextFactory().create(
datasource={
Expand Down Expand Up @@ -1152,6 +1155,7 @@ def test_time_offset_with_temporal_range_filter(app_context, physical_dataset):
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P3M",
"isColumnReference": True,
}
],
"metrics": [
Expand Down
235 changes: 227 additions & 8 deletions tests/unit_tests/models/helpers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,20 +1395,24 @@ def test_reapply_query_filters_with_empty_filters(database: Database) -> None:

def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None:
"""
Test that adhoc_column_to_sqla
properly quotes column identifiers when isColumnReference is true.
Test that adhoc_column_to_sqla properly handles column references
by looking up the column in metadata instead of quoting and processing through
SQLGlot.

This tests the fix for column names with spaces being properly quoted
before being processed by SQLGlot to prevent "column AS alias" misinterpretation.
This tests the fix for column names with spaces being properly handled
without going through SQLGlot which could misinterpret "column AS alias" patterns.
"""
from superset.connectors.sqla.models import SqlaTable
from superset.connectors.sqla.models import SqlaTable, TableColumn

table = SqlaTable(
table_name="test_table",
database=database,
columns=[
TableColumn(column_name="Customer Name", type="TEXT"),
],
)

# Test 1: Column reference with spaces should be quoted
# Test: Column reference with spaces should be found in metadata
col_with_spaces: AdhocColumn = {
"sqlExpression": "Customer Name",
"label": "Customer Name",
Expand All @@ -1417,8 +1421,223 @@ def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None:

result = table.adhoc_column_to_sqla(col_with_spaces)

# Should contain the quoted column name
# Should return a valid SQLAlchemy column
assert result is not None
result_str = str(result)

# The column name should be present (may or may not be quoted depending on dialect)
assert "Customer Name" in result_str or '"Customer Name"' in result_str


def test_adhoc_column_to_sqla_preserves_column_type_for_time_grain(
database: Database,
) -> None:
"""
Test that adhoc_column_to_sqla preserves column type info in column references.

This tests the fix where column references now look up metadata first, preserving
type information needed for time grain operations. Previously, quoting the column
name before metadata lookup would cause the column to not be found, resulting in
NULL type and failing to apply time grain transformations properly.

The test verifies that:
1. Column metadata is found by looking up the unquoted column name
2. The column type (DATE) is preserved when creating the SQLAlchemy column
3. The get_timestamp_expr method is properly called with the column type info
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn

# Create a table with a temporal column
table = SqlaTable(
table_name="test_table",
database=database,
columns=[
TableColumn(
column_name="local_date",
type="DATE",
is_dttm=True,
)
],
)

# Test with a DATE column reference with time grain
date_col: AdhocColumn = {
"sqlExpression": "local_date",
"label": "local_date",
"isColumnReference": True,
"timeGrain": "P1D", # Daily time grain
"columnType": "BASE_AXIS",
}

# Should not raise ColumnNotFoundException
result = table.adhoc_column_to_sqla(date_col)

assert result is not None
result_str = str(result)

assert '"Customer Name"' in result_str
# Verify the column name is present (may be quoted depending on dialect)
assert "local_date" in result_str


def test_adhoc_column_to_sqla_with_temporal_column_types(database: Database) -> None:
"""
Test that adhoc_column_to_sqla correctly handles different temporal column types.

This verifies that for different temporal types (DATE, DATETIME, TIMESTAMP),
the column metadata is properly found and the column type is preserved,
allowing time grain operations to work correctly.
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn

# Test different temporal types
temporal_types = ["DATE", "DATETIME", "TIMESTAMP"]

for type_name in temporal_types:
table = SqlaTable(
table_name="test_table",
database=database,
columns=[
TableColumn(
column_name="time_col",
type=type_name,
is_dttm=True,
)
],
)

time_col: AdhocColumn = {
"sqlExpression": "time_col",
"label": "time_col",
"isColumnReference": True,
"timeGrain": "P1D",
"columnType": "BASE_AXIS",
}

result = table.adhoc_column_to_sqla(time_col)

assert result is not None
result_str = str(result)

# Verify the column name is present
assert "time_col" in result_str


def test_adhoc_column_with_spaces_generates_quoted_sql(database: Database) -> None:
"""
Test that column names with spaces are properly quoted in the generated SQL.

This verifies that even though we look up columns using unquoted names,
the final SQL still properly quotes column names that need quoting (like those with
spaces).
"""

from superset.connectors.sqla.models import SqlaTable, TableColumn

table = SqlaTable(
table_name="test_table",
database=database,
columns=[
TableColumn(column_name="Customer Name", type="TEXT"),
TableColumn(column_name="Order Total", type="NUMERIC"),
],
)

# Test column reference with spaces
col_with_spaces: AdhocColumn = {
"sqlExpression": "Customer Name",
"label": "Customer Name",
"isColumnReference": True,
}

result = table.adhoc_column_to_sqla(col_with_spaces)

# Compile the column to SQL to see how it's rendered
with database.get_sqla_engine() as engine:
sql = str(
result.compile(
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
)
)

# The SQL should quote the column name (SQLite uses double quotes)
# Column names with spaces MUST be quoted in SQL
assert '"Customer Name"' in sql, f"Expected quoted column name in SQL: {sql}"

# Also test that it works in a query context
col_numeric: AdhocColumn = {
"sqlExpression": "Order Total",
"label": "Order Total",
"isColumnReference": True,
}

result_numeric = table.adhoc_column_to_sqla(col_numeric)

with database.get_sqla_engine() as engine:
sql_numeric = str(
result_numeric.compile(
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
)
)

assert '"Order Total"' in sql_numeric, (
f"Expected quoted column name in SQL: {sql_numeric}"
)


def test_adhoc_column_with_spaces_in_full_query(database: Database) -> None:
"""
Test that column names with spaces work correctly in a full SELECT query.

This demonstrates that the fix properly handles column names with spaces
throughout the entire query generation process, with proper quoting in the final
SQL.
"""
import sqlalchemy as sa

from superset.connectors.sqla.models import SqlaTable, TableColumn

table = SqlaTable(
table_name="test_table",
database=database,
columns=[
TableColumn(column_name="Customer Name", type="TEXT"),
TableColumn(column_name="Order Total", type="NUMERIC"),
],
)

# Create adhoc columns for both columns with spaces
customer_col: AdhocColumn = {
"sqlExpression": "Customer Name",
"label": "Customer Name",
"isColumnReference": True,
}

order_col: AdhocColumn = {
"sqlExpression": "Order Total",
"label": "Order Total",
"isColumnReference": True,
}

# Get SQLAlchemy columns
customer_sqla = table.adhoc_column_to_sqla(customer_col)
order_sqla = table.adhoc_column_to_sqla(order_col)

# Build a full query
tbl = table.get_sqla_table()
query = sa.select(customer_sqla, order_sqla).select_from(tbl)

# Compile to SQL
with database.get_sqla_engine() as engine:
sql = str(
query.compile(
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
)
)

# Verify both column names are quoted in the final SQL
assert '"Customer Name"' in sql, f"Customer Name not properly quoted in SQL: {sql}"
assert '"Order Total"' in sql, f"Order Total not properly quoted in SQL: {sql}"

# Verify SELECT and FROM clauses are present
assert "SELECT" in sql
assert "FROM" in sql
Loading