diff --git a/superset/models/helpers.py b/superset/models/helpers.py index ea8526bd7329..8a63b5d85429 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -2042,6 +2042,11 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma metrics_exprs = [] time_filters = [] + + # Process FROM clause early to populate removed_filters from virtual dataset + # templates before we decide whether to add time filters + tbl, cte = self.get_from_clause(template_processor) + if granularity: if granularity not in columns_by_name or not dttm_col: raise QueryObjectValidationError( @@ -2064,6 +2069,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma self.always_filter_main_dttm and self.main_dttm_col in self.dttm_cols and self.main_dttm_col != dttm_col.column_name + and self.main_dttm_col not in removed_filters ): time_filters.append( self.get_time_filter( @@ -2074,13 +2080,21 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma ) ) - time_filter_column = self.get_time_filter( - time_col=dttm_col, - start_dttm=from_dttm, - end_dttm=to_dttm, - template_processor=template_processor, + # Check if time filter should be skipped because it was handled in template. + # Check both the actual column name and __timestamp alias + should_skip_time_filter = ( + dttm_col.column_name in removed_filters + or utils.DTTM_ALIAS in removed_filters ) - time_filters.append(time_filter_column) + + if not should_skip_time_filter: + time_filter_column = self.get_time_filter( + time_col=dttm_col, + start_dttm=from_dttm, + end_dttm=to_dttm, + template_processor=template_processor, + ) + time_filters.append(time_filter_column) # Always remove duplicates by column name, as sometimes `metrics_exprs` # can have the same name as a groupby column (e.g. when users use @@ -2099,8 +2113,6 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma qry = sa.select(select_exprs) - tbl, cte = self.get_from_clause(template_processor) - if groupby_all_columns: qry = qry.group_by(*groupby_all_columns.values()) @@ -2145,7 +2157,17 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma is_metric_filter = True filter_grain = flt.get("grain") - if get_column_name(flt_col) in removed_filters: + # Check if this filter should be skipped because it was handled in + # template. Special handling for __timestamp alias: check both the + # alias and the actual column name + filter_col_name = get_column_name(flt_col) + should_skip_filter = filter_col_name in removed_filters + if not should_skip_filter and flt_col == utils.DTTM_ALIAS and col_obj: + # For __timestamp, also check if the actual datetime column was + # removed + should_skip_filter = col_obj.column_name in removed_filters + + if should_skip_filter: # Skip generating SQLA filter when the jinja template handles it. continue diff --git a/tests/unit_tests/models/test_time_filter_double_application.py b/tests/unit_tests/models/test_time_filter_double_application.py new file mode 100644 index 000000000000..b37c6ce4b71f --- /dev/null +++ b/tests/unit_tests/models/test_time_filter_double_application.py @@ -0,0 +1,369 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test for double time filter application in virtual datasets (Issue #34894)""" + +from __future__ import annotations + +from datetime import datetime + +from flask import Flask +from freezegun import freeze_time +from pytest_mock import MockerFixture + +from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn +from superset.models.core import Database +from superset.superset_typing import QueryObjectDict + + +def test_time_filter_applied_once_with_remove_filter_in_virtual_dataset( + mocker: MockerFixture, + app: Flask, +) -> None: + """ + Test that time filter is applied only once when using get_time_filter with + remove_filter=True in a virtual dataset. + + This test verifies the fix for GitHub issue #34894 where time filters were + being applied twice - once in the inner query (virtual dataset) and again in + the outer query. + + Expected behavior: When remove_filter=True is used in the virtual dataset, + the time filter should only be applied in the inner query, not in the outer query. + """ + # use a sqlite database just for the dttm conversion from its DB engine spec + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + + # Create a virtual dataset that uses get_time_filter with + # remove_filter=True + virtual_dataset_sql = """ + SELECT * + FROM my_table + WHERE + dttm >= {{ + get_time_filter('dttm', remove_filter=True, target_type='DATE') + .from_expr + }} + AND dttm < {{ + get_time_filter('dttm', remove_filter=True, target_type='DATE') + .to_expr + }} + """ + + columns = [ + TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"), + TableColumn(column_name="value", type="INTEGER"), + ] + + virtual_dataset = SqlaTable( + table_name="virtual_dataset", + sql=virtual_dataset_sql, + columns=columns, + main_dttm_col="dttm", + database=database, + ) + + # Mock the security manager to avoid RLS checks + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + + # Create a query object with a time filter + query_obj: QueryObjectDict = { + "granularity": "dttm", + "from_dttm": datetime(2024, 1, 1), + "to_dttm": datetime(2024, 1, 31), + "is_timeseries": False, + "filter": [ + { + "col": "dttm", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + "metrics": [], + "columns": ["value"], + } + + with ( + freeze_time("2024-01-15"), + app.test_request_context( + json={ + "queries": [ + { + "filters": [ + { + "col": "dttm", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + } + ], + } + ), + ): + # Get the query SQL + sqla_query = virtual_dataset.get_query_str_extended(query_obj, mutate=False) + generated_sql = sqla_query.sql.lower() + + # Verify that the time filter appears only in the inner query from the + # virtual dataset and NOT in the outer query WHERE clause + + # Check if there's a WHERE clause after the subquery closes + # Split by "as virtual_table" and check if WHERE is in outer part + if "as virtual_table" in generated_sql: + outer_part = generated_sql.split("as virtual_table")[1] + # Extract just the part before GROUP BY/ORDER BY/LIMIT to check for WHERE + outer_where_part = ( + outer_part.split("group by")[0] + if "group by" in outer_part + else outer_part + ) + outer_where_part = ( + outer_where_part.split("order by")[0] + if "order by" in outer_where_part + else outer_where_part + ) + outer_where_part = ( + outer_where_part.split("limit")[0] + if "limit" in outer_where_part + else outer_where_part + ) + has_outer_where = "where" in outer_where_part and "dttm" in outer_where_part + else: + has_outer_where = False + + # Assert that there's NO time filter in the outer query + outer_clause = outer_where_part if "outer_where_part" in locals() else "N/A" + assert not has_outer_where, ( + f"Time filter should only be applied in the inner query " + f"(virtual dataset), not in the outer query when using " + f"remove_filter=True. Found outer WHERE clause: {outer_clause}" + ) + + +def test_time_filter_removed_from_outer_query_simple_case( + mocker: MockerFixture, app: Flask +) -> None: + """ + Test the basic mechanism: when a Jinja template uses get_time_filter with + remove_filter=True, the filter should be added to removed_filters list and + not be applied in the SQLAlchemy query generation. + + This is a simpler test that verifies the core mechanism works at a single level. + """ + # use a sqlite database just for the dttm conversion from its DB engine spec + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + + columns = [ + TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"), + TableColumn(column_name="value", type="INTEGER"), + ] + + # Simple query with Jinja template that removes the filter + simple_sql = """ + SELECT * + FROM my_table + WHERE dttm >= {{ + get_time_filter('dttm', remove_filter=True, target_type='DATE').from_expr + }} + """ + + dataset = SqlaTable( + table_name="test_table", + sql=simple_sql, + columns=columns, + main_dttm_col="dttm", + database=database, + ) + + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + + query_obj: QueryObjectDict = { + "granularity": "dttm", + "from_dttm": datetime(2024, 1, 1), + "to_dttm": datetime(2024, 1, 31), + "is_timeseries": False, + "filter": [ + { + "col": "dttm", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + "metrics": [], + "columns": ["value"], + } + + with ( + freeze_time("2024-01-15"), + app.test_request_context( + json={ + "queries": [ + { + "filters": [ + { + "col": "dttm", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + } + ], + } + ), + ): + sqla_query = dataset.get_query_str_extended(query_obj, mutate=False) + generated_sql = sqla_query.sql.lower() + + # In this simple case, the filter should appear exactly once + # (in the template, not added by SQLAlchemy) + # Count occurrences of date filter patterns + dttm_count = generated_sql.count("dttm") + + # We expect to see dttm in the SELECT and WHERE (from template only) + # But NOT duplicated by SQLAlchemy's filter application + assert dttm_count <= 3, ( + f"Expected at most 3 occurrences of 'dttm' " + f"(SELECT, WHERE from template), but found {dttm_count}. " + f"This suggests the filter is being applied multiple times." + ) + + +def test_time_filter_with_timestamp_alias(mocker: MockerFixture, app: Flask) -> None: + """ + Test that remove_filter works correctly when using __timestamp alias. + + This test specifically addresses the issue where time filters use the __timestamp + alias in timeseries queries, but get_time_filter uses the actual column name. + The fix ensures both are recognized as the same column. + """ + # use a sqlite database just for the dttm conversion from its DB engine spec + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + + columns = [ + TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"), + TableColumn(column_name="value", type="INTEGER"), + ] + + # Virtual dataset with Jinja template that removes the filter using + # actual column name + virtual_sql = """ + SELECT * + FROM my_table + WHERE dttm >= {{ + get_time_filter('dttm', remove_filter=True, target_type='DATE').from_expr + }} + AND dttm < {{ + get_time_filter('dttm', remove_filter=True, target_type='DATE').to_expr + }} + """ + + dataset = SqlaTable( + table_name="test_table", + sql=virtual_sql, + columns=columns, + main_dttm_col="dttm", + database=database, + metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")], + ) + + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + + # Query using __timestamp alias (as timeseries charts do) + query_obj: QueryObjectDict = { + "granularity": "dttm", + "from_dttm": datetime(2024, 1, 1), + "to_dttm": datetime(2024, 1, 31), + "is_timeseries": True, # This will cause __timestamp alias to be used + "filter": [ + { + "col": "__timestamp", # Using the special alias + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + "metrics": ["count"], + "columns": [], + } + + with ( + freeze_time("2024-01-15"), + app.test_request_context( + json={ + "queries": [ + { + "filters": [ + { + "col": "__timestamp", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + } + ], + } + ), + ): + sqla_query = dataset.get_query_str_extended(query_obj, mutate=False) + generated_sql = sqla_query.sql.lower() + + # The filter should only appear in the inner query (virtual dataset) + # NOT in the outer query WHERE clause + # Check if there's a WHERE clause after the subquery + parts = generated_sql.split(")") + if len(parts) > 1: + outer_part = parts[-1] + # The outer part should not have additional date filter conditions + # (It may have GROUP BY, ORDER BY, LIMIT, but not WHERE with dttm) + if "where" in outer_part: + assert "dttm" not in outer_part or "to_date" not in outer_part, ( + "Time filter should not be applied in outer query when using " + "remove_filter=True in virtual dataset. The __timestamp alias " + "should be recognized as equivalent to the actual column name." + )