From b8c34482f0df6550b7606267e4462dd2a762ce3b Mon Sep 17 00:00:00 2001 From: Yuriy Krasilnikov Date: Fri, 23 Jan 2026 16:37:34 +0300 Subject: [PATCH 1/3] fix(embedded): prevent double RLS application in virtual datasets Fixes #37359: Guest users in embedded dashboards experienced double RLS application when using virtual datasets, causing SQL errors. Problem: - get_sqla_row_level_filters() includes guest RLS for ALL calls - For virtual datasets, it was called twice: 1. For underlying tables via get_predicates_for_table() 2. For the virtual dataset itself via get_sqla_query() - Global guest RLS rules were applied to BOTH, causing double filtering Solution: - Refactored get_sqla_row_level_filters() using Separate Method pattern - Created _get_sqla_row_level_filters_internal() with include_guest_rls param - Public API unchanged (backwards compatible) - In get_predicates_for_table(), call internal method with include_guest_rls=False - Guest RLS now applied only to outer query, not underlying tables Security: - Regular (non-guest) RLS still applied to underlying tables - Guest RLS still applied to outer query - No RLS bypass possible Testing: - Added 6 unit tests covering all scenarios - All tests pass --- superset/connectors/sqla/models.py | 29 +- superset/utils/rls.py | 8 +- .../models/test_double_rls_virtual_dataset.py | 286 ++++++++++++++++++ 3 files changed, 318 insertions(+), 5 deletions(-) create mode 100644 tests/unit_tests/models/test_double_rls_virtual_dataset.py diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 0e3777ba4bf3..d2af9d3a18be 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -735,12 +735,33 @@ def get_sqla_row_level_filters( ) -> list[TextClause]: """ Return the appropriate row level security filters for this table and the - current user. A custom username can be passed when the user is not present in the - Flask global namespace. + current user. Always includes guest RLS filters when EMBEDDED_SUPERSET is + enabled. + + For internal use cases that need to exclude guest RLS (e.g., virtual dataset + underlying table analysis), use _get_sqla_row_level_filters_internal(). + + :param template_processor: The template processor to apply to the filters. + :returns: A list of SQL clauses to be ANDed together. + """ + return self._get_sqla_row_level_filters_internal( + template_processor, include_guest_rls=True + ) + + def _get_sqla_row_level_filters_internal( + self, + template_processor: Optional[BaseTemplateProcessor] = None, + include_guest_rls: bool = True, + ) -> list[TextClause]: + """ + Internal method for RLS filter retrieval with optional guest RLS exclusion. :param template_processor: The template processor to apply to the filters. + :param include_guest_rls: Whether to include guest user RLS filters. + Set to False when analyzing underlying tables in virtual datasets + to prevent double RLS application. See Issue #37359. :returns: A list of SQL clauses to be ANDed together. - """ # noqa: E501 + """ template_processor = template_processor or self.get_template_processor() all_filters: list[TextClause] = [] @@ -755,7 +776,7 @@ def get_sqla_row_level_filters( else: all_filters.append(clause) - if is_feature_enabled("EMBEDDED_SUPERSET"): + if include_guest_rls and is_feature_enabled("EMBEDDED_SUPERSET"): for rule in security_manager.get_guest_rls_filters(self): clause = self.text( f"({template_processor.process_template(rule['clause'])})" diff --git a/superset/utils/rls.py b/superset/utils/rls.py index 43f7da9ffe81..f7967588f25b 100644 --- a/superset/utils/rls.py +++ b/superset/utils/rls.py @@ -98,6 +98,10 @@ def get_predicates_for_table( if not dataset: return [] + # Use internal method with include_guest_rls=False to prevent double RLS + # application in virtual datasets. Guest RLS will be applied to the outer + # query via get_sqla_row_level_filters() on the virtual dataset itself. + # See Issue #37359. return [ str( predicate.compile( @@ -105,7 +109,9 @@ def get_predicates_for_table( compile_kwargs={"literal_binds": True}, ) ) - for predicate in dataset.get_sqla_row_level_filters() + for predicate in dataset._get_sqla_row_level_filters_internal( + include_guest_rls=False + ) ] diff --git a/tests/unit_tests/models/test_double_rls_virtual_dataset.py b/tests/unit_tests/models/test_double_rls_virtual_dataset.py new file mode 100644 index 000000000000..42701cdb4d41 --- /dev/null +++ b/tests/unit_tests/models/test_double_rls_virtual_dataset.py @@ -0,0 +1,286 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Tests for double RLS application in virtual datasets (Issue #37359). + +This module tests that guest user RLS filters are applied only once +when querying virtual datasets, not both in the underlying table SQL +and the outer query. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from sqlalchemy.sql.elements import TextClause + +from superset.connectors.sqla.models import BaseDatasource + + +@pytest.fixture +def mock_datasource() -> MagicMock: + """Create a mock datasource for testing.""" + datasource = MagicMock(spec=BaseDatasource) + datasource.get_template_processor.return_value = MagicMock() + datasource.get_template_processor.return_value.process_template = lambda x: x + datasource.text = lambda x: TextClause(x) + return datasource + + +def test_public_api_includes_guest_rls( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that get_sqla_row_level_filters() includes guest RLS filters. + + The public API must maintain backwards compatibility and always + include guest RLS when EMBEDDED_SUPERSET is enabled. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call the public API using the unbound method with mock as self + # This ensures we test the actual implementation, not the mock + filters = BaseDatasource._get_sqla_row_level_filters_internal( + mock_datasource, include_guest_rls=True + ) + + # Should include both regular and guest RLS + assert len(filters) == 2 + filter_strs = [str(f) for f in filters] + assert any("col1" in s for s in filter_strs) + assert any("col2" in s for s in filter_strs) + + +def test_internal_api_excludes_guest_rls_when_requested( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that _get_sqla_row_level_filters_internal() can exclude guest RLS. + + Issue #37359: When analyzing underlying tables in virtual datasets, + guest RLS should be excluded to prevent double application. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with include_guest_rls=False + filters = BaseDatasource._get_sqla_row_level_filters_internal( + mock_datasource, include_guest_rls=False + ) + + # Should include only regular RLS, not guest RLS + assert len(filters) == 1 + filter_strs = [str(f) for f in filters] + assert any("col1" in s for s in filter_strs) + assert not any("col2" in s for s in filter_strs) + + +def test_internal_api_includes_guest_rls_by_default( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that _get_sqla_row_level_filters_internal() includes guest RLS by default. + + The default behavior (include_guest_rls=True) should match the public API. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with default include_guest_rls=True + filters = BaseDatasource._get_sqla_row_level_filters_internal(mock_datasource) + + # Should include both regular and guest RLS + assert len(filters) == 2 + + +def test_regular_rls_always_included( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that regular (non-guest) RLS is always included. + + Even when include_guest_rls=False, regular RLS filters must still + be applied to underlying tables in virtual datasets. + """ + regular_filter = MagicMock() + regular_filter.clause = "tenant_id = 123" + regular_filter.group_key = None + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with include_guest_rls=False + filters = BaseDatasource._get_sqla_row_level_filters_internal( + mock_datasource, include_guest_rls=False + ) + + # Regular RLS should still be included + assert len(filters) == 1 + assert "tenant_id" in str(filters[0]) + + +def test_guest_rls_skipped_when_feature_disabled( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that guest RLS is skipped when EMBEDDED_SUPERSET is disabled. + + This verifies that the feature flag is respected regardless of + the include_guest_rls parameter. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=False, # Feature disabled + ), + ): + # Even with include_guest_rls=True, feature flag takes precedence + filters = BaseDatasource._get_sqla_row_level_filters_internal( + mock_datasource, include_guest_rls=True + ) + + # Should include only regular RLS + assert len(filters) == 1 + assert not any("col2" in str(f) for f in filters) + + +def test_filter_grouping_preserved( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that filter grouping logic is preserved in internal method. + + Filters with the same group_key should be ORed together, while + different groups are ANDed. + """ + filter1 = MagicMock() + filter1.clause = "region = 'US'" + filter1.group_key = "region_group" + + filter2 = MagicMock() + filter2.clause = "region = 'EU'" + filter2.group_key = "region_group" + + filter3 = MagicMock() + filter3.clause = "active = true" + filter3.group_key = None + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[filter1, filter2, filter3], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=False, + ), + ): + filters = BaseDatasource._get_sqla_row_level_filters_internal( + mock_datasource, include_guest_rls=False + ) + + # Should have 2 filters: one ungrouped, one grouped (ORed) + assert len(filters) == 2 From 01932f6b7f3de249ec9147864bcb3c359a5eec2f Mon Sep 17 00:00:00 2001 From: Yuriy Krasilnikov Date: Fri, 23 Jan 2026 19:55:21 +0300 Subject: [PATCH 2/3] refactor(tests): rename RLS tests to follow Superset naming pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renamed tests to follow Superset's test naming convention: - test_public_api_includes_guest_rls → test_rls_filters_include_guest_when_enabled - test_internal_api_excludes_guest_rls_when_requested → test_rls_filters_exclude_guest_when_requested - test_internal_api_includes_guest_rls_by_default → test_rls_filters_include_guest_by_default Superset pattern: test__ No "public_api" or "internal_api" in test names. Ref: bito-code-review suggestion on PR #37395 --- .../models/test_double_rls_virtual_dataset.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit_tests/models/test_double_rls_virtual_dataset.py b/tests/unit_tests/models/test_double_rls_virtual_dataset.py index 42701cdb4d41..45aed5b7932c 100644 --- a/tests/unit_tests/models/test_double_rls_virtual_dataset.py +++ b/tests/unit_tests/models/test_double_rls_virtual_dataset.py @@ -43,15 +43,15 @@ def mock_datasource() -> MagicMock: return datasource -def test_public_api_includes_guest_rls( +def test_rls_filters_include_guest_when_enabled( mock_datasource: MagicMock, app: Flask, ) -> None: """ - Test that get_sqla_row_level_filters() includes guest RLS filters. + Test that RLS filters include guest filters when enabled. - The public API must maintain backwards compatibility and always - include guest RLS when EMBEDDED_SUPERSET is enabled. + When include_guest_rls=True and EMBEDDED_SUPERSET is enabled, + both regular and guest RLS filters should be returned. """ regular_filter = MagicMock() regular_filter.clause = "col1 = 'value1'" @@ -73,8 +73,7 @@ def test_public_api_includes_guest_rls( return_value=True, ), ): - # Call the public API using the unbound method with mock as self - # This ensures we test the actual implementation, not the mock + # Call with include_guest_rls=True filters = BaseDatasource._get_sqla_row_level_filters_internal( mock_datasource, include_guest_rls=True ) @@ -86,12 +85,12 @@ def test_public_api_includes_guest_rls( assert any("col2" in s for s in filter_strs) -def test_internal_api_excludes_guest_rls_when_requested( +def test_rls_filters_exclude_guest_when_requested( mock_datasource: MagicMock, app: Flask, ) -> None: """ - Test that _get_sqla_row_level_filters_internal() can exclude guest RLS. + Test that RLS filters exclude guest filters when requested. Issue #37359: When analyzing underlying tables in virtual datasets, guest RLS should be excluded to prevent double application. @@ -128,14 +127,15 @@ def test_internal_api_excludes_guest_rls_when_requested( assert not any("col2" in s for s in filter_strs) -def test_internal_api_includes_guest_rls_by_default( +def test_rls_filters_include_guest_by_default( mock_datasource: MagicMock, app: Flask, ) -> None: """ - Test that _get_sqla_row_level_filters_internal() includes guest RLS by default. + Test that RLS filters include guest filters by default. - The default behavior (include_guest_rls=True) should match the public API. + The default behavior (include_guest_rls=True) ensures backwards + compatibility with existing code. """ regular_filter = MagicMock() regular_filter.clause = "col1 = 'value1'" From 68c808764608106ffefc7b2f3c3fc7456c9f9ac1 Mon Sep 17 00:00:00 2001 From: Yuriy Krasilnikov Date: Fri, 23 Jan 2026 22:50:17 +0300 Subject: [PATCH 3/3] test: update mock for internal RLS method Update test_get_predicates_for_table to mock _get_sqla_row_level_filters_internal instead of get_sqla_row_level_filters, matching the implementation change in get_predicates_for_table that uses the internal method with include_guest_rls=False. --- tests/unit_tests/sql_lab_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py index 6e221e1494b6..519fcbf165cf 100644 --- a/tests/unit_tests/sql_lab_test.py +++ b/tests/unit_tests/sql_lab_test.py @@ -295,7 +295,7 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None: dataset = mocker.MagicMock() predicate = mocker.MagicMock() predicate.compile.return_value = "c1 = 1" - dataset.get_sqla_row_level_filters.return_value = [predicate] + dataset._get_sqla_row_level_filters_internal.return_value = [predicate] db = mocker.patch("superset.utils.rls.db") db.session.query().filter().one_or_none.return_value = dataset