Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 51 additions & 31 deletions superset/security/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,27 +725,36 @@ def _validate_child_in_parent_multilayer(
except (json.JSONDecodeError, TypeError):
return False

def has_drill_by_access(
def has_drill_access(
self,
form_data: dict[str, Any],
dashboard: "Dashboard",
datasource: "BaseDatasource | Explorable",
) -> bool:
"""
Return True if the form_data is performing a supported drill by operation,
False otherwise.
Return True if the form_data is performing a supported drill operation
(Drill to Detail or Drill By), False otherwise.

:param form_data: The form_data included in the request.
:param dashboard: The dashboard the user is drilling from.
:param datasource: The datasource being queried
:returns: Whether the user has drill by access.
:returns: Whether the user has drill access.
"""

from superset.models.slice import Slice

# Drill to Detail: no slice/chart context, dataset must belong to the dashboard
if (
form_data.get("slice_id") is None
and form_data.get("chart_id") is None
and datasource in dashboard.datasources
):
return True

# Drill By: slice_id is 0 (sentinel), chart_id identifies the source chart,
# and the requested groupby columns must be drillable
return bool(
form_data.get("type") != "NATIVE_FILTER"
and form_data.get("slice_id") == 0
form_data.get("slice_id") == 0
and (chart_id := form_data.get("chart_id"))
and (
slc := self.session.query(Slice)
Expand Down Expand Up @@ -2630,40 +2639,51 @@ def raise_for_access( # noqa: C901
)
)
or (
# Chart.
form_data.get("type") != "NATIVE_FILTER"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only real changes to this block are:

  1. I noticed we were validating form_data.get("type") != "NATIVE_FILTER" more than once, so I moved it to the top so that it is validated only once;
  2. Renamed the method from has_drill_by_access to has_drill_access.

I can undo #1 if we think that's better.

and (slice_id := form_data.get("slice_id"))
and (
# Direct chart access (no parent)
(
form_data.get("parent_slice_id") is None
# Chart.
(slice_id := form_data.get("slice_id"))
and (
slc := self.session.query(Slice)
.filter(Slice.id == slice_id)
.one_or_none()
# Direct chart access (no parent)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Holy moly, maybe it's time to refactor this method...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll try tackling that in a follow-up. I believe the logic on its own is correct, but we could move each validation block into its own helper method (like the drill validation is now).

(
form_data.get("parent_slice_id") is None
and (
slc := self.session.query(Slice)
.filter(Slice.id == slice_id)
.one_or_none()
)
and slc in dashboard_.slices
and slc.datasource == datasource
)
or
# Multi-layer chart child access (has parent)
(
(
parent_id := form_data.get(
"parent_slice_id"
)
)
and (
parent_slc := self.session.query(Slice)
.filter(Slice.id == parent_id)
.one_or_none()
)
and parent_slc in dashboard_.slices
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The multi-layer child access path validates parent_slice_id and child membership in deck_slices, but it never verifies that the requested datasource actually belongs to that child chart. Because datasource authorization is granted when this branch is true, a forged request can pair a valid parent/child chart combination with an unrelated datasource and bypass datasource access checks. Add an explicit lookup of the child slice and ensure its datasource matches the requested datasource before granting access. [security]

Severity Level: Critical 🚨
- ❌ Embedded guests query unauthorized datasets via ChartDataRestApi.data endpoint.
- ❌ Dashboard RBAC users bypass datasource checks for deck_multi children.
- ⚠️ Inconsistent child datasource validation in SupersetSecurityManager.raise_for_access.
Suggested change
and parent_slc in dashboard_.slices
and (
child_slc := self.session.query(Slice)
.filter(Slice.id == slice_id)
.one_or_none()
)
and parent_slc in dashboard_.slices
and child_slc.datasource == datasource
Steps of Reproduction ✅
1. Configure a dashboard with roles or embedded access so that either DASHBOARD_RBAC or
EMBEDDED_SUPERSET is enabled and used (checked in
`SupersetSecurityManager.raise_for_access` at `superset/security/manager.py:162-179` and
`:269-271` via `is_feature_enabled("DASHBOARD_RBAC")` /
`is_feature_enabled("EMBEDDED_SUPERSET")`).

2. On that dashboard, add a `deck_multi` chart whose parent slice has a `deck_slices`
configuration including a child chart ID (validated by
`_validate_child_in_parent_multilayer` at `superset/security/manager.py:702-724`),
ensuring the child chart uses dataset A; also create another dataset B that the same
user/guest does NOT have `datasource_access` or schema access to.

3. As an embedded guest user (feature flag `EMBEDDED_SUPERSET`) or a RBAC viewer of that
dashboard, send a POST request to `ChartDataRestApi.data` at `/api/v1/chart/data`
(`superset/charts/data/api.py:280-99`) with a JSON body that `ChartDataQueryContextSchema`
accepts, where `datasource` points to dataset B (e.g. `{"id": <B_id>, "type": "table"}`)
and `form_data` contains `{"dashboardId": <dashboard_id>, "type": "DRILL_DETAIL",
"slice_id": <child_slice_id>, "parent_slice_id": <parent_slice_id>, ...}` so that
`slice_id` is the valid child from the parent's `deck_slices`.

4. The request flows through `ChartDataCommand.validate`
(`superset/commands/chart/data/get_data_command.py:39-40`) which calls
`QueryContext.raise_for_access` (`superset/common/query_context.py:18`), which delegates
to `QueryContextProcessor.raise_for_access` and then
`security_manager.raise_for_access(query_context=self._query_context)`
(`superset/common/query_context_processor.py:525-537`); inside
`SupersetSecurityManager.raise_for_access` (`superset/security/manager.py:145-180` and
`:2445-2647`), the user fails the normal schema/datasource/ownership checks, but passes
the multi-layer child branch at lines `2644-35` because `parent_slc` is in
`dashboard_.slices` and `_validate_child_in_parent_multilayer` returns True, and there is
currently no `child_slc.datasource == datasource` validation—so no
`SupersetSecurityException` is raised and the query executes against dataset B, returning
unauthorized data.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/security/manager.py
**Line:** 2672:2672
**Comment:**
	*Security: The multi-layer child access path validates `parent_slice_id` and child membership in `deck_slices`, but it never verifies that the requested datasource actually belongs to that child chart. Because datasource authorization is granted when this branch is true, a forged request can pair a valid parent/child chart combination with an unrelated datasource and bypass datasource access checks. Add an explicit lookup of the child slice and ensure its datasource matches the requested datasource before granting access.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is outside the scope of this PR. I'll tag @msyavuz here, as he originally worked on this logic. Let me know if this is a concern and whether I can help with anything.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The child slice is validated against the parent's config via _validate_child_in_parent_multilayer; this ensures the slice_id is actually a child of the parent_slice_id in the deck_multi chart's deck_slices param. This is a false positive.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for confirming it!

# Validate child is actually part of parent's config # noqa: E501
and self._validate_child_in_parent_multilayer( # noqa: E501
child_slice_id=slice_id,
parent_slice=parent_slc,
)
)
)
and slc in dashboard_.slices
and slc.datasource == datasource
)
or
# Multi-layer chart child access (has parent)
(
(parent_id := form_data.get("parent_slice_id"))
and (
parent_slc := self.session.query(Slice)
.filter(Slice.id == parent_id)
.one_or_none()
)
and parent_slc in dashboard_.slices
# Validate child is actually part of parent's config
and self._validate_child_in_parent_multilayer(
child_slice_id=slice_id,
parent_slice=parent_slc,
)
# D2D or Drill By
or self.has_drill_access(
form_data, dashboard_, datasource
)
)
)
or self.has_drill_by_access(form_data, dashboard_, datasource)
)
and self.can_access_dashboard(dashboard_)
)
Expand Down
7 changes: 6 additions & 1 deletion superset/views/datasource/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,15 @@ def get_samples( # pylint: disable=too-many-arguments
force: bool = False,
page: int = 1,
per_page: int = 1000,
payload: Optional[SamplesPayloadSchema] = None,
payload: SamplesPayloadSchema | None = None,
dashboard_id: int | None = None,
) -> dict[str, Any]:
datasource = DatasourceDAO.get_datasource(
datasource_type=datasource_type,
database_id_or_uuid=str(datasource_id),
)

form_data = {"dashboardId": dashboard_id} if dashboard_id else None
limit_clause = get_limit_clause(page, per_page)

# todo(yongjie): Constructing count(*) and samples in the same query_context,
Expand All @@ -112,6 +114,7 @@ def get_samples( # pylint: disable=too-many-arguments
"id": datasource.id,
},
queries=[limit_clause],
form_data=form_data,
result_type=ChartDataResultType.SAMPLES,
force=force,
)
Expand All @@ -128,6 +131,7 @@ def get_samples( # pylint: disable=too-many-arguments
"id": datasource.id,
},
queries=[{**payload, **limit_clause}],
form_data=form_data,
result_type=ChartDataResultType.DRILL_DETAIL,
force=force,
)
Expand All @@ -148,6 +152,7 @@ def get_samples( # pylint: disable=too-many-arguments
"id": datasource.id,
},
queries=[{**payload, **count_star_metric} if payload else count_star_metric],
form_data=form_data,
result_type=ChartDataResultType.FULL,
force=force,
)
Expand Down
8 changes: 4 additions & 4 deletions superset/views/datasource/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,15 @@ def samples(self) -> FlaskResponse:
payload = SamplesPayloadSchema().load(request.json)
except ValidationError as err:
return json_error_response(err.messages, status=400)

dashboard_id = None
if security_manager.is_guest_user():
if not params["dashboard_id"]:
return json_error_response(_("Forbidden"), status=403)
dashboard_id = params["dashboard_id"]
dataset = DatasetDAO.find_by_id(
params["datasource_id"], skip_base_filter=True
)
dashboard = DashboardDAO.find_by_id(
params["dashboard_id"], skip_base_filter=True
)
dashboard = DashboardDAO.find_by_id(dashboard_id, skip_base_filter=True)
if not (dashboard and dataset):
return self.response_404()
if not security_manager.can_drill_dataset_via_dashboard_access(
Expand All @@ -237,6 +236,7 @@ def samples(self) -> FlaskResponse:
page=params["page"],
per_page=params["per_page"],
payload=payload,
dashboard_id=dashboard_id,
)
return self.json_response({"result": rv})

Expand Down
21 changes: 15 additions & 6 deletions tests/integration_tests/datasource_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pytest
from flask import current_app

from superset import db
from superset import db, security_manager as sm
from superset.commands.dataset.exceptions import DatasetNotFoundError
from superset.common.utils.query_cache_manager import QueryCacheManager
from superset.connectors.sqla.models import ( # noqa: F401
Expand Down Expand Up @@ -531,17 +531,26 @@ def test_get_samples_embedded_user(
self, mock_has_guest_access, mock_is_guest_user, mock_rls
):
"""
Embedded user can access the /samples view.
Embedded guest user can access /samples (for D2D) via the dashboard context
passed as form_data to QueryContextFactory.
"""
self.login(ADMIN_USERNAME)
# Gamma role doesn't have dataset access (mimic embedded role),
# but needs access to the /samples endpoint
gamma_role = sm.find_role("Gamma")
perm_view = sm.find_permission_view_menu("can_samples", "Datasource")
sm.add_permission_role(gamma_role, perm_view)
self.login(GAMMA_USERNAME)
mock_is_guest_user.return_value = True
mock_has_guest_access.return_value = True
mock_rls.return_value = []
tbl = self.get_table(name="birth_names")
dash = self.get_dash_by_slug("births")
uri = f"/datasource/samples?datasource_id={tbl.id}&datasource_type=table&dashboard_id={dash.id}" # noqa: E501
resp = self.client.post(uri, json={})
assert resp.status_code == 200
try:
uri = f"/datasource/samples?datasource_id={tbl.id}&datasource_type=table&dashboard_id={dash.id}" # noqa: E501
resp = self.client.post(uri, json={})
assert resp.status_code == 200
finally:
sm.del_permission_role(gamma_role, perm_view)

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@mock.patch(
Expand Down
100 changes: 99 additions & 1 deletion tests/integration_tests/security/guest_token_security_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,100 @@ def test_raise_for_access__no_dashboard_in_form_data(self):
}
)

def test_raise_for_access__no_chart_in_form_data(self):
def test_raise_for_access__drill_to_detail_happy_path(self):
"""
Drill to Detail: no slice_id in form_data, datasource is on the dashboard
the embedded user has access to.
"""
g.user = self.authorized_guest
for kwarg in ["viz", "query_context"]:
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=self.datasource,
form_data={
"dashboardId": self.dash.id,
},
slice_=None,
queries=[],
)
}
)

def test_raise_for_access__drill_to_detail_datasource_not_on_dashboard(self):
"""
Drill to Detail is denied when the target datasource is not associated
with the dashboard the embedded user has access to.
"""
g.user = self.authorized_guest
for kwarg in ["viz", "query_context"]:
with self.assertRaises(SupersetSecurityException): # noqa: PT027
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=self.other_datasource,
form_data={
"dashboardId": self.dash.id,
},
slice_=None,
queries=[],
)
}
)

def test_raise_for_access__drill_by_happy_path(self):
"""
Drill By: slice_id=0 (sentinel), chart_id points to a chart on the dashboard
whose datasource matches, the requested groupby column is drillable and the
embedded user has access to.
"""
g.user = self.authorized_guest
for kwarg in ["viz", "query_context"]:
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=self.datasource,
form_data={
"dashboardId": self.dash.id,
"slice_id": 0,
"chart_id": self.chart.id,
"groupby": ["gender"],
},
slice_=None,
queries=[],
)
}
)

def test_raise_for_access__drill_by_chart_not_on_dashboard(self):
"""
Drill By is denied when chart_id refers to a chart that is not on the
dashboard the embedded user has access to.
"""
g.user = self.authorized_guest
for kwarg in ["viz", "query_context"]:
with self.assertRaises(SupersetSecurityException): # noqa: PT027
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=self.other_datasource,
form_data={
"dashboardId": self.dash.id,
"slice_id": 0,
"chart_id": self.other_chart.id,
"groupby": ["gender"],
},
slice_=None,
queries=[],
)
}
)

def test_raise_for_access__drill_by_columns_not_drillable(self):
"""
Drill By is denied when the requested groupby columns are not marked as
drillable (groupby=True) on the datasource.
"""
g.user = self.authorized_guest
for kwarg in ["viz", "query_context"]:
with self.assertRaises(SupersetSecurityException): # noqa: PT027
Expand All @@ -366,7 +459,12 @@ def test_raise_for_access__no_chart_in_form_data(self):
datasource=self.datasource,
form_data={
"dashboardId": self.dash.id,
"slice_id": 0,
"chart_id": self.chart.id,
"groupby": ["__not_a_drillable_column__"],
},
slice_=None,
queries=[],
)
}
)
Expand Down
23 changes: 13 additions & 10 deletions tests/integration_tests/security_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1895,16 +1895,19 @@ def test_raise_for_access_rbac(
}
)

# Undefined dashboard chart.
with self.assertRaises(SupersetSecurityException): # noqa: PT027
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=birth_names,
form_data={"dashboardId": births.id},
)
}
)
# Drill to Detail (no slice_id/chart_id): datasource on dashboard.
# Access is granted via DASHBOARD_RBAC — D2D is a valid operation
# for users who have dashboard access.
security_manager.raise_for_access(
**{
kwarg: Mock(
datasource=birth_names,
form_data={"dashboardId": births.id},
slice_=None,
queries=[],
)
}
)

# Ill-defined dashboard chart.
with self.assertRaises(SupersetSecurityException): # noqa: PT027
Expand Down
Loading