Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/static/resources/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -962,8 +962,11 @@
"ChartDataDatasource": {
"properties": {
"id": {
"description": "Datasource id",
"type": "integer"
"description": "Datasource id/uuid",
"oneOf": [
{ "type": "integer" },
{ "type": "string" }
]
},
"type": {
"description": "Datasource type",
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ dependencies = [
"markdown>=3.0",
# marshmallow>=4 has issues: https://github.com/apache/superset/issues/33162
"marshmallow>=3.0, <4",
"marshmallow-union>=0.1",
"msgpack>=1.0.0, <1.1",
"nh3>=0.2.11, <0.3",
"numpy>1.23.5, <2.3",
Expand Down Expand Up @@ -227,7 +228,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = "superset, apache-superset-core, apache-superset-extensions-cli"
known_third_party = "alembic, apispec, backoff, celery, click, colorama, cron_descriptor, croniter, cryptography, dateutil, deprecation, flask, flask_appbuilder, flask_babel, flask_caching, flask_compress, flask_jwt_extended, flask_login, flask_migrate, flask_sqlalchemy, flask_talisman, flask_testing, flask_wtf, freezegun, geohash, geopy, holidays, humanize, isodate, jinja2, jwt, markdown, markupsafe, marshmallow, msgpack, nh3, numpy, pandas, parameterized, parsedatetime, pgsanity, polyline, prison, progress, pyarrow, sqlalchemy_bigquery, pyhive, pyparsing, pytest, pytest_mock, pytz, redis, requests, selenium, setuptools, shillelagh, simplejson, slack, sqlalchemy, sqlalchemy_utils, typing_extensions, urllib3, werkzeug, wtforms, wtforms_json, yaml"
known_third_party = "alembic, apispec, backoff, celery, click, colorama, cron_descriptor, croniter, cryptography, dateutil, deprecation, flask, flask_appbuilder, flask_babel, flask_caching, flask_compress, flask_jwt_extended, flask_login, flask_migrate, flask_sqlalchemy, flask_talisman, flask_testing, flask_wtf, freezegun, geohash, geopy, holidays, humanize, isodate, jinja2, jwt, markdown, markupsafe, marshmallow, marshmallow-union, msgpack, nh3, numpy, pandas, parameterized, parsedatetime, pgsanity, polyline, prison, progress, pyarrow, sqlalchemy_bigquery, pyhive, pyparsing, pytest, pytest_mock, pytz, redis, requests, selenium, setuptools, shillelagh, simplejson, slack, sqlalchemy, sqlalchemy_utils, typing_extensions, urllib3, werkzeug, wtforms, wtforms_json, yaml"
multi_line_output = 3
order_by_type = false

Expand Down
4 changes: 4 additions & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ greenlet==3.1.1
# via
# apache-superset (pyproject.toml)
# shillelagh
# sqlalchemy
gunicorn==23.0.0
# via apache-superset (pyproject.toml)
h11==0.16.0
Expand Down Expand Up @@ -219,10 +220,13 @@ marshmallow==3.26.1
# apache-superset (pyproject.toml)
# flask-appbuilder
# marshmallow-sqlalchemy
# marshmallow-union
marshmallow-sqlalchemy==1.4.0
# via
# -r requirements/base.in
# flask-appbuilder
marshmallow-union==0.1.15
# via apache-superset (pyproject.toml)
mdurl==0.1.2
# via markdown-it-py
msgpack==1.0.8
Expand Down
6 changes: 6 additions & 0 deletions requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ greenlet==3.1.1
# apache-superset
# gevent
# shillelagh
# sqlalchemy
grpcio==1.71.0
# via
# apache-superset
Expand Down Expand Up @@ -444,10 +445,15 @@ marshmallow==3.26.1
# apache-superset
# flask-appbuilder
# marshmallow-sqlalchemy
# marshmallow-union
marshmallow-sqlalchemy==1.4.0
# via
# -c requirements/base-constraint.txt
# flask-appbuilder
marshmallow-union==0.1.15
# via
# -c requirements/base.txt
# apache-superset
matplotlib==3.9.0
# via prophet
mccabe==0.7.0
Expand Down
6 changes: 4 additions & 2 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from flask_babel import gettext as _
from marshmallow import EXCLUDE, fields, post_load, Schema, validate
from marshmallow.validate import Length, Range
from marshmallow_union import Union

from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType
from superset.db_engine_specs.base import builtin_time_grains
Expand Down Expand Up @@ -1130,8 +1131,9 @@ class AnnotationLayerSchema(Schema):

class ChartDataDatasourceSchema(Schema):
description = "Chart datasource"
id = fields.Integer(
metadata={"description": "Datasource id"},
id = Union(
[fields.Integer(), fields.UUID()],
metadata={"description": "Datasource id or uuid"},
required=True,
)
type = fields.String(
Expand Down
2 changes: 1 addition & 1 deletion superset/common/query_context_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def create( # pylint: disable=too-many-arguments
def _convert_to_model(self, datasource: DatasourceDict) -> BaseDatasource:
    """Resolve a ``DatasourceDict`` into its ORM model via the DAO.

    The ``id`` entry may be either an integer primary key or a string
    uuid; ``DatasourceDAO.get_datasource`` dispatches on the value form.
    """
    # The stale `datasource_id=int(...)` keyword (left over from the old
    # signature) is removed: get_datasource now takes `database_id_or_uuid`
    # and passing both would raise a TypeError.
    return DatasourceDAO.get_datasource(
        datasource_type=DatasourceType(datasource["type"]),
        database_id_or_uuid=datasource["id"],
    )

def _get_slice(self, slice_id: Any) -> Slice | None:
Expand Down
2 changes: 1 addition & 1 deletion superset/common/query_object_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def create( # pylint: disable=too-many-arguments
def _convert_to_model(self, datasource: DatasourceDict) -> BaseDatasource:
    """Resolve a ``DatasourceDict`` into its ORM model via the injected DAO.

    The ``id`` entry may be either an integer primary key or a string
    uuid; the DAO dispatches on the value form.
    """
    # The stale `datasource_id=int(...)` keyword (left over from the old
    # signature) is removed: get_datasource now takes `database_id_or_uuid`
    # and passing both would raise a TypeError.
    return self._datasource_dao.get_datasource(
        datasource_type=DatasourceType(datasource["type"]),
        database_id_or_uuid=datasource["id"],
    )

def _process_extras(
Expand Down
29 changes: 29 additions & 0 deletions superset/daos/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,41 @@ class BaseDAO(Generic[T]):
Child classes can register base filtering to be applied to all filter methods
"""
id_column_name = "id"
uuid_column_name = "uuid"

def __init_subclass__(cls) -> None:
    # Capture the concrete model class bound to BaseDAO[T] at subclass
    # definition time, so DAO subclasses never declare `model_cls` by hand.
    # `__orig_bases__[0]` is the parameterized generic base (BaseDAO[SomeModel]);
    # get_args(...) extracts its type arguments and [0] takes the model.
    cls.model_cls = get_args(
        cls.__orig_bases__[0]  # type: ignore # pylint: disable=no-member
    )[0]

@classmethod
def find_by_id_or_uuid(
    cls,
    model_id_or_uuid: int | str,
    skip_base_filter: bool = False,
) -> T | None:
    """
    Find a model by its integer id or string uuid.

    An ``int`` value — or a digit-only string — is matched against the
    column named by ``cls.id_column_name``; any other string is matched
    against ``cls.uuid_column_name``. Applies ``base_filter`` (when one
    is registered) unless ``skip_base_filter`` is True.

    :param model_id_or_uuid: integer id, digit-only string, or uuid string
    :param skip_base_filter: bypass the registered ``base_filter``
    :returns: the matching model instance, or None when nothing matches
        (including when the value is neither a valid id nor a valid uuid)
    """
    query = db.session.query(cls.model_cls)
    if cls.base_filter and not skip_base_filter:
        data_model = SQLAInterface(cls.model_cls, db.session)
        query = cls.base_filter(  # pylint: disable=not-callable
            cls.id_column_name, data_model
        ).apply(query, None)
    id_column = getattr(cls.model_cls, cls.id_column_name)
    uuid_column = getattr(cls.model_cls, cls.uuid_column_name)

    # Accept plain ints in addition to digit-only strings, so callers that
    # still hold integer pks do not crash on str.isdigit(). `criterion`
    # avoids shadowing the `filter` builtin.
    if isinstance(model_id_or_uuid, int) or str(model_id_or_uuid).isdigit():
        criterion = id_column == int(model_id_or_uuid)
    else:
        criterion = uuid_column == model_id_or_uuid
    try:
        return query.filter(criterion).one_or_none()
    except StatementError:
        # a malformed uuid string makes the dialect raise at statement
        # execution time; treat it as "not found" rather than erroring
        return None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mmmh, shouldn't we either handle int or let it raise here? Not sure about what the implications are or might be...

Copy link
Member

@mistercrunch mistercrunch Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess mypy should complain if using an int anywhere in the codebase. Since this is a base function going to get reused it could be good to just support int here. Might even be a tiny bit cheaper to validate the type than isdigit() is, but would run both (pseudo: type(v) == int or isdigit())

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you're right, the comment is a little bit irrelevant. Fixed.

Not sure about the int optimization, though. Don't we plan to deprecate id eventually?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right. Doesn't matter and mypy should catch error (would be good to confirm). While we're in here and setting the pattern, should we support short-shas in here too? Would make for nicer looking urls, pretty standard nowadays (git/github/...), not sure if it's worth the complexity, collision risks, ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this useful? We don’t currently expose these URLs in the UI. Maybe in the next iteration we should reintroduce the slug + short id URL style (similar to Medium), for example:

  • dashboard/unicode-test-91c65d59d37/
  • chart/unicode-cloud-b86ba33fc5cd/

That said, this looks like a complex and potentially breaking change. It could be useful for dashboards, especially as a replacement for custom slugs. But charts and datasets don’t follow the same pattern — they just redirect you to the /explore endpoint.

I’d prefer to discuss this in a different setting, since it will require some brainstorming.


@classmethod
def find_by_id(
cls,
Expand Down
31 changes: 24 additions & 7 deletions superset/daos/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@
# under the License.

import logging
import uuid
from typing import Union

from superset import db
from superset.connectors.sqla.models import SqlaTable
from superset.daos.base import BaseDAO
from superset.daos.exceptions import DatasourceNotFound, DatasourceTypeNotSupportedError
from superset.daos.exceptions import (
DatasourceNotFound,
DatasourceTypeNotSupportedError,
DatasourceValueIsIncorrect,
)
from superset.models.sql_lab import Query, SavedQuery
from superset.utils.core import DatasourceType

Expand All @@ -41,22 +46,34 @@ class DatasourceDAO(BaseDAO[Datasource]):
def get_datasource(
cls,
datasource_type: Union[DatasourceType, str],
datasource_id: int,
database_id_or_uuid: int | str,
) -> Datasource:
if datasource_type not in cls.sources:
raise DatasourceTypeNotSupportedError()

model = cls.sources[datasource_type]

if str(database_id_or_uuid).isdigit():
filter = model.id == int(database_id_or_uuid)
else:
try:
uuid.UUID(str(database_id_or_uuid)) # uuid validation
filter = model.uuid == database_id_or_uuid
except ValueError as err:
logger.warning(
f"database_id_or_uuid {database_id_or_uuid} isn't valid uuid"
)
raise DatasourceValueIsIncorrect() from err

datasource = (
db.session.query(cls.sources[datasource_type])
.filter_by(id=datasource_id)
.one_or_none()
db.session.query(cls.sources[datasource_type]).filter(filter).one_or_none()
)

if not datasource:
logger.warning(
"Datasource not found datasource_type: %s, datasource_id: %s",
"Datasource not found datasource_type: %s, database_id_or_uuid: %s",
datasource_type,
datasource_id,
database_id_or_uuid,
)
raise DatasourceNotFound()

Expand Down
5 changes: 5 additions & 0 deletions superset/daos/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,8 @@ class DatasourceTypeNotSupportedError(DAOException):
class DatasourceNotFound(DAOException):
    # Raised when the given id/uuid does not resolve to any datasource row.
    status = 404
    message = "Datasource does not exist"


class DatasourceValueIsIncorrect(DAOException):
    # Raised when the datasource locator is neither a numeric id nor a
    # parseable uuid (422: the request is well-formed but unprocessable).
    status = 422
    # Fix grammar of the user-facing message: "neither ... nor", not "or".
    message = "Datasource value is neither an id nor a uuid"
38 changes: 17 additions & 21 deletions superset/datasets/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ def refresh(self, pk: int) -> Response:
)
return self.response_422(message=str(ex))

@expose("/<pk>/related_objects", methods=("GET",))
@expose("/<id_or_uuid>/related_objects", methods=("GET",))
@protect()
@safe
@statsd_metrics
Expand All @@ -711,16 +711,16 @@ def refresh(self, pk: int) -> Response:
f".related_objects",
log_to_statsd=False,
)
def related_objects(self, pk: int) -> Response:
def related_objects(self, id_or_uuid: str) -> Response:
"""Get charts and dashboards count associated to a dataset.
---
get:
summary: Get charts and dashboards count associated to a dataset
parameters:
- in: path
name: pk
name: id_or_uuid
schema:
type: integer
type: string
responses:
200:
200:
Expand All @@ -736,10 +736,10 @@ def related_objects(self, pk: int) -> Response:
500:
$ref: '#/components/responses/500'
"""
dataset = DatasetDAO.find_by_id(pk)
dataset = DatasetDAO.find_by_id_or_uuid(id_or_uuid)
if not dataset:
return self.response_404()
data = DatasetDAO.get_related_objects(pk)
data = DatasetDAO.get_related_objects(dataset.id)
charts = [
{
"id": chart.id,
Expand Down Expand Up @@ -1081,7 +1081,7 @@ def warm_up_cache(self) -> Response:
except CommandException as ex:
return self.response(ex.status, message=ex.message)

@expose("/<int:pk>", methods=("GET",))
@expose("/<id_or_uuid>", methods=("GET",))
@protect()
@safe
@rison(get_item_schema)
Expand All @@ -1091,7 +1091,7 @@ def warm_up_cache(self) -> Response:
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.get",
log_to_statsd=False,
)
def get(self, pk: int, **kwargs: Any) -> Response:
def get(self, id_or_uuid: str, **kwargs: Any) -> Response:
"""Get a dataset.
---
get:
Expand All @@ -1100,9 +1100,9 @@ def get(self, pk: int, **kwargs: Any) -> Response:
parameters:
- in: path
schema:
type: integer
description: The dataset ID
name: pk
type: string
description: Either the id of the dataset, or its uuid
name: id_or_uuid
- in: query
name: q
content:
Expand Down Expand Up @@ -1138,13 +1138,8 @@ def get(self, pk: int, **kwargs: Any) -> Response:
500:
$ref: '#/components/responses/500'
"""
item: SqlaTable | None = self.datamodel.get(
pk,
self._base_filters,
self.show_select_columns,
self.show_outer_default_load,
)
if not item:
table = DatasetDAO.find_by_id_or_uuid(id_or_uuid)
if not table:
return self.response_404()

response: dict[str, Any] = {}
Expand All @@ -1162,8 +1157,8 @@ def get(self, pk: int, **kwargs: Any) -> Response:
else:
show_model_schema = self.show_model_schema

response["id"] = pk
response[API_RESULT_RES_KEY] = show_model_schema.dump(item, many=False)
response["id"] = table.id
response[API_RESULT_RES_KEY] = show_model_schema.dump(table, many=False)

# remove folders from resposne if `DATASET_FOLDERS` is disabled, so that it's
# possible to inspect if the feature is supported or not
Expand All @@ -1175,12 +1170,13 @@ def get(self, pk: int, **kwargs: Any) -> Response:

if parse_boolean_string(request.args.get("include_rendered_sql")):
try:
processor = get_template_processor(database=item.database)
processor = get_template_processor(database=table.database)
response["result"] = self.render_dataset_fields(
response["result"], processor
)
except SupersetTemplateException as ex:
return self.response_400(message=str(ex))

return self.response(200, **response)

@expose("/<int:pk>/drill_info/", methods=("GET",))
Expand Down
2 changes: 1 addition & 1 deletion superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ class HeaderDataType(TypedDict):

class DatasourceDict(TypedDict):
    """Lightweight datasource reference as exchanged with API clients."""

    type: str  # todo(hugh): update this to be DatasourceType
    # Either the integer primary key or the string uuid of the datasource.
    id: int | str
id: int | str


class AdhocFilterClause(TypedDict, total=False):
Expand Down
2 changes: 1 addition & 1 deletion superset/views/datasource/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_samples( # pylint: disable=too-many-arguments
) -> dict[str, Any]:
datasource = DatasourceDAO.get_datasource(
datasource_type=datasource_type,
datasource_id=datasource_id,
database_id_or_uuid=str(datasource_id),
)

limit_clause = get_limit_clause(page, per_page)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/query_context_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def test_query_cache_key_changes_when_datasource_is_updated(self):
# make temporary change and revert it to refresh the changed_on property
datasource = DatasourceDAO.get_datasource(
datasource_type=DatasourceType(payload["datasource"]["type"]),
datasource_id=payload["datasource"]["id"],
database_id_or_uuid=payload["datasource"]["id"],
)
description_original = datasource.description
datasource.description = "temporary description"
Expand Down
Loading
Loading