diff --git a/superset.yaml b/superset.yaml index 042facf0335..dad34671de6 100644 --- a/superset.yaml +++ b/superset.yaml @@ -1,7 +1,7 @@ package: name: superset version: "4.1.2" - epoch: 0 + epoch: 1 description: Data Visualization and Data Exploration Platform copyright: - license: Apache-2.0 @@ -49,6 +49,10 @@ pipeline: tag: ${{package.version}} expected-commit: 72538bda06c0deef2140e0df65bf0a13bcddb16c + - uses: patch + with: + patches: examples.patch + - name: Build frontend working-directory: superset-frontend runs: | diff --git a/superset/examples.patch b/superset/examples.patch new file mode 100644 index 00000000000..0bd3fe65b2e --- /dev/null +++ b/superset/examples.patch @@ -0,0 +1,145 @@ +diff --git a/superset/examples/helpers.py b/superset/examples/helpers.py +index 4cc9a47b27..908142ec77 100644 +--- a/superset/examples/helpers.py ++++ b/superset/examples/helpers.py +@@ -14,6 +14,34 @@ + # KIND, either express or implied. See the License for the + # specific language governing permissions and limitations + # under the License. ++"""Helpers for loading Superset example datasets. ++ ++All Superset example data files (CSV, JSON, etc.) are fetched via the ++jsDelivr CDN instead of raw.githubusercontent.com to avoid GitHub API ++rate limits (60 anonymous requests/hour/IP). ++ ++jsDelivr is a multi‑CDN front for public GitHub repos and supports ++arbitrary paths including nested folders. It doesn’t use the GitHub REST API ++and advertises unlimited bandwidth for open-source use. ++ ++Example URL:: ++ ++ https://cdn.jsdelivr.net/gh/apache-superset/examples-data@master/datasets/examples/slack/messages.csv ++ ++Environment knobs ++----------------- ++``SUPERSET_EXAMPLES_DATA_REF`` (default: ``master``) ++ Tag / branch / SHA to pin so builds remain reproducible. ++ ++``SUPERSET_EXAMPLES_BASE_URL`` ++ Override the base completely if you want to host the files elsewhere ++ (internal mirror, S3 bucket, ASF downloads, …). **Include any query ++ string required by your hosting (e.g. ``?raw=true`` if you point back ++ to a GitHub *blob* URL).** ++""" ++ ++from __future__ import annotations ++ + import os + from typing import Any + +@@ -22,27 +50,41 @@ from superset.connectors.sqla.models import SqlaTable + from superset.models.slice import Slice + from superset.utils import json + +-BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/" ++# --------------------------------------------------------------------------- ++# Public sample‑data mirror configuration ++# --------------------------------------------------------------------------- ++BASE_COMMIT: str = os.getenv("SUPERSET_EXAMPLES_DATA_REF", "master") ++BASE_URL: str = os.getenv( ++ "SUPERSET_EXAMPLES_BASE_URL", ++ f"https://cdn.jsdelivr.net/gh/apache-superset/examples-data@{BASE_COMMIT}/", ++) + +-misc_dash_slices: set[str] = set() # slices assembled in a 'Misc Chart' dashboard ++# Slices assembled into a 'Misc Chart' dashboard ++misc_dash_slices: set[str] = set() ++ ++# --------------------------------------------------------------------------- ++# Utility functions ++# --------------------------------------------------------------------------- + + + def get_table_connector_registry() -> Any: ++ """Return the SqlaTable registry so we can mock it in unit tests.""" + return SqlaTable + + + def get_examples_folder() -> str: ++ """Return local path to the examples folder (when vendored).""" + return os.path.join(app.config["BASE_DIR"], "examples") + + + def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]: +- """Update slice ids in position_json and return the slices found.""" ++ """Update slice ids in ``position_json`` and return the slices found.""" + slice_components = [ + component + for component in pos.values() + if isinstance(component, dict) and component.get("type") == "CHART" + ] +- slices = {} ++ slices: dict[str, Slice] = {} + for name in {component["meta"]["sliceName"] for component in slice_components}: + slc = db.session.query(Slice).filter_by(slice_name=name).first() + if slc: +@@ -56,17 +98,24 @@ def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]: + + + def merge_slice(slc: Slice) -> None: +- o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first() +- if o: +- db.session.delete(o) ++ """Upsert a Slice by name.""" ++ existing = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first() ++ if existing: ++ db.session.delete(existing) + db.session.add(slc) + + + def get_slice_json(defaults: dict[Any, Any], **kwargs: Any) -> str: ++ """Return JSON string for a chart definition, merging extra kwargs.""" + defaults_copy = defaults.copy() + defaults_copy.update(kwargs) + return json.dumps(defaults_copy, indent=4, sort_keys=True) + + + def get_example_url(filepath: str) -> str: +- return f"{BASE_URL}{filepath}?raw=true" ++ """Return an absolute URL to *filepath* under the examples‑data repo. ++ ++ All calls are routed through jsDelivr unless overridden. Supports nested ++ paths like ``datasets/examples/slack/messages.csv``. ++ """ ++ return f"{BASE_URL}{filepath}" +diff --git a/tests/unit_tests/migrations/viz/time_related_fields_test.py b/tests/unit_tests/migrations/viz/time_related_fields_test.py +index 4494aff1cf..1ea8a151c0 100644 +--- a/tests/unit_tests/migrations/viz/time_related_fields_test.py ++++ b/tests/unit_tests/migrations/viz/time_related_fields_test.py +@@ -21,7 +21,7 @@ from tests.unit_tests.migrations.viz.utils import migrate_and_assert + + SOURCE_FORM_DATA: dict[str, Any] = { + "granularity_sqla": "ds", +- "time_range": "100 years ago : now", ++ "time_range": "1925-04-24 : 2025-04-24", + "viz_type": "pivot_table", + } + +@@ -29,7 +29,7 @@ TARGET_FORM_DATA: dict[str, Any] = { + "form_data_bak": SOURCE_FORM_DATA, + "granularity_sqla": "ds", + "rowOrder": "value_z_to_a", +- "time_range": "100 years ago : now", ++ "time_range": "1925-04-24 : 2025-04-24", + "viz_type": "pivot_table_v2", + } + +@@ -40,7 +40,7 @@ def test_migration() -> None: + target["adhoc_filters"] = [ + { + "clause": "WHERE", +- "comparator": "100 years ago : now", ++ "comparator": "1925-04-24 : 2025-04-24", + "expressionType": "SIMPLE", + "operator": "TEMPORAL_RANGE", + "subject": "ds",