diff --git a/superset/examples/helpers.py b/superset/examples/helpers.py index 4cc9a47b2700..908142ec778e 100644 --- a/superset/examples/helpers.py +++ b/superset/examples/helpers.py @@ -14,6 +14,34 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +"""Helpers for loading Superset example datasets. + +All Superset example data files (CSV, JSON, etc.) are fetched via the +jsDelivr CDN instead of raw.githubusercontent.com to avoid GitHub API +rate limits (60 anonymous requests/hour/IP). + +jsDelivr is a multi‑CDN front for public GitHub repos and supports +arbitrary paths including nested folders. It doesn’t use the GitHub REST API +and advertises unlimited bandwidth for open-source use. + +Example URL:: + + https://cdn.jsdelivr.net/gh/apache-superset/examples-data@master/datasets/examples/slack/messages.csv + +Environment knobs +----------------- +``SUPERSET_EXAMPLES_DATA_REF`` (default: ``master``) + Tag / branch / SHA to pin so builds remain reproducible. + +``SUPERSET_EXAMPLES_BASE_URL`` + Override the base completely if you want to host the files elsewhere + (internal mirror, S3 bucket, ASF downloads, …). **Include any query + string required by your hosting (e.g. ``?raw=true`` if you point back + to a GitHub *blob* URL).** +""" + +from __future__ import annotations + import os from typing import Any @@ -22,27 +50,41 @@ from superset.models.slice import Slice from superset.utils import json -BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/" +# --------------------------------------------------------------------------- +# Public sample‑data mirror configuration +# --------------------------------------------------------------------------- +BASE_COMMIT: str = os.getenv("SUPERSET_EXAMPLES_DATA_REF", "master") +BASE_URL: str = os.getenv( + "SUPERSET_EXAMPLES_BASE_URL", + f"https://cdn.jsdelivr.net/gh/apache-superset/examples-data@{BASE_COMMIT}/", +) -misc_dash_slices: set[str] = set() # slices assembled in a 'Misc Chart' dashboard +# Slices assembled into a 'Misc Chart' dashboard +misc_dash_slices: set[str] = set() + +# --------------------------------------------------------------------------- +# Utility functions +# --------------------------------------------------------------------------- def get_table_connector_registry() -> Any: + """Return the SqlaTable registry so we can mock it in unit tests.""" return SqlaTable def get_examples_folder() -> str: + """Return local path to the examples folder (when vendored).""" return os.path.join(app.config["BASE_DIR"], "examples") def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]: - """Update slice ids in position_json and return the slices found.""" + """Update slice ids in ``position_json`` and return the slices found.""" slice_components = [ component for component in pos.values() if isinstance(component, dict) and component.get("type") == "CHART" ] - slices = {} + slices: dict[str, Slice] = {} for name in {component["meta"]["sliceName"] for component in slice_components}: slc = db.session.query(Slice).filter_by(slice_name=name).first() if slc: @@ -56,17 +98,24 @@ def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]: def merge_slice(slc: Slice) -> None: - o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first() - if o: - db.session.delete(o) + """Upsert a Slice by name.""" + existing = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first() + if existing: + db.session.delete(existing) db.session.add(slc) def get_slice_json(defaults: dict[Any, Any], **kwargs: Any) -> str: + """Return JSON string for a chart definition, merging extra kwargs.""" defaults_copy = defaults.copy() defaults_copy.update(kwargs) return json.dumps(defaults_copy, indent=4, sort_keys=True) def get_example_url(filepath: str) -> str: - return f"{BASE_URL}{filepath}?raw=true" + """Return an absolute URL to *filepath* under the examples‑data repo. + + All calls are routed through jsDelivr unless overridden. Supports nested + paths like ``datasets/examples/slack/messages.csv``. + """ + return f"{BASE_URL}{filepath}" diff --git a/tests/unit_tests/migrations/viz/time_related_fields_test.py b/tests/unit_tests/migrations/viz/time_related_fields_test.py index 128342737bd7..c5a94a6adca5 100644 --- a/tests/unit_tests/migrations/viz/time_related_fields_test.py +++ b/tests/unit_tests/migrations/viz/time_related_fields_test.py @@ -21,7 +21,7 @@ SOURCE_FORM_DATA: dict[str, Any] = { "granularity_sqla": "ds", - "time_range": "100 years ago : today", + "time_range": "1925-04-24 : 2025-04-24", "viz_type": "pivot_table", } @@ -29,7 +29,7 @@ "form_data_bak": SOURCE_FORM_DATA, "granularity_sqla": "ds", "rowOrder": "value_z_to_a", - "time_range": "100 years ago : today", + "time_range": "1925-04-24 : 2025-04-24", "viz_type": "pivot_table_v2", } @@ -40,7 +40,7 @@ def test_migration() -> None: target["adhoc_filters"] = [ { "clause": "WHERE", - "comparator": "100 years ago : today", + "comparator": "1925-04-24 : 2025-04-24", "expressionType": "SIMPLE", "operator": "TEMPORAL_RANGE", "subject": "ds",