Merged
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -18,7 +18,7 @@ include NOTICE
 include LICENSE.txt
 graft licenses/
 include README.md
-recursive-include superset/data *
+recursive-include superset/examples *
 recursive-include superset/migrations *
 recursive-include superset/static *
 recursive-exclude superset/static/assets/docs *
57 changes: 34 additions & 23 deletions superset/cli.py
@@ -26,7 +26,7 @@
 from pathlib2 import Path
 import yaml

-from superset import app, appbuilder, data, db, security_manager
+from superset import app, appbuilder, db, examples, security_manager
 from superset.utils import core as utils, dashboard_import_export, dict_import_export

 config = app.config
@@ -46,6 +46,7 @@ def make_shell_context():
 def init():
     """Inits the Superset application"""
     utils.get_or_create_main_db()
+    utils.get_example_database()
     appbuilder.add_permissions(update_perms=True)
     security_manager.sync_role_definitions()

@@ -67,66 +67,76 @@ def version(verbose):
     print(Style.RESET_ALL)


-def load_examples_run(load_test_data):
-    print("Loading examples into {}".format(db))
+def load_examples_run(load_test_data, only_metadata=False, force=False):
+    if only_metadata:
+        print("Loading examples metadata")
+    else:
+        examples_db = utils.get_example_database()
+        print(f"Loading examples metadata and related data into {examples_db}")

-    data.load_css_templates()
+    examples.load_css_templates()

     print("Loading energy related dataset")
-    data.load_energy()
+    examples.load_energy(only_metadata, force)

     print("Loading [World Bank's Health Nutrition and Population Stats]")
-    data.load_world_bank_health_n_pop()
+    examples.load_world_bank_health_n_pop(only_metadata, force)

     print("Loading [Birth names]")
-    data.load_birth_names()
+    examples.load_birth_names(only_metadata, force)

     print("Loading [Unicode test data]")
-    data.load_unicode_test_data()
+    examples.load_unicode_test_data(only_metadata, force)

     if not load_test_data:
         print("Loading [Random time series data]")
-        data.load_random_time_series_data()
+        examples.load_random_time_series_data(only_metadata, force)

         print("Loading [Random long/lat data]")
-        data.load_long_lat_data()
+        examples.load_long_lat_data(only_metadata, force)

         print("Loading [Country Map data]")
-        data.load_country_map_data()
+        examples.load_country_map_data(only_metadata, force)

         print("Loading [Multiformat time series]")
-        data.load_multiformat_time_series()
+        examples.load_multiformat_time_series(only_metadata, force)

         print("Loading [Paris GeoJson]")
-        data.load_paris_iris_geojson()
+        examples.load_paris_iris_geojson(only_metadata, force)

         print("Loading [San Francisco population polygons]")
-        data.load_sf_population_polygons()
+        examples.load_sf_population_polygons(only_metadata, force)

         print("Loading [Flights data]")
-        data.load_flights()
+        examples.load_flights(only_metadata, force)

         print("Loading [BART lines]")
-        data.load_bart_lines()
+        examples.load_bart_lines(only_metadata, force)

         print("Loading [Multi Line]")
-        data.load_multi_line()
+        examples.load_multi_line(only_metadata)

     print("Loading [Misc Charts] dashboard")
-    data.load_misc_dashboard()
+    examples.load_misc_dashboard()

     print("Loading DECK.gl demo")
-    data.load_deck_dash()
+    examples.load_deck_dash()

     print("Loading [Tabbed dashboard]")
-    data.load_tabbed_dashboard()
+    examples.load_tabbed_dashboard(only_metadata)


 @app.cli.command()
 @click.option("--load-test-data", "-t", is_flag=True, help="Load additional test data")
-def load_examples(load_test_data):
+@click.option(
+    "--only-metadata", "-m", is_flag=True, help="Only load metadata, skip actual data"
+)
+@click.option(
+    "--force", "-f", is_flag=True, help="Force load data even if table already exists"
+)
+def load_examples(load_test_data, only_metadata=False, force=False):
     """Loads a set of Slices and Dashboards and a supporting dataset """
-    load_examples_run(load_test_data)
+    load_examples_run(load_test_data, only_metadata, force)


 @app.cli.command()
@@ -405,7 +416,7 @@ def load_test_users_run():
     for perm in security_manager.find_role("Gamma").permissions:
         security_manager.add_permission_role(gamma_sqllab_role, perm)
     utils.get_or_create_main_db()
-    db_perm = utils.get_main_database(security_manager.get_session).perm
+    db_perm = utils.get_main_database().perm
     security_manager.add_permission_view_menu("database_access", db_perm)
     db_pvm = security_manager.find_permission_view_menu(
         view_menu_name=db_perm, permission_name="database_access"
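To sanity-check the new flags end to end, Flask's CLI test runner can invoke the command in-process. A minimal sketch, assuming an initialized Superset install (the flag names come from the diff above; test_cli_runner is standard Flask):

    # Sketch: exercise the new load_examples flags via Flask's CLI test runner.
    # Assumes a working Superset setup; output handling is illustrative only.
    from superset import app

    runner = app.test_cli_runner()

    # Load only dashboards/slices/table references, skipping the heavy data
    # import -- useful when the example tables already exist.
    result = runner.invoke(args=["load_examples", "--only-metadata"])
    print(result.output)

    # Re-import the data even if the tables are already present.
    result = runner.invoke(args=["load_examples", "--force"])
    print(result.output)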
4 changes: 4 additions & 0 deletions superset/config.py
@@ -617,6 +617,10 @@ class CeleryConfig(object):
     "force_https_permanent": False,
 }

+# URI to database storing the example data, points to
+# SQLALCHEMY_DATABASE_URI by default if set to `None`
+SQLALCHEMY_EXAMPLES_URI = None
+
 try:
     if CONFIG_PATH_ENV_VAR in os.environ:
         # Explicitly import config module that is not in pythonpath; useful
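For instance, a deployment that wants the example datasets kept out of the metadata database could point the new setting at a separate database in superset_config.py. A sketch, with placeholder URIs:

    # superset_config.py -- both URIs below are placeholders, not real defaults.
    # Metadata (dashboards, slices, users) stays in the main database:
    SQLALCHEMY_DATABASE_URI = "postgresql://superset@localhost/superset_meta"

    # Example tables land in a dedicated database; leaving this set to None
    # (the default) falls back to SQLALCHEMY_DATABASE_URI.
    SQLALCHEMY_EXAMPLES_URI = "postgresql://superset@localhost/superset_examples"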
15 changes: 3 additions & 12 deletions superset/connectors/connector_registry.py
@@ -55,18 +55,9 @@ def get_datasource_by_name(
         cls, session, datasource_type, datasource_name, schema, database_name
     ):
         datasource_class = ConnectorRegistry.sources[datasource_type]
-        datasources = session.query(datasource_class).all()
-
-        # Filter datasoures that don't have database.
-        db_ds = [
-            d
-            for d in datasources
-            if d.database
-            and d.database.name == database_name
-            and d.name == datasource_name
-            and schema == schema
-        ]
-        return db_ds[0]
+        return datasource_class.get_datasource_by_name(
+            session, datasource_name, schema, database_name
+        )

     @classmethod
     def query_datasources_by_permissions(cls, session, database, permissions):
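With this change each connector class owns its lookup logic, and callers keep going through the registry. A usage sketch (the datasource and database names below are hypothetical):

    # Sketch: the registry now dispatches to the connector's own classmethod;
    # "table" routes to SqlaTable.get_datasource_by_name, "druid" to the Druid
    # connector's implementation. Names below are made up.
    from superset import db
    from superset.connectors.connector_registry import ConnectorRegistry

    datasource = ConnectorRegistry.get_datasource_by_name(
        session=db.session,
        datasource_type="table",
        datasource_name="birth_names",
        schema=None,  # '' and None are treated as equivalent by SqlaTable
        database_name="examples",
    )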
10 changes: 10 additions & 0 deletions superset/connectors/druid/models.py
@@ -732,6 +732,16 @@ def time_offset(granularity):
             return 6 * 24 * 3600 * 1000  # 6 days
         return 0

+    @classmethod
+    def get_datasource_by_name(cls, session, datasource_name, schema, database_name):
+        query = (
+            session.query(cls)
+            .join(DruidCluster)
+            .filter(cls.datasource_name == datasource_name)
+            .filter(DruidCluster.cluster_name == database_name)
+        )
+        return query.first()
+
     # uses https://en.wikipedia.org/wiki/ISO_8601
     # http://druid.io/docs/0.8.0/querying/granularities.html
     # TODO: pass origin from the UI
15 changes: 15 additions & 0 deletions superset/connectors/sqla/models.py
@@ -374,6 +374,21 @@ def datasource_name(self):
     def database_name(self):
         return self.database.name

+    @classmethod
+    def get_datasource_by_name(cls, session, datasource_name, schema, database_name):
+        schema = schema or None
+        query = (
+            session.query(cls)
+            .join(Database)
+            .filter(cls.table_name == datasource_name)
+            .filter(Database.database_name == database_name)
+        )
+        # Handling schema being '' or None, which is easier to handle
+        # in python than in the SQLA query in a multi-dialect way
+        for tbl in query.all():
+            if schema == (tbl.schema or None):
+                return tbl
+
     @property
     def link(self):
         name = escape(self.name)
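The '' vs None handling exists because some engines report a missing schema as an empty string while callers may pass None; both sides are normalized before comparison. A tiny sketch of the equivalence being relied on (values are hypothetical):

    # Each side is collapsed to None via `x or None` before comparing.
    for stored, requested in [("", None), (None, ""), ("public", "public")]:
        print((requested or None) == (stored or None))  # True in all three cases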
File renamed without changes.
49 changes: 27 additions & 22 deletions superset/data/bart_lines.py → superset/examples/bart_lines.py
@@ -21,37 +21,42 @@
 from sqlalchemy import String, Text

 from superset import db
-from superset.utils.core import get_or_create_main_db
-from .helpers import TBL, get_example_data
+from superset.utils.core import get_example_database
+from .helpers import get_example_data, TBL


-def load_bart_lines():
+def load_bart_lines(only_metadata=False, force=False):
     tbl_name = "bart_lines"
-    content = get_example_data("bart-lines.json.gz")
-    df = pd.read_json(content, encoding="latin-1")
-    df["path_json"] = df.path.map(json.dumps)
-    df["polyline"] = df.path.map(polyline.encode)
-    del df["path"]
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        content = get_example_data("bart-lines.json.gz")
+        df = pd.read_json(content, encoding="latin-1")
+        df["path_json"] = df.path.map(json.dumps)
+        df["polyline"] = df.path.map(polyline.encode)
+        del df["path"]

-    df.to_sql(
-        tbl_name,
-        db.engine,
-        if_exists="replace",
-        chunksize=500,
-        dtype={
-            "color": String(255),
-            "name": String(255),
-            "polyline": Text,
-            "path_json": Text,
-        },
-        index=False,
-    )
+        df.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "color": String(255),
+                "name": String(255),
+                "polyline": Text,
+                "path_json": Text,
+            },
+            index=False,
+        )

     print("Creating table {} reference".format(tbl_name))
     tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
     if not tbl:
         tbl = TBL(table_name=tbl_name)
     tbl.description = "BART lines"
-    tbl.database = get_or_create_main_db()
+    tbl.database = database
     db.session.merge(tbl)
     db.session.commit()
     tbl.fetch_metadata()

Inline review comment on the new df.to_sql call (Member):

Oh, one more thing; when I added CSV import functionality for BigQuery, I refactored db_engine_specs so that one can call db_engine_spec.df_to_sql(df, **kwargs) in place of df.to_sql(**kwargs) for engines that don't support df.to_sql(). So to make this work universally here, one would write

        database.db_engine_spec.df_to_sql(
            df,
            name=tbl_name,
            con=database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "color": String(255),
                "name": String(255),
                "polyline": Text,
                "path_json": Text,
            },
            index=False,
        )

This doesn't necessarily have to be addressed in this PR; I can do that later, too, as I have a good test rig for that.
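The same guard (skip the data import when the table exists, unless forced) repeats across the example loaders, so each one can also be called directly. A sketch, assuming an initialized Superset app:

    # Sketch: invoking a loader directly; assumes a configured Superset app.
    from superset import examples

    examples.load_bart_lines()                    # skips data if table exists
    examples.load_bart_lines(only_metadata=True)  # refresh only the table reference
    examples.load_bart_lines(force=True)          # re-import even if present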
60 changes: 33 additions & 27 deletions superset/data/birth_names.py → superset/examples/birth_names.py
@@ -23,7 +23,7 @@

 from superset import db, security_manager
 from superset.connectors.sqla.models import SqlMetric, TableColumn
-from superset.utils.core import get_or_create_main_db
+from superset.utils.core import get_example_database
 from .helpers import (
     config,
     Dash,
@@ -36,33 +36,39 @@
 )


-def load_birth_names():
+def load_birth_names(only_metadata=False, force=False):
     """Loading birth name dataset from a zip file in the repo"""
-    data = get_example_data("birth_names.json.gz")
-    pdf = pd.read_json(data)
-    pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
-    pdf.to_sql(
-        "birth_names",
-        db.engine,
-        if_exists="replace",
-        chunksize=500,
-        dtype={
-            "ds": DateTime,
-            "gender": String(16),
-            "state": String(10),
-            "name": String(255),
-        },
-        index=False,
-    )
-    print("Done loading table!")
-    print("-" * 80)
+    # pylint: disable=too-many-locals
+    tbl_name = "birth_names"
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        pdf = pd.read_json(get_example_data("birth_names.json.gz"))
+        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "ds": DateTime,
+                "gender": String(16),
+                "state": String(10),
+                "name": String(255),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)

-    print("Creating table [birth_names] reference")
-    obj = db.session.query(TBL).filter_by(table_name="birth_names").first()
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
     if not obj:
-        obj = TBL(table_name="birth_names")
+        print(f"Creating table [{tbl_name}] reference")
+        obj = TBL(table_name=tbl_name)
+        db.session.add(obj)
     obj.main_dttm_col = "ds"
-    obj.database = get_or_create_main_db()
+    obj.database = database
     obj.filter_select_enabled = True

     if not any(col.column_name == "num_california" for col in obj.columns):
@@ -79,7 +85,6 @@ def load_birth_names():
     col = str(column("num").compile(db.engine))
     obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

-    db.session.merge(obj)
     db.session.commit()
     obj.fetch_metadata()
     tbl = obj
@@ -384,10 +389,12 @@ def load_birth_names():
     merge_slice(slc)

     print("Creating a dashboard")
-    dash = db.session.query(Dash).filter_by(dashboard_title="Births").first()
+    dash = db.session.query(Dash).filter_by(slug="births").first()

     if not dash:
         dash = Dash()
+        db.session.add(dash)
+    dash.published = True
     js = textwrap.dedent(
         # pylint: disable=line-too-long
         """\
@@ -649,5 +656,4 @@ def load_birth_names():
     dash.dashboard_title = "Births"
     dash.position_json = json.dumps(pos, indent=4)
     dash.slug = "births"
-    db.session.merge(dash)
     db.session.commit()
File renamed without changes.
File renamed without changes.