From 963d1eff70a8227cb0d6835e7b95d71193d48432 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Fri, 3 May 2019 16:32:26 -0700
Subject: [PATCH 001/107] Adds 'superset export_dashboards --dashboard-id/-i
 --dashboard-title/-t' options to cli

(cherry picked from commit 185aaf00c079af85830a620620f34f82fc42f7d5)
---
 superset/cli.py                           | 13 +++++++++++--
 superset/utils/dashboard_import_export.py | 10 +++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index e631e0b2efaf..8023b2138be7 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -187,9 +187,18 @@ def import_dashboards(path, recursive):
 @click.option(
     '--print_stdout', '-p',
     is_flag=True, default=False,
     help='Print JSON to stdout')
-def export_dashboards(print_stdout, dashboard_file):
+@click.option(
+    '--dashboard-id', '-i', default=None, type=int,
+    help='Specify dashboard id to export')
+@click.option(
+    '--dashboard-title', '-t', default=None,
+    help='Specify dashboard title to export')
+def export_dashboards(print_stdout, dashboard_file, dashboard_id, dashboard_title):
     """Export dashboards to JSON"""
-    data = dashboard_import_export.export_dashboards(db.session)
+    data = dashboard_import_export.export_dashboards(
+        db.session,
+        dashboard_id=dashboard_id,
+        dashboard_title=dashboard_title)
     if print_stdout or not dashboard_file:
         print(data)
     if dashboard_file:
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 5f6782d9387e..5c637dd9f284 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -38,12 +38,16 @@ def import_dashboards(session, data_stream, import_time=None):
     session.commit()


-def export_dashboards(session):
+def export_dashboards(session, dashboard_id=None, dashboard_title=None):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
     dashboards = session.query(Dashboard)
     dashboard_ids = []
     for dashboard in dashboards:
-        dashboard_ids.append(dashboard.id)
-    data = Dashboard.export_dashboards(dashboard_ids)
+        if dashboard_id or dashboard_title:
+            if dashboard.id == dashboard_id or dashboard.dashboard_title == dashboard_title:
+                dashboard_ids.append(dashboard.id)
+        else:
+            dashboard_ids.append(dashboard.id)
+    data = Dashboard.export_dashboards(dashboard_ids) if dashboard_ids else {}
     return data

From 590b6d10802908ea3787b70a6212efb0339d8c48 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Mon, 6 May 2019 18:13:41 -0700
Subject: [PATCH 002/107] Shortened line to pass flake8 test

(cherry picked from commit 9a48598b0b459bedfa9a1773401f3d159c43f3d1)
---
 superset/utils/dashboard_import_export.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 5c637dd9f284..92acb40dfb60 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -45,7 +45,8 @@ def export_dashboards(session, dashboard_id=None, dashboard_title=None):
     dashboard_ids = []
     for dashboard in dashboards:
         if dashboard_id or dashboard_title:
-            if dashboard.id == dashboard_id or dashboard.dashboard_title == dashboard_title:
+            if dashboard.id == dashboard_id or \
+               dashboard.dashboard_title == dashboard_title:
                 dashboard_ids.append(dashboard.id)
         else:
             dashboard_ids.append(dashboard.id)
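The two patches above only cover a single dashboard per invocation. A minimal usage sketch via Flask's CLI test runner (the same pattern the test suite later in this series adopts); the id and title here are assumed example values, not dashboards guaranteed to exist:

    from superset import app

    runner = app.test_cli_runner()
    # Export one dashboard by id, or by exact title (hypothetical values)
    result = runner.invoke(app.cli, ['export_dashboards', '-i', '1'])
    result = runner.invoke(app.cli, ['export_dashboards', '-t', "World's Bank Data"])
    print(result.output)  # JSON for the matching dashboard, or {} when nothing matches
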
From 915e6dcf815cbc1cee995edf0c1da17d9e0890ab Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 16:25:03 -0700
Subject: [PATCH 003/107] Jazzed up superset export_dashboards -i/-t, now
 supports multiple dashboards and warns if none come up

(cherry picked from commit d4f2fc29e6c1b8ae89eb53e4c8ee1b5327f1237e)
---
 superset/cli.py                           | 21 +++++++++------
 superset/exceptions.py                    |  3 +++
 superset/utils/dashboard_import_export.py | 32 ++++++++++++++---------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 8023b2138be7..92cfc2758790 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -19,7 +19,7 @@ from datetime import datetime
 import logging
 from subprocess import Popen
-from sys import stdout
+from sys import stdout, exit

 import click
 from colorama import Fore, Style
@@ -31,6 +31,7 @@ from superset import (
 )
 from superset.utils import (
     core as utils, dashboard_import_export, dict_import_export)
+from superset.exceptions import DashboardNotFoundException

 config = app.config
 celery_app = utils.get_celery_app(config)
@@ -188,17 +189,21 @@ def import_dashboards(path, recursive):
     '--print_stdout', '-p',
     is_flag=True, default=False,
     help='Print JSON to stdout')
 @click.option(
-    '--dashboard-id', '-i', default=None, type=int,
+    '--dashboard-ids', '-i', default=None, type=int, multiple=True,
     help='Specify dashboard id to export')
 @click.option(
-    '--dashboard-title', '-t', default=None,
+    '--dashboard-titles', '-t', default=None, multiple=True,
     help='Specify dashboard title to export')
-def export_dashboards(print_stdout, dashboard_file, dashboard_id, dashboard_title):
+def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_titles):
     """Export dashboards to JSON"""
-    data = dashboard_import_export.export_dashboards(
-        db.session,
-        dashboard_id=dashboard_id,
-        dashboard_title=dashboard_title)
+    try:
+        data = dashboard_import_export.export_dashboards(
+            db.session,
+            dashboard_ids=dashboard_ids,
+            dashboard_titles=dashboard_titles)
+    except DashboardNotFoundException as e:
+        click.echo(click.style(str(e), fg='red'))
+        exit(1)
     if print_stdout or not dashboard_file:
         print(data)
     if dashboard_file:
diff --git a/superset/exceptions.py b/superset/exceptions.py
index ae491fc8d731..3eafefb8dee2 100644
--- a/superset/exceptions.py
+++ b/superset/exceptions.py
@@ -54,3 +54,6 @@ class SupersetTemplateException(SupersetException):

 class SpatialException(SupersetException):
     pass
+
+class DashboardNotFoundException(Exception):
+    pass
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 92acb40dfb60..a56a73ab0326 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -15,12 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=C,R,W
+import sys
 import json
 import logging
 import time

+from superset import db
 from superset.models.core import Dashboard
 from superset.utils.core import decode_dashboards
+from superset.exceptions import DashboardNotFoundException


 def import_dashboards(session, data_stream, import_time=None):
@@ -37,18 +40,23 @@ def import_dashboards(session, data_stream, import_time=None):
         dashboard, import_time=import_time)
     session.commit()

-def export_dashboards(session, dashboard_id=None, dashboard_title=None):
+def export_dashboards(session, dashboard_ids=None, dashboard_titles=None):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
-    dashboards = session.query(Dashboard)
-    dashboard_ids = []
-    for dashboard in dashboards:
-        if dashboard_id or dashboard_title:
-            if dashboard.id == dashboard_id or \
-               dashboard.dashboard_title == dashboard_title:
-                dashboard_ids.append(dashboard.id)
-        else:
-            dashboard_ids.append(dashboard.id)
-    data = Dashboard.export_dashboards(dashboard_ids) if dashboard_ids else {}
+    export_dashboard_ids = []
+
+    session = db.session()
+    query = session.query(Dashboard)
+    if dashboard_ids or dashboard_titles:
+        query = query.filter(Dashboard.id.in_(dashboard_ids) | \
+                             Dashboard.dashboard_title.in_(dashboard_titles))
+
+    export_dashboard_ids = [d.id for d in query.all()]
+
+    data = {}
+    if not export_dashboard_ids:
+        logging.error('No dashboards found!')
+        raise DashboardNotFoundException('No dashboards found!')
+    else:
+        data = Dashboard.export_dashboards(export_dashboard_ids) if export_dashboard_ids else {}
     return data
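With `multiple=True`, both flags are now repeatable, and an empty match set exits non-zero instead of printing `{}`. A sketch, again with assumed ids and titles:

    from superset import app

    runner = app.test_cli_runner()
    # Repeatable flags: dashboards 1 and 2, plus any dashboard titled 'Births'
    result = runner.invoke(
        app.cli, ['export_dashboards', '-i', '1', '-i', '2', '-t', 'Births'])
    # No match raises DashboardNotFoundException, which the command echoes
    # in red and converts to exit(1)
    result = runner.invoke(app.cli, ['export_dashboards', '-t', 'no-such-title'])
    assert result.exit_code == 1
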
From f960c4f4b9381ed30a372350b64521c9073ad30c Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 16:51:24 -0700
Subject: [PATCH 004/107] Work from ticket #7444 and start of this work

---
 superset/cli.py                           | 37 ++++++++++++-
 superset/config.py                        | 20 ++-----
 superset/data/__init__.py                 | 63 +++++++++++++++++++++++
 superset/models/core.py                   | 21 +++++++-
 superset/models/helpers.py                |  2 +-
 superset/utils/dashboard_import_export.py |  4 +-
 6 files changed, 126 insertions(+), 21 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 92cfc2758790..4e621717ff73 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -19,7 +19,13 @@ from datetime import datetime
 import logging
 from subprocess import Popen
+<<<<<<< Updated upstream
 from sys import stdout, exit
+=======
+from sys import stdout
+import pkgutil
+import importlib
+>>>>>>> Stashed changes

 import click
 from colorama import Fore, Style
@@ -125,6 +131,35 @@ def load_examples(load_test_data):
     """Loads a set of Slices and Dashboards and a supporting dataset """
     load_examples_run(load_test_data)

+@app.cli.group()
+def examples():
+    """Manages example Slices/Dashboards/datasets"""
+    pass
+
+@examples.command()
+def show():
+    """List example Slices/Dashboards/datasets"""
+    print('Available examples:\n')
+    for importer, modname, ispkg in pkgutil.iter_modules(data.__path__):
+        #print("Found submodule %s (is a package: %s)" % (modname, ispkg))
+        module = importlib.import_module('superset.data.'
+                                         + modname)
+        try:
+            print('{}: {}'.format(modname, module.DESCRIPTION))
+        except AttributeError as e:
+            pass
+
+
+
+@examples.command()
+def load():
+    """Load an example Slice/Dashboard/dataset"""
+    pass
+
+@examples.command()
+def remove():
+    """Remove an example Slice/Dashboard/dataset"""
+    pass
+
 @app.cli.command()
 @click.option('--datasource', '-d',
               help='Specify which datasource name to load, if '
@@ -188,6 +224,7 @@ def import_dashboards(path, recursive):
     '--print_stdout', '-p',
     is_flag=True, default=False,
     help='Print JSON to stdout')
 @click.option(
+<<<<<<< Updated upstream
     '--dashboard-ids', '-i', default=None, type=int, multiple=True,
     help='Specify dashboard id to export')
 @click.option(
     '--dashboard-titles', '-t', default=None, multiple=True,
     help='Specify dashboard title to export')
@@ -211,7 +247,6 @@ def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_tit
     with open(dashboard_file, 'w') as data_stream:
         data_stream.write(data)

-
 @app.cli.command()
 @click.option(
     '--path', '-p',
diff --git a/superset/config.py b/superset/config.py
index 5a35f0b64128..f6564e869a73 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -598,23 +598,9 @@ class CeleryConfig(object):
 # Send user to a link where they can report bugs
 BUG_REPORT_URL = None

-# Send user to a link where they can read more about Superset
-DOCUMENTATION_URL = None
-
-# What is the Last N days relative in the time selector to:
-# 'today' means it is midnight (00:00:00) of today in the local timezone
-# 'now' means it is relative to the query issue time
-DEFAULT_RELATIVE_END_TIME = 'today'
-
-# Is epoch_s/epoch_ms datetime format supposed to be considered since UTC ?
-# If not, it is sassumed then the epoch_s/epoch_ms is seconds since 1/1/1970
-# localtime (in the tz where the superset webserver is running)
-IS_EPOCH_S_TRULY_UTC = False
-
-# Configure which SQL validator to use for each engine
-SQL_VALIDATORS_BY_ENGINE = {
-    'presto': 'PrestoDBSQLValidator',
-}
+
+# Directory to export data to
+EXPORT_DIRECTORY = '/tmp/'

 try:
     if CONFIG_PATH_ENV_VAR in os.environ:
diff --git a/superset/data/__init__.py b/superset/data/__init__.py
index 5090effe5fc4..53fb818de73e 100644
--- a/superset/data/__init__.py
+++ b/superset/data/__init__.py
@@ -30,3 +30,66 @@
 from .sf_population_polygons import load_sf_population_polygons  # noqa
 from .unicode_test_data import load_unicode_test_data  # noqa
 from .world_bank import load_world_bank_health_n_pop  # noqa
+
+from abc import ABC
+
+class AbstractSupersetExample(ABC):
+    """Defines interface through which superset examples load themselves."""
+
+    def __init__(self, description):
+        self.description = description
+
+    def load_data(self):
+        # Task 1: Load file and create pandas.DataFrame
+        # Task 2: Load data into SQL with pandas.DataFrame.to_sql()
+        # Task 3: Process through ORM to get back workable Table object from whichever data source the table is in
+        pass
+
+    def create_metrics(self):
+        # Task 1: Build any TableColumns
+        # Task 2: Build Metrics - SQLMetrics
+        # Task 3: Store metrics in DB via ORM
+        pass
+
+    def create_charts(self, slices):
+        # Task 1: Build Slice from config/JSON
+        # Task 2: Store to DB via - misc_dash_slices.add(slc.slice_name) / merge_slice(slc)
+        pass
+
+    def create_dashboards(self, name, config):
+        # Task 1: Instantiate Dash via ORM
+        # Task 2: Configure Dash via JSON
+        # Task 3: Store to DB via ORM
+        pass
+
+
+class SupersetConfigExample():
+    """Defines interface through which superset examples define themselves"""
+
+    def __init__(self, description):
+        self.description = description
+
+    def load_data(self, data_path, data_types='csv',
+                  encoding='utf-8', dt_column=None):
+        # Task 1: Load file and create pandas.DataFrame
+        # Task 2: Load data into SQL with pandas.DataFrame.to_sql()
+        # Task 3: Process through ORM to get back workable Table object from whichever data source the table is in
+
+        pass
+
+    def create_metrics(self, metrics):
+        # Task 1: Build TableColumns
+        # Task 2: Build Metrics - SQLMetrics
+        # Task 3: Store metrics in DB via ORM
+        pass
+
+    def create_charts(self, slices):
+        # Task 1: Build Slice from config/JSON
+        # Task 2: Store to DB via - misc_dash_slices.add(slc.slice_name) / merge_slice(slc)
+        pass
+
+    def create_dashboards(self, name, config):
+        # Task 1: Instantiate Dash via ORM
+        # Task 2: Configure Dash via JSON
+        # Task 3: Store to DB via ORM
+        pass
+
diff --git a/superset/models/core.py b/superset/models/core.py
index e16a234bfd72..2f22d89c962a 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -61,6 +61,7 @@
 custom_password_store = config.get('SQLALCHEMY_CUSTOM_PASSWORD_STORE')
 stats_logger = config.get('STATS_LOGGER')
 log_query = config.get('QUERY_LOGGER')
+export_dir = config.get('EXPORT_DIRECTORY')
 metadata = Model.metadata  # pylint: disable=no-member

 PASSWORD_MASK = 'X' * 10
@@ -615,7 +616,7 @@ def alter_positions(dashboard, old_to_new_slc_id_dict):
         return copied_dash.id

     @classmethod
-    def export_dashboards(cls, dashboard_ids):
+    def export_dashboards(cls, dashboard_ids, export_data=False):
         copied_dashboards = []
         datasource_ids = set()
         for dashboard_id in dashboard_ids:
@@ -649,6 +650,24 @@ def export_dashboards(cls, dashboard_ids, export_data=False):
             )
             make_transient(eager_datasource)
             eager_datasources.append(eager_datasource)
+
+        if export_data:
+            for data_table in eager_datasources:
+                engine = data_table.database.get_sqla_engine()
+                columns = [c.get_sqla_col() for c in data_table.columns]
+
+                qry = (
+                    select(columns)
+                    .select_from(text(data_table.name))
+                )
+                qry.compile(engine)
+                sql = '{}'.format(
+                    qry.compile(engine),
+                )
+
+                df = pd.read_sql_query(sql=sql, con=engine)
+                file_name = f'{export_dir}/{data_table.name}.csv.gz'
+                df.to_csv(file_name, compression='gzip')

         return json.dumps({
             'dashboards': copied_dashboards,
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index 78b438d9f8f1..17021f815664 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -212,7 +212,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False,
                     include_defaults=include_defaults,
                 ) for child in getattr(self, c)
             ],
-                key=lambda k: sorted(k.items()))
+                key=lambda k: sorted(str(k.items())))

         return dict_rep
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index a56a73ab0326..ff743cb6356d 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -40,6 +40,7 @@ def import_dashboards(session, data_stream, import_time=None):
         dashboard, import_time=import_time)
     session.commit()

+<<<<<<< Updated upstream
 def export_dashboards(session, dashboard_ids=None, dashboard_titles=None):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
@@ -58,5 +59,6 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None):
         logging.error('No dashboards found!')
         raise DashboardNotFoundException('No dashboards found!')
     else:
-        data = Dashboard.export_dashboards(export_dashboard_ids) if export_dashboard_ids else {}
+        data = Dashboard.export_dashboards(export_dashboard_ids)
+
     return data
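Of the new `examples` group, only `show` does anything at this point (`load` and `remove` are stubs), and it depends on each `superset.data` submodule exposing a `DESCRIPTION` attribute, which none define yet. A sketch of exercising it once the import cleanup in the next patch lands:

    from superset import app

    runner = app.test_cli_runner()
    # Iterates superset.data submodules, printing '<modname>: <DESCRIPTION>'
    # for each module that defines DESCRIPTION
    result = runner.invoke(app.cli, ['examples', 'show'])
    print(result.output)
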
From a606a0a836c6700236ee87e7942ee5f540e5ce79 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 16:57:46 -0700
Subject: [PATCH 005/107] Import fix

---
 superset/cli.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 4e621717ff73..11eaeaaed49d 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -19,13 +19,9 @@ from datetime import datetime
 import logging
 from subprocess import Popen
-<<<<<<< Updated upstream
 from sys import stdout, exit
-=======
-from sys import stdout
 import pkgutil
 import importlib
->>>>>>> Stashed changes

 import click
 from colorama import Fore, Style
@@ -146,8 +142,8 @@ def show():
     try:
         print('{}: {}'.format(modname, module.DESCRIPTION))
     except AttributeError as e:
+        print(modname)
         pass
-

 @examples.command()

From a89a0fb39a232f59d9e9353fc63573caa70349d7 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 16:58:16 -0700
Subject: [PATCH 006/107] upstream merge

---
 superset/utils/dashboard_import_export.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index ff743cb6356d..1562ad1962ad 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -40,7 +40,6 @@ def import_dashboards(session, data_stream, import_time=None):
         dashboard, import_time=import_time)
     session.commit()

-<<<<<<< Updated upstream
 def export_dashboards(session, dashboard_ids=None, dashboard_titles=None):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')

From 948254b34f187a37e0bf670c0b05b644f1bce37f Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 17:18:30 -0700
Subject: [PATCH 007/107] pass flake8 complaints

---
 superset/cli.py                           |  9 +++++++--
 superset/utils/dashboard_import_export.py | 12 ++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 11eaeaaed49d..00f8754e223c 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -31,9 +31,9 @@ from superset import (
     app, appbuilder, data, db, security_manager,
 )
+from superset.exceptions import DashboardNotFoundException
 from superset.utils import (
     core as utils, dashboard_import_export, dict_import_export)
-from superset.exceptions import DashboardNotFoundException

 config = app.config
 celery_app = utils.get_celery_app(config)
@@ -225,7 +225,12 @@ def import_dashboards(path, recursive):
 @click.option(
     '--dashboard-titles', '-t', default=None, multiple=True,
     help='Specify dashboard title to export')
+@click.option(
+    '--export-data-dir', '-d', default=config.get('EXPORT_DIRECTORY'),
+    help='Specify directory to export the dashboard\'s data tables as CSV files.'
+)
-def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_titles):
+def export_dashboards(print_stdout, dashboard_file, dashboard_ids,
+                      dashboard_titles):
     """Export dashboards to JSON"""
     try:
         data = dashboard_import_export.export_dashboards(
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 1562ad1962ad..bfc832a97457 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -15,15 +15,14 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=C,R,W
-import sys
 import json
 import logging
 import time

 from superset import db
 from superset.models.core import Dashboard
-from superset.utils.core import decode_dashboards
 from superset.exceptions import DashboardNotFoundException
+from superset.utils.core import decode_dashboards


 def import_dashboards(session, data_stream, import_time=None):
@@ -40,17 +39,18 @@ def import_dashboards(session, data_stream, import_time=None):
         dashboard, import_time=import_time)
     session.commit()

-def export_dashboards(session, dashboard_ids=None, dashboard_titles=None):
+
+def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
     export_dashboard_ids = []
-
+
     session = db.session()
     query = session.query(Dashboard)
     if dashboard_ids or dashboard_titles:
-        query = query.filter(Dashboard.id.in_(dashboard_ids) | \
+        query = query.filter(Dashboard.id.in_(dashboard_ids) |
                              Dashboard.dashboard_title.in_(dashboard_titles))
-
+
     export_dashboard_ids = [d.id for d in query.all()]

     data = {}

From c94be1b4ddc7d84162d38370dcd2bc0bc94b027f Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 18:01:20 -0700
Subject: [PATCH 008/107] Added --export-data/-x flag and --export-data-dir/-d
 directory/location to CLI. Changed config.EXPORT_DIRECTORY to
 config.DASHBOARD_EXPORT_DIR. Dashboard.export_dashboards gets export_data
 and export_data_dir args, does table export via SQL.
 dashboard_import_export.export_dashboards gets export_data/export_data_dir
 args.

---
 superset/cli.py    | 14 ++++++++++----
 superset/config.py |  4 ++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 00f8754e223c..8cbadae76f28 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -226,17 +226,23 @@ def import_dashboards(path, recursive):
     '--dashboard-titles', '-t', default=None, multiple=True,
     help='Specify dashboard title to export')
 @click.option(
+    '--export-data', '-x', default=None, is_flag=True,
+    help='Export the dashboard\'s data tables as CSV files.'
+)
+@click.option(
+    '--export-data-dir', '-d', default=config.get('DASHBOARD_EXPORT_DIR'),
+    help='Specify export directory path. Defaults to \'/tmp\'.'
+)
 def export_dashboards(print_stdout, dashboard_file, dashboard_ids,
-                      dashboard_titles):
+                      dashboard_titles, export_data, export_data_dir):
     """Export dashboards to JSON"""
     try:
         data = dashboard_import_export.export_dashboards(
             db.session,
             dashboard_ids=dashboard_ids,
-            dashboard_titles=dashboard_titles)
+            dashboard_titles=dashboard_titles,
+            export_data=export_data,
+            export_data_dir=export_data_dir)
     except DashboardNotFoundException as e:
         click.echo(click.style(str(e), fg='red'))
         exit(1)
diff --git a/superset/config.py b/superset/config.py
index f6564e869a73..b83f4a41a8c7 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -599,8 +599,8 @@ class CeleryConfig(object):
 # Send user to a link where they can report bugs
 BUG_REPORT_URL = None

-# Directory to export data to
-EXPORT_DIRECTORY = '/tmp/'
+# Default dashboard export directory
+DASHBOARD_EXPORT_DIR = '/tmp/'

 try:
     if CONFIG_PATH_ENV_VAR in os.environ:
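A sketch of the data-export flags added above; the directory is an assumed example, and each datasource backing the selected dashboards is dumped there as a gzipped CSV:

    from superset import app

    runner = app.test_cli_runner()
    # -x dumps each backing table; -d overrides DASHBOARD_EXPORT_DIR ('/tmp/' by default)
    result = runner.invoke(
        app.cli,
        ['export_dashboards', '-i', '1', '-x', '-d', '/tmp/dashboard_exports'])
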
) def export_dashboards(print_stdout, dashboard_file, dashboard_ids, - dashboard_titles): + dashboard_titles, export_data, export_data_dir): """Export dashboards to JSON""" try: data = dashboard_import_export.export_dashboards( db.session, dashboard_ids=dashboard_ids, - dashboard_titles=dashboard_titles) + dashboard_titles=dashboard_titles, + export_data=export_data, + export_data_dir=export_data_dir) except DashboardNotFoundException as e: click.echo(click.style(str(e), fg='red')) exit(1) diff --git a/superset/config.py b/superset/config.py index f6564e869a73..b83f4a41a8c7 100644 --- a/superset/config.py +++ b/superset/config.py @@ -599,8 +599,8 @@ class CeleryConfig(object): # Send user to a link where they can report bugs BUG_REPORT_URL = None -# Directory to export data to -EXPORT_DIRECTORY = '/tmp/' +# Default dashboard export directory +DASHBOARD_EXPORT_DIR = '/tmp/' try: if CONFIG_PATH_ENV_VAR in os.environ: From 2beb594c661b5f3f34e4392453860f8242d9e688 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 7 May 2019 18:13:10 -0700 Subject: [PATCH 009/107] Added "includes_data" flag to dashboard export to facilitate import. --- superset/config.py | 2 +- superset/models/core.py | 15 ++++++++++----- superset/utils/dashboard_import_export.py | 6 ++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/superset/config.py b/superset/config.py index b83f4a41a8c7..afbcc177a510 100644 --- a/superset/config.py +++ b/superset/config.py @@ -600,7 +600,7 @@ class CeleryConfig(object): BUG_REPORT_URL = None # Default dashboard export directory -DASHBOARD_EXPORT_DIR = '/tmp/' +DASHBOARD_EXPORT_DIR = '/tmp' try: if CONFIG_PATH_ENV_VAR in os.environ: diff --git a/superset/models/core.py b/superset/models/core.py index 2f22d89c962a..87436835b461 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -41,6 +41,7 @@ from sqlalchemy.orm.session import make_transient from sqlalchemy.pool import NullPool from sqlalchemy.schema import UniqueConstraint +from sqlalchemy.sql import select, text from sqlalchemy_utils import EncryptedType import sqlparse @@ -61,7 +62,6 @@ custom_password_store = config.get('SQLALCHEMY_CUSTOM_PASSWORD_STORE') stats_logger = config.get('STATS_LOGGER') log_query = config.get('QUERY_LOGGER') -export_dir = config.get('EXPORT_DIRECTORY') metadata = Model.metadata # pylint: disable=no-member PASSWORD_MASK = 'X' * 10 @@ -616,7 +616,8 @@ def alter_positions(dashboard, old_to_new_slc_id_dict): return copied_dash.id @classmethod - def export_dashboards(cls, dashboard_ids, export_data=False): + def export_dashboards(cls, dashboard_ids, export_data=False, + export_data_dir=None): copied_dashboards = [] datasource_ids = set() for dashboard_id in dashboard_ids: @@ -651,7 +652,8 @@ def export_dashboards(cls, dashboard_ids, export_data=False): make_transient(eager_datasource) eager_datasources.append(eager_datasource) - if export_data: + export_files = [] + if export_data and export_data_dir: for data_table in eager_datasources: engine = data_table.database.get_sqla_engine() columns = [c.get_sqla_col() for c in data_table.columns] @@ -664,14 +666,17 @@ def export_dashboards(cls, dashboard_ids, export_data=False): sql = '{}'.format( qry.compile(engine), ) - + df = pd.read_sql_query(sql=sql, con=engine) - file_name = f'{export_dir}/{data_table.name}.csv.gz' + file_name = f'{export_data_dir}/{data_table.name}.csv.gz' + export_files.append(file_name) df.to_csv(file_name, compression='gzip') return json.dumps({ 'dashboards': copied_dashboards, 'datasources': 
             'datasources': eager_datasources,
+            'files': export_files,
+            'includes_data': True if export_data else False
         }, cls=utils.DashboardEncoder, indent=4)
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index bfc832a97457..159ec8b612e9 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -40,7 +40,8 @@ def import_dashboards(session, data_stream, import_time=None):
     session.commit()


-def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False):
+def export_dashboards(session, dashboard_ids=None, dashboard_titles=None,
+                      export_data=False, export_data_dir=None):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
     export_dashboard_ids = []
@@ -58,6 +59,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None,
         logging.error('No dashboards found!')
         raise DashboardNotFoundException('No dashboards found!')
     else:
-        data = Dashboard.export_dashboards(export_dashboard_ids)
+        data = Dashboard.export_dashboards(export_dashboard_ids,
+                                           export_data, export_data_dir)

     return data

From 4a49f78757b7d30a92b08913c8cacc3370400964 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 20:23:01 -0700
Subject: [PATCH 010/107] Created SQLALCHEMY_IMPORT_URI config string for
 where to import dashboard tables.

---
 superset/config.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/superset/config.py b/superset/config.py
index afbcc177a510..870f56ba68b8 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -80,7 +80,12 @@
 # SQLALCHEMY_DATABASE_URI = 'mysql://myapp@localhost/myapp'
 # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp'

-# In order to hook up a custom password store for all SQLACHEMY connections
+# The SQLAlchemy connection string for incoming examples
+SQLALCHEMY_IMPORT_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'examples.db')
+# SQLALCHEMY_IMPORT_URI = 'mysql://myapp@localhost/examples'
+# SQLALCHEMY_IMPORT_URI = 'postgresql://root:password@localhost/examples'
+
+# In order to hook up a custom password store for all SQLALCHEMY connections
 # implement a function that takes a single argument of type 'sqla.engine.url',
 # returns a password and set SQLALCHEMY_CUSTOM_PASSWORD_STORE.
 #
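Like the other connection strings in config.py, this one can be overridden from superset_config.py; a sketch with an assumed Postgres DSN:

    # superset_config.py -- values here are illustrative assumptions
    SQLALCHEMY_IMPORT_URI = 'postgresql://superset:superset@localhost/examples'
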
From 7ad501e00cccf4590a61275203b6c7d38c6977fc Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 20:23:18 -0700
Subject: [PATCH 011/107] Reworked Dashboard.export_dashboards to print better
 records on exported tables.

---
 superset/models/core.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/superset/models/core.py b/superset/models/core.py
index 87436835b461..ba225d8ed748 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -652,11 +652,12 @@ def export_dashboards(cls, dashboard_ids, export_data=False,
             make_transient(eager_datasource)
             eager_datasources.append(eager_datasource)

-        export_files = []
+        data = {'tables': []}
         if export_data and export_data_dir:
             for data_table in eager_datasources:
                 engine = data_table.database.get_sqla_engine()
                 columns = [c.get_sqla_col() for c in data_table.columns]
+                types = {c.name:c.type for c in columns}

                 qry = (
                     select(columns)
@@ -669,14 +670,19 @@

                 df = pd.read_sql_query(sql=sql, con=engine)
                 file_name = f'{export_data_dir}/{data_table.name}.csv.gz'
-                export_files.append(file_name)
+                table_record = {
+                    'name': data_table.name,
+                    'file_path': file_name,
+                    'types': types,
+                }
+                data['tables'].append(table_record)
                 df.to_csv(file_name, compression='gzip')

+        data['includes_data'] = len(data['tables']) > 0
         return json.dumps({
             'dashboards': copied_dashboards,
             'datasources': eager_datasources,
-            'files': export_files,
-            'includes_data': True if export_data else False
+            'data': data,
         }, cls=utils.DashboardEncoder, indent=4)

From e65b976748b1a5beb21a5c6f1f10e4685b8fb08f Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 20:25:02 -0700
Subject: [PATCH 012/107] Created get_or_create_import_db_engine() to fetch a
 DB engine for imports.

---
 superset/utils/core.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/superset/utils/core.py b/superset/utils/core.py
index 3e80c76355d2..df1e31cf87ac 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -51,10 +51,11 @@
 import parsedatetime
 from pydruid.utils.having import Having
 import sqlalchemy as sa
-from sqlalchemy import event, exc, select, Text
+from sqlalchemy import create_engine, event, exc, select, Text
 from sqlalchemy.dialects.mysql import MEDIUMTEXT
 from sqlalchemy.sql.type_api import Variant
 from sqlalchemy.types import TEXT, TypeDecorator
+from sqlalchemy_utils import database_exists, create_database

 from superset.exceptions import SupersetException, SupersetTimeoutException
 from superset.utils.dates import datetime_to_epoch, EPOCH
@@ -881,6 +882,15 @@ def get_or_create_main_db():
     return dbobj


+def get_or_create_import_db_engine():
+    """Get a SQLAlchemy engine for imported dashboard data"""
+    from superset import conf
+    engine = create_engine(conf.get('SQLALCHEMY_IMPORT_URI'))
+    if not database_exists(engine.url):
+        create_database(engine.url)
+    return engine
+
+
 def get_main_database(session):
     from superset.models import core as models
     return (

From ec1ff2639023c00b131b4d57a451afdaab622180 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Tue, 7 May 2019 20:26:49 -0700
Subject: [PATCH 013/107] Now importing data tables to import db if a
 Dashboard file we're loading has a 'tables' section. Uses type inference
 atm.
---
 superset/utils/dashboard_import_export.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 159ec8b612e9..4487031a9bb5 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -19,10 +19,11 @@
 import logging
 import time

+import pandas as pd
 from superset import db
 from superset.models.core import Dashboard
 from superset.exceptions import DashboardNotFoundException
-from superset.utils.core import decode_dashboards
+from superset.utils.core import decode_dashboards, get_or_create_import_db_engine


 def import_dashboards(session, data_stream, import_time=None):
@@ -30,13 +31,25 @@ def import_dashboards(session, data_stream, import_time=None):
     current_tt = int(time.time())
     import_time = current_tt if import_time is None else import_time
     data = json.loads(data_stream.read(), object_hook=decode_dashboards)
+
     # TODO: import DRUID datasources
-    for table in data['datasources']:
-        type(table).import_obj(table, import_time=import_time)
-    session.commit()
     for dashboard in data['dashboards']:
         Dashboard.import_obj(
             dashboard, import_time=import_time)
+
+    if data['data']['includes_data']:
+        engine = get_or_create_import_db_engine()
+        for table in data['data']['tables']:
+            df = pd.read_csv(table['file_path'], parse_dates=True,
+                             infer_datetime_format=True, compression='infer')
+            df.to_sql(
+                table['name'],
+                engine,
+                if_exists='replace',
+                chunksize=500,
+                index=False)
+
     session.commit()
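A round-trip sketch using only flags shown in this series: export with data, capture the JSON from stdout, then re-import it; the paths are assumed example values:

    from superset import app

    runner = app.test_cli_runner()
    result = runner.invoke(app.cli, ['export_dashboards', '-i', '1', '-x',
                                     '-d', '/tmp/dashboard_exports', '-p'])
    with open('/tmp/dash.json', 'w') as f:
        f.write(result.output)
    # Each CSV listed under data['data']['tables'] is loaded into the import
    # database via pandas.read_csv / DataFrame.to_sql
    runner.invoke(app.cli, ['import_dashboards', '-p', '/tmp/dash.json'])
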
From 72da3209dfebbb42a6543800d554c31da84a222a Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Wed, 8 May 2019 15:05:05 -0700
Subject: [PATCH 014/107] Added Dashboards.uuid column to SQLAlchemy model
 with Alembic migration

---
 .../e5200a951e62_add_dashboards_uuid.py       | 41 +++++++++++++++++++
 superset/models/core.py                       |  5 ++-
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py

diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
new file mode 100644
index 000000000000..c7b3b1c0c0c6
--- /dev/null
+++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Adds a dashboards.uuid column.
+
+Revision ID: e5200a951e62
+Revises: e9df189e5c7e
+Create Date: 2019-05-08 13:42:48.479145
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e5200a951e62'
+down_revision = 'e9df189e5c7e'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy_utils.types.uuid import UUIDType
+
+
+def upgrade():
+    print('Adding columns dashboards.uuid=UUIDType')
+    op.add_column('dashboards', sa.Column('uuid', UUIDType(binary=False), nullable=True))
+
+def downgrade():
+    print('Removing column dashboards.uuid=UUIDType')
+    with op.batch_alter_table('dashboards') as batch_op:
+        batch_op.drop_column('uuid')
diff --git a/superset/models/core.py b/superset/models/core.py
index ba225d8ed748..f3d1a29d17ce 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -23,6 +23,7 @@
 import json
 import logging
 import textwrap
+import uuid

 from flask import escape, g, Markup, request
 from flask_appbuilder import Model
@@ -43,6 +44,7 @@
 from sqlalchemy.schema import UniqueConstraint
 from sqlalchemy.sql import select, text
 from sqlalchemy_utils import EncryptedType
+from sqlalchemy_utils.types.uuid import UUIDType
 import sqlparse

 from superset import app, db, db_engine_specs, security_manager
@@ -399,6 +401,7 @@ class Dashboard(Model, AuditMixinNullable, ImportMixin):

     __tablename__ = 'dashboards'
     id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), unique=True, default=uuid.uuid4)
     dashboard_title = Column(String(500))
     position_json = Column(utils.MediumText())
     description = Column(Text)
@@ -676,7 +679,7 @@ def export_dashboards(cls, dashboard_ids, export_data=False,
                     'types': types,
                 }
                 data['tables'].append(table_record)
-                df.to_csv(file_name, compression='gzip')
+                df.to_csv(file_name)

         data['includes_data'] = len(data['tables']) > 0
         return json.dumps({

From 95636a834a55c0757752375a15ed8a4c37e1d533 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Wed, 8 May 2019 23:24:05 -0700
Subject: [PATCH 015/107] Added uuid to dashboards/slices/databases/datasources
 via ImportMixin, along with a migration.

---
 .../e5200a951e62_add_dashboards_uuid.py | 90 +++++++++++++++++--
 superset/models/core.py                 |  3 -
 superset/models/helpers.py              |  6 +++++-
 3 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
index c7b3b1c0c0c6..ba73fb072294 100644
--- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
+++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
@@ -21,21 +21,99 @@
 Create Date: 2019-05-08 13:42:48.479145

 """
+import uuid
+
+from alembic import op
+from sqlalchemy import Column, Integer
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy_utils.types.uuid import UUIDType
+
+from superset import db

 # revision identifiers, used by Alembic.
 revision = 'e5200a951e62'
 down_revision = 'e9df189e5c7e'

-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy_utils.types.uuid import UUIDType
+Base = declarative_base()
+
+
+get_uuid = lambda: str(uuid.uuid4())
+
+class Dashboard(Base):
+    __tablename__ = 'dashboards'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class Datasource(Base):
+    __tablename__ = 'datasources'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class Slice(Base):
+    __tablename__ = 'slices'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class Database(Base):
+    __tablename__ = 'dbs'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)


 def upgrade():
-    print('Adding columns dashboards.uuid=UUIDType')
-    op.add_column('dashboards', sa.Column('uuid', UUIDType(binary=False), nullable=True))
+    bind = op.get_bind()
+    session = db.Session(bind=bind)
+
+    with op.batch_alter_table('dashboards') as batch_op:
+        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
+    for d in session.query(Dashboard):
+        d.uuid = get_uuid()
+        session.merge(d)
+    with op.batch_alter_table('dashboards') as batch_op:
+        batch_op.alter_column('uuid', nullable=False)
+        batch_op.create_unique_constraint('uq_uuid', 'uuid')
+    session.commit()
+
+    with op.batch_alter_table('datasources') as batch_op:
+        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
+    for d in session.query(Datasource):
+        d.uuid = get_uuid()
+        session.merge(d)
+    with op.batch_alter_table('datasources') as batch_op:
+        batch_op.alter_column('uuid', nullable=False)
+        batch_op.create_unique_constraint('uq_uuid', 'uuid')
+    session.commit()
+
+    with op.batch_alter_table('slices') as batch_op:
+        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
+    for s in session.query(Slice):
+        s.uuid = get_uuid()
+        session.merge(s)
+    with op.batch_alter_table('slices') as batch_op:
+        batch_op.alter_column('uuid', nullable=False)
+        batch_op.create_unique_constraint('uq_uuid', 'uuid')
+    session.commit()
+
+    with op.batch_alter_table('dbs') as batch_op:
+        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
+    for d in session.query(Database):
+        d.uuid = get_uuid()
+        session.merge(d)
+    with op.batch_alter_table('slices') as batch_op:
+        batch_op.alter_column('uuid', nullable=False)
+        batch_op.create_unique_constraint('uq_uuid', 'uuid')
+    session.commit()
+
+    session.close()

 def downgrade():
-    print('Removing column dashboards.uuid=UUIDType')
     with op.batch_alter_table('dashboards') as batch_op:
         batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('datasources') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('slices') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('dbs') as batch_op:
+        batch_op.drop_column('uuid')
diff --git a/superset/models/core.py b/superset/models/core.py
index f3d1a29d17ce..039ccbad9087 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -23,7 +23,6 @@
 import json
 import logging
 import textwrap
-import uuid

 from flask import escape, g, Markup, request
 from flask_appbuilder import Model
@@ -44,7 +43,6 @@
 from sqlalchemy.schema import UniqueConstraint
 from sqlalchemy.sql import select, text
 from sqlalchemy_utils import EncryptedType
-from sqlalchemy_utils.types.uuid import UUIDType
 import sqlparse
 from superset import app, db, db_engine_specs, security_manager
@@ -401,7 +399,6 @@ class Dashboard(Model, AuditMixinNullable, ImportMixin):

     __tablename__ = 'dashboards'
     id = Column(Integer, primary_key=True)
-    uuid = Column(UUIDType(binary=False), unique=True, default=uuid.uuid4)
     dashboard_title = Column(String(500))
     position_json = Column(utils.MediumText())
     description = Column(Text)
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index 17021f815664..c15173705ceb 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -20,18 +20,20 @@
 import json
 import logging
 import re
+import uuid

 from flask import escape, Markup
 from flask_appbuilder.models.decorators import renders
 from flask_appbuilder.models.mixins import AuditMixin
 import humanize
 import sqlalchemy as sa
-from sqlalchemy import and_, or_, UniqueConstraint
+from sqlalchemy import and_, or_, UniqueConstraint, Column
 from sqlalchemy.ext.declarative import declared_attr
 from sqlalchemy.orm.exc import MultipleResultsFound
 import yaml

 from superset.utils.core import QueryStatus
+from sqlalchemy_utils.types.uuid import UUIDType


 def json_to_dict(json_str):
@@ -56,6 +58,8 @@ class ImportMixin(object):
     # The names of the attributes
     # that are available for import and export

+    uuid = Column(UUIDType(binary=False), unique=True, default=uuid.uuid4)
+
     @classmethod
     def _parent_foreign_key_mappings(cls):
         """Get a mapping of foreign name to the local name of foreign keys"""

From 605e0ff1744157f868bf781c5af3d8eff4574b7b Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sat, 18 May 2019 15:46:23 +0200
Subject: [PATCH 016/107] Added python-git to requirements

---
 requirements-dev.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 857b9ad07d09..e21870c1dbb1 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -31,6 +31,7 @@
 pycodestyle==2.4.0
 pyhive==0.6.1
 pylint==1.9.2
 python-dotenv==0.10.1
+python-git==2018.2.1
 redis==2.10.6
 statsd==3.3.0
 thrift==0.11.0

From 634555e0980cbac422467b00df0230236bb173d7 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sat, 18 May 2019 22:11:24 +0200
Subject: [PATCH 017/107] Now migrating all tables that inherit directly or
 indirectly from ImportMixin.

---
 .../e5200a951e62_add_dashboards_uuid.py | 119 +++++++++++-------
 1 file changed, 75 insertions(+), 44 deletions(-)

diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
index ba73fb072294..103a1bb94662 100644
--- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
+++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""Adds a dashboards.uuid column.
+"""Adds uuid columns to all classes with ImportMixin: dashboards, datasources, dbs, slices, tables, dashboard_email_schedules, slice_email_schedules

 Revision ID: e5200a951e62
 Revises: e9df189e5c7e
 Create Date: 2019-05-08 13:42:48.479145

 """
 import uuid

 from alembic import op
 from sqlalchemy import Column, Integer
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy_utils.types.uuid import UUIDType

 from superset import db

 # revision identifiers, used by Alembic.
 revision = 'e5200a951e62'
-down_revision = 'e9df189e5c7e'
+down_revision = 'afc69274c25a'

 Base = declarative_base()
@@ -48,13 +48,43 @@ class Datasource(Base):
     id = Column(Integer, primary_key=True)
     uuid = Column(UUIDType(binary=False), default=get_uuid)

+class Database(Base):
+    __tablename__ = 'dbs'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class DruidCluster(Base):
+    __tablename__ = 'clusters'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class DruidMetric(Base):
+    __tablename__ = 'metrics'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
 class Slice(Base):
     __tablename__ = 'slices'
     id = Column(Integer, primary_key=True)
     uuid = Column(UUIDType(binary=False), default=get_uuid)

-class Database(Base):
-    __tablename__ = 'dbs'
+class SqlaTable(Base):
+    __tablename__ = 'tables'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class SqlMetric(Base):
+    __tablename__ = 'sql_metrics'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class DashboardEmailSchedule(Base):
+    __tablename__ = 'dashboard_email_schedules'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
+class SliceEmailSchedule(Base):
+    __tablename__ = 'slice_email_schedules'
     id = Column(Integer, primary_key=True)
     uuid = Column(UUIDType(binary=False), default=get_uuid)

@@ -63,46 +93,29 @@ def upgrade():
     bind = op.get_bind()
     session = db.Session(bind=bind)

-    with op.batch_alter_table('dashboards') as batch_op:
-        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
-    for d in session.query(Dashboard):
-        d.uuid = get_uuid()
-        session.merge(d)
-    with op.batch_alter_table('dashboards') as batch_op:
-        batch_op.alter_column('uuid', nullable=False)
-        batch_op.create_unique_constraint('uq_uuid', 'uuid')
-    session.commit()
-
-    with op.batch_alter_table('datasources') as batch_op:
-        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
-    for d in session.query(Datasource):
-        d.uuid = get_uuid()
-        session.merge(d)
-    with op.batch_alter_table('datasources') as batch_op:
-        batch_op.alter_column('uuid', nullable=False)
-        batch_op.create_unique_constraint('uq_uuid', 'uuid')
-    session.commit()
-
-    with op.batch_alter_table('slices') as batch_op:
-        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
-    for s in session.query(Slice):
-        s.uuid = get_uuid()
-        session.merge(s)
-    with op.batch_alter_table('slices') as batch_op:
-        batch_op.alter_column('uuid', nullable=False)
-        batch_op.create_unique_constraint('uq_uuid', 'uuid')
-    session.commit()
+    def add_uuid_column(col_name, _type):
+        """Add a uuid column to a given table"""
+        with op.batch_alter_table(col_name) as batch_op:
+            batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
+        for s in session.query(_type):
+            s.uuid = get_uuid()
+            session.merge(s)
+        with op.batch_alter_table(col_name) as batch_op:
+            batch_op.alter_column('uuid', nullable=False)
+            batch_op.create_unique_constraint('uq_uuid', 'uuid')
+        session.commit()
+
+    add_uuid_column('dashboards', Dashboard)
+    add_uuid_column('datasources', Datasource)
+    add_uuid_column('dbs', Database)
+    add_uuid_column('clusters', DruidCluster)
+    add_uuid_column('metrics', DruidMetric)
+    add_uuid_column('slices', Slice)
+    add_uuid_column('sql_metrics', SqlMetric)
+    add_uuid_column('tables', SqlaTable)
+    add_uuid_column('dashboard_email_schedules', DashboardEmailSchedule)
+    add_uuid_column('slice_email_schedules', SliceEmailSchedule)

-    with op.batch_alter_table('dbs') as batch_op:
-        batch_op.add_column(Column('uuid', UUIDType(binary=False), default=get_uuid))
-    for d in session.query(Database):
-        d.uuid = get_uuid()
-        session.merge(d)
-    with op.batch_alter_table('slices') as batch_op:
-        batch_op.alter_column('uuid', nullable=False)
-        batch_op.create_unique_constraint('uq_uuid', 'uuid')
-    session.commit()
-
     session.close()

 def downgrade():
@@ -112,8 +125,26 @@ def downgrade():
     with op.batch_alter_table('datasources') as batch_op:
         batch_op.drop_column('uuid')

+    with op.batch_alter_table('dbs') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('clusters') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('metrics') as batch_op:
+        batch_op.drop_column('uuid')
+
     with op.batch_alter_table('slices') as batch_op:
         batch_op.drop_column('uuid')

-    with op.batch_alter_table('dbs') as batch_op:
+    with op.batch_alter_table('sql_metrics') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('tables') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('dashboard_email_schedules') as batch_op:
+        batch_op.drop_column('uuid')
+
+    with op.batch_alter_table('slice_email_schedules') as batch_op:
         batch_op.drop_column('uuid')

From 35f028d02ca65c22751210e4bcb7e429a5918394 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sun, 19 May 2019 10:03:26 -0700
Subject: [PATCH 018/107] Use sa.Column

---
 superset/models/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index c15173705ceb..6ce8230769d1 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -58,7 +58,7 @@ class ImportMixin(object):
     # The names of the attributes
     # that are available for import and export

-    uuid = Column(UUIDType(binary=False), unique=True, default=uuid.uuid4)
+    uuid = sa.Column(UUIDType(binary=False), unique=True, default=uuid.uuid4)

     @classmethod
     def _parent_foreign_key_mappings(cls):

From 76edb939730c33e522ae17cf2fafee3dcfd4e437 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sun, 19 May 2019 10:12:06 -0700
Subject: [PATCH 019/107] Added TableColumn/table_columns table

---
 .../versions/e5200a951e62_add_dashboards_uuid.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
index 103a1bb94662..707e7f3aad49 100644
--- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
+++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
@@ -78,6 +78,11 @@ class SqlMetric(Base):
     id = Column(Integer, primary_key=True)
     uuid = Column(UUIDType(binary=False), default=get_uuid)

+class TableColumn(Base):
+    __tablename__ = 'table_columns'
+    id = Column(Integer, primary_key=True)
+    uuid = Column(UUIDType(binary=False), default=get_uuid)
+
 class DashboardEmailSchedule(Base):
     __tablename__ = 'dashboard_email_schedules'
     id = Column(Integer, primary_key=True)
     uuid = Column(UUIDType(binary=False), default=get_uuid)
@@ -113,6 +118,7 @@ def add_uuid_column(col_name, _type):
     add_uuid_column('slices', Slice)
     add_uuid_column('sql_metrics', SqlMetric)
     add_uuid_column('tables', SqlaTable)
+    add_uuid_column('table_columns', TableColumn)
     add_uuid_column('dashboard_email_schedules',
                     DashboardEmailSchedule)
     add_uuid_column('slice_email_schedules', SliceEmailSchedule)
@@ -143,6 +149,9 @@ def downgrade():
     with op.batch_alter_table('tables') as batch_op:
         batch_op.drop_column('uuid')

+    with op.batch_alter_table('table_columns') as batch_op:
+        batch_op.drop_column('uuid')
+
     with op.batch_alter_table('dashboard_email_schedules') as batch_op:
         batch_op.drop_column('uuid')
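Applying the migration is the usual Alembic flow driven through the Superset CLI; a sketch (assuming Flask-Migrate's `db` group is registered on the app, as in stock Superset):

    from superset import app

    runner = app.test_cli_runner()
    # Runs upgrade() above: adds the uuid column to every listed table and
    # backfills a fresh uuid4 for each existing row
    runner.invoke(app.cli, ['db', 'upgrade'])
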
From d5ea88aff8b2a3a480dcbb0e09c81f28a8f3fb87 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sun, 19 May 2019 12:23:58 -0700
Subject: [PATCH 020/107] Initial CLI tests

---
 tests/cli_tests.py | 100 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 tests/cli_tests.py

diff --git a/tests/cli_tests.py b/tests/cli_tests.py
new file mode 100644
index 000000000000..ddac62a15276
--- /dev/null
+++ b/tests/cli_tests.py
@@ -0,0 +1,100 @@
+import csv
+from io import StringIO
+import json
+import logging
+import unittest
+
+from superset import app, cli
+from tests.base_tests import SupersetTestCase
+
+config = app.config
+
+
+class SupersetCliTestCase(SupersetTestCase):
+
+    @classmethod
+    def setUp(self):
+        self.runner = app.test_cli_runner()
+
+    def test_version(self):
+        """Test `superset version`"""
+        version_result = self.runner.invoke(app.cli, ['version'])
+        # Version result should contain version string
+        logging.error(config.get('VERSION_STRING'))
+        logging.error(version_result.output)
+        self.assertTrue(config.get('VERSION_STRING') in version_result.output)
+
+    def test_export_all_test_dashboards(self):
+        """Test `superset export_dashboards`"""
+        self.runner.invoke(app.cli, ['load_examples'])
+        result = self.runner.invoke(app.cli, ['export_dashboards'])
+        logging.error(result.output)
+        data = json.loads(result.output)
+
+        # Should export at least all 5 test dashboards
+        self.assertGreaterEqual(len(data['dashboards']), 5)
+
+    def test_export_dashboard_by_id(self):
+        """Test `superset export_dashboards -i 3`"""
+        self.runner.invoke(app.cli, ['load_examples'])
+        result = self.runner.invoke(app.cli, ['export_dashboards', '-i', '5'])
+        data = json.loads(result.output)
+
+        # Should export 1 dashboard with matching id
+        ids = list(map(lambda d: d['__Dashboard__']['id'], data['dashboards']))
+        self.assertEqual(len(ids), 1)
+        self.assertEqual(ids[0], 5)
+
+    def test_export_dashboard_by_title(self):
+        """Test `superset export_dashboards -i 3`"""
+        self.runner.invoke(app.cli, ['load_examples'])
+        result = self.runner.invoke(
+            app.cli, ['export_dashboards', '-t', "World's Bank Data"])
+        data = json.loads(result.output)
+
+        # Should export 1 dashboard with matching id
+        ids = list(map(
+            lambda d: d['__Dashboard__']['dashboard_title'], data['dashboards']))
+        self.assertEqual(len(ids), 1)
+        self.assertEqual(ids[0], "World's Bank Data")
+
+    def test_examples_menu(self):
+        """Test `superset examples`"""
+        result = self.runner.invoke(app.cli, ['examples'])
+        self.assertIn('load', result.output)
+        self.assertIn('list', result.output)
+        self.assertIn('remove', result.output)
+
+    def test_examples_list(self):
+        """Test `superset examples list`"""
+        result = self.runner.invoke(
+            app.cli, ['examples', 'list']
+        )
+        output_f = StringIO(result.output)
+        csv_reader = csv.DictReader(output_f, delimiter="\t",
+                                    fieldnames=['title', 'description', 'total_size_mb',
+                                                'total_rows', 'updated_at'])
+        examples = []
+        for example in csv_reader:
+            examples.append(example)
+        examples = [e for e in csv_reader]
+        self.assertGreater(len(examples), 0)
+
+        wb = {'title': "World's Bank Data"}
+        title_matches = list(filter(lambda x: all(item in x.items() for item in wb.items()) > 0, examples))
+        self.assertEqual(len(title_matches), 1)
+
+    def test_examples_load(self):
+        """Test `superset examples load`"""
+        pass
+
+    def test_examples_remove(self):
+        """Test `superset examples remove`"""
+        pass
+
+    def test_examples_create(self):
+        """Test `superset examples create`"""
+        pass
+
+if __name__ == '__main__':
+    unittest.main()

From a06804ef7ce4206f7617fcc375e469ce442eb034 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Mon, 20 May 2019 11:42:43 -0700
Subject: [PATCH 021/107] Moved JSON serialization from
 Dashboard.export_dashboards to
 superset.utils.dashboard_import_export.export_dashboards in order to add
 metadata fields for examples

---
 superset/models/core.py                   | 58 ++++++++++++++++++-----
 superset/utils/dashboard_import_export.py | 13 +++--
 2 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/superset/models/core.py b/superset/models/core.py
index 039ccbad9087..680cb7a92485 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -22,6 +22,8 @@
 import functools
 import json
 import logging
+import os
+import pathlib
 import textwrap

 from flask import escape, g, Markup, request
@@ -161,7 +163,7 @@ class Slice(Model, AuditMixinNullable, ImportMixin):
     owners = relationship(security_manager.user_model, secondary=slice_user)

     export_fields = ('slice_name', 'datasource_type', 'datasource_name',
-                     'viz_type', 'params', 'cache_timeout')
+                     'viz_type', 'params', 'cache_timeout', 'uuid')

     def __repr__(self):
         return self.slice_name or str(self.id)
@@ -410,7 +412,7 @@ class Dashboard(Model, AuditMixinNullable, ImportMixin):
     owners = relationship(security_manager.user_model, secondary=dashboard_user)

     export_fields = ('dashboard_title', 'position_json', 'json_metadata',
-                     'description', 'css', 'slug')
+                     'description', 'css', 'slug', 'uuid')

     def __repr__(self):
         return self.dashboard_title or str(self.id)
@@ -652,8 +654,11 @@ def export_dashboards(cls, dashboard_ids, export_data=False,
             make_transient(eager_datasource)
             eager_datasources.append(eager_datasource)

-        data = {'tables': []}
+        files = []
+        total_file_size = 0
+        total_file_rows = 0
         if export_data and export_data_dir:
+
             for data_table in eager_datasources:
                 engine = data_table.database.get_sqla_engine()
                 columns = [c.get_sqla_col() for c in data_table.columns]
@@ -669,21 +674,48 @@

                 df = pd.read_sql_query(sql=sql, con=engine)
+                row_count = len(df.index)

-                file_name = f'{export_data_dir}/{data_table.name}.csv.gz'
+                file_name = f'{data_table.name}.csv.gz'
+                file_dir = f'{export_data_dir}/{data_table.name}'
+                file_path = f'{file_dir}/{file_name}'
+
+                if not os.path.exists(file_dir):
+                    os.makedirs(file_dir)
+                df.to_csv(file_path)
+
+                file_size = os.path.getsize(file_path)
+
                 table_record = {
-                    'name': data_table.name,
-                    'file_path': file_name,
+                    'file_name': file_name,
+                    'file_path': file_path,
+                    'rows': row_count,
+                    'size': file_size,
+                    'table_name': data_table.name,
                     'types': types,
+                    #'uri': pathlib.Path(file_path).as_uri()
                 }
-                data['tables'].append(table_record)
-                df.to_csv(file_name)
+
+                total_file_rows += row_count
+                total_file_size += file_size
+
+                files.append(table_record)
+
+        # Partially fill out the bibliography
+        desc = {
+            'total_size': total_file_size,
+            'total_size_mb': round(total_file_size / (1024.0 * 1024.0), 2),
+            'total_rows': total_file_rows,
+            'file_count': len(files),
+            'created_at': datetime.now().isoformat()
+        }

-        data['includes_data'] = len(data['tables']) > 0
-        return json.dumps({
+        return {
+            'description': desc,
             'dashboards': copied_dashboards,
             'datasources': eager_datasources,
-            'data': data,
-        }, cls=utils.DashboardEncoder, indent=4)
+            'files': files
+        }


 class Database(Model, AuditMixinNullable, ImportMixin):
@@ -721,7 +753,7 @@ class Database(Model, AuditMixinNullable, ImportMixin):
     impersonate_user = Column(Boolean, default=False)
     export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout',
                      'expose_in_sqllab', 'allow_run_async',
-                     'allow_ctas', 'allow_csv_upload', 'extra')
+                     'allow_ctas', 'allow_csv_upload', 'extra', 'uuid')
     export_children = ['tables']

     def __repr__(self):
diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index 4487031a9bb5..52c3e0044415 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -23,7 +23,7 @@
 from superset import db
 from superset.models.core import Dashboard
 from superset.exceptions import DashboardNotFoundException
-from superset.utils.core import decode_dashboards, get_or_create_import_db_engine
+from superset.utils.core import DashboardEncoder, decode_dashboards, get_or_create_import_db_engine


 def import_dashboards(session, data_stream, import_time=None):
@@ -54,7 +54,8 @@ def import_dashboards(session, data_stream, import_time=None):

 def export_dashboards(session, dashboard_ids=None, dashboard_titles=None,
-                      export_data=False, export_data_dir=None):
+                      export_data=False, export_data_dir=None, description=None,
+                      export_title=None, _license='Apache 2.0'):
     """Returns all dashboards metadata as a json dump"""
     logging.info('Starting export')
     export_dashboard_ids = []
@@ -75,4 +76,10 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None,
         data = Dashboard.export_dashboards(export_dashboard_ids,
                                            export_data, export_data_dir)

-    return data
+    if export_title:
+        data['title'] = export_title
+    if description:
+        data['description'] = description
+    data['license'] = _license
+
+    return json.dumps(data, cls=DashboardEncoder, indent=4)
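The new metadata fields live on the module-level function rather than the CLI; a sketch of calling it directly, with assumed values for the bibliography fields:

    from superset import db
    from superset.utils import dashboard_import_export

    json_blob = dashboard_import_export.export_dashboards(
        db.session,
        dashboard_ids=[1],
        export_title='Example bundle',
        description='Dashboards plus gzipped CSVs of their backing tables',
        _license='Apache 2.0')
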
datetime.now().isoformat() + } - return json.dumps({ + return { + 'description': desc, 'dashboards': copied_dashboards, 'datasources': eager_datasources, - 'data': data, - }, cls=utils.DashboardEncoder, indent=4) + 'files': files + } class Database(Model, AuditMixinNullable, ImportMixin): @@ -721,7 +753,7 @@ class Database(Model, AuditMixinNullable, ImportMixin): impersonate_user = Column(Boolean, default=False) export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', 'allow_run_async', - 'allow_ctas', 'allow_csv_upload', 'extra') + 'allow_ctas', 'allow_csv_upload', 'extra', 'uuid') export_children = ['tables'] def __repr__(self): diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 4487031a9bb5..52c3e0044415 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -23,7 +23,7 @@ from superset import db from superset.models.core import Dashboard from superset.exceptions import DashboardNotFoundException -from superset.utils.core import decode_dashboards, get_or_create_import_db_engine +from superset.utils.core import DashboardEncoder, decode_dashboards, get_or_create_import_db_engine def import_dashboards(session, data_stream, import_time=None): @@ -54,7 +54,8 @@ def import_dashboards(session, data_stream, import_time=None): def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, - export_data=False, export_data_dir=None): + export_data=False, export_data_dir=None, description=None, + export_title=None, _license='Apache 2.0'): """Returns all dashboards metadata as a json dump""" logging.info('Starting export') export_dashboard_ids = [] @@ -75,4 +76,10 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data = Dashboard.export_dashboards(export_dashboard_ids, export_data, export_data_dir) - return data + if export_title: + data['title'] = export_title + if description: + data['description'] = description + data['license'] = _license + + return json.dumps(data, cls=DashboardEncoder, indent=4) From 5a97aea9cd1865862a5b19cd7fc9fd53ac9a2a7a Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 20 May 2019 11:43:43 -0700 Subject: [PATCH 022/107] Fixed test title --- tests/cli_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index ddac62a15276..d00c9fb76d9b 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -46,7 +46,7 @@ def test_export_dashboard_by_id(self): self.assertEqual(ids[0], 5) def test_export_dashboard_by_title(self): - """Test `superset export_dashboards -i 3`""" + """Test `superset export_dashboards -t World's Bank Data`""" self.runner.invoke(app.cli, ['load_examples']) result = self.runner.invoke( app.cli, ['export_dashboards', '-t', "World's Bank Data"]) From ee7b245100d269952e2eaaf5f777e0957a7c2220 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 20 May 2019 15:52:32 -0700 Subject: [PATCH 023/107] Remove git dev requirement --- requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index e21870c1dbb1..857b9ad07d09 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -31,7 +31,6 @@ pycodestyle==2.4.0 pyhive==0.6.1 pylint==1.9.2 python-dotenv==0.10.1 -python-git==2018.2.1 redis==2.10.6 statsd==3.3.0 thrift==0.11.0 From f6b0b976d224e515e4a3e391773310de4a05c5e3 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 20 May 2019 17:03:05 -0700 Subject: [PATCH 
024/107] Working prototype create/list examples --- requirements.txt | 1 + superset/cli.py | 101 +++++++++++++++++----- superset/config.py | 3 + superset/data/helpers.py | 41 ++++++++- superset/models/core.py | 8 +- superset/utils/dashboard_import_export.py | 21 ++++- tests/cli_tests.py | 16 ++-- 7 files changed, 152 insertions(+), 39 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3076586a7947..f4d3f6f83b19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,6 +56,7 @@ pandas==0.23.4 parsedatetime==2.0.0 pathlib2==2.3.0 polyline==1.3.2 +PTable==0.9.2 prison==0.1.0 # via flask-appbuilder py==1.7.0 # via retry pycparser==2.19 # via cffi diff --git a/superset/cli.py b/superset/cli.py index 5c9ab0da1bcf..f2f72470bba0 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -18,10 +18,12 @@ # pylint: disable=C,R,W from datetime import datetime import logging +import os from subprocess import Popen from sys import stdout, exit import pkgutil -import importlib +import tarfile +import tempfile import click from colorama import Fore, Style @@ -31,6 +33,7 @@ from superset import ( app, appbuilder, data, db, security_manager, ) +from superset.data.helpers import list_examples_table from superset.exceptions import DashboardNotFoundException from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) @@ -127,35 +130,93 @@ def load_examples_run(load_test_data): @app.cli.command() @click.option('--load-test-data', '-t', is_flag=True, help='Load additional test data') def load_examples(load_test_data): - """Loads a set of Slices and Dashboards and a supporting dataset """ + """Loads a set of Slices and Dashboards and a supporting dataset""" load_examples_run(load_test_data) +def exclusive(ctx_params, exclusive_params, error_message): + """Provide exclusive option grouping""" + if sum([1 if ctx_params[p] else 0 for p in exclusive_params]) > 1: + raise click.UsageError(error_message) + @app.cli.group() def examples(): """Manages example Slices/Dashboards/datasets""" pass -@examples.command() -def show(): +@examples.command('create') +@click.option( + '--dashboard-id', '-i', default=None, type=int, + help='Specify dashboard id to export') +@click.option( + '--dashboard-title', '-t', default=None, + help='Specify dashboard title to export') +@click.option( + '--description', '-d', help='Description of new example', required=True) +@click.option( + '--example-title', '-e', help='Title for new example', required=True) +@click.option( + '--file-name', '-f', default='dashboard.tar.gz', + help='Specify export file name. 
Defaults to dashboard.tar.gz') +@click.option( + '--license', '-l', '_license', default='Apache 2.0', + help='License of the example dashboard') +def create_example(dashboard_id, dashboard_title, description, example_title, + file_name, _license): + """Create example Slice/Dashboard/datasets""" + if not (dashboard_id or dashboard_title): + raise click.UsageError('must supply --dashboard-id/-i or --dashboard-title/-t') + exclusive( + click.get_current_context().params, + ['dashboard_id', 'dashboard_title'], + 'options --dashboard-id/-i and --dashboard-title/-t mutually exclusive') + + # Export into a temporary directory and then tarball that directory + with tempfile.TemporaryDirectory() as tmp_dir_name: + + try: + data = dashboard_import_export.export_dashboards( + db.session, + dashboard_ids=[dashboard_id], + dashboard_titles=[dashboard_title], + export_data=True, + export_data_dir=tmp_dir_name, + description=description, + export_title=example_title, + _license=_license) + + dashboard_slug = dashboard_import_export.get_slug( + db.session, + dashboard_id=dashboard_id, + dashboard_title=dashboard_title) + + out_path = f'{tmp_dir_name}/dashboard.json' + + with open(out_path, 'w') as data_stream: + data_stream.write(data) + + with tarfile.open(file_name, "w:gz") as tar: + tar.add(tmp_dir_name, arcname=f'{dashboard_slug}') + + click.echo(f'Exported example to {file_name}') + + except DashboardNotFoundException as e: + click.echo(click.style(str(e), fg='red')) + exit(1) + +@examples.command('list') +def _list_examples(): """List example Slices/Dashboards/datasets""" - print('Available examples:\n') - for importer, modname, ispkg in pkgutil.iter_modules(data.__path__): - #print("Found submodule %s (is a package: %s)" % (modname, ispkg)) - module = importlib.import_module('superset.data.' + modname) - try: - print('{}: {}'.format(modname, module.DESCRIPTION)) - except AttributeError as e: - print(modname) - pass - - -@examples.command() -def load(): + click.echo( + list_examples_table(config.get('EXAMPLES_GIT_TAG'))) + pass + +@examples.command('load') +def load_example(): """Load an example Slice/Dashboard/dataset""" pass -@examples.command() -def remove(): +@examples.command('remove') +def remove_example(): """Remove an example Slice/Dashboard/dataset""" pass @@ -238,7 +299,7 @@ def import_dashboards(path, recursive): ) def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_titles, export_data, export_data_dir): - """Export dashboards to JSON""" + """Export dashboards to JSON and optionally tables to CSV""" try: data = dashboard_import_export.export_dashboards( db.session, diff --git a/superset/config.py b/superset/config.py index 698966345d02..fc61cb00028b 100644 --- a/superset/config.py +++ b/superset/config.py @@ -633,3 +633,6 @@ class CeleryConfig(object): superset_config.__file__)) except ImportError: pass + +# The git tag of examples to load +EXAMPLES_GIT_TAG = 'v0.0.3' diff --git a/superset/data/helpers.py b/superset/data/helpers.py index f876dc9105ad..39390ddf3d1b 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -16,9 +16,11 @@ # under the License. 
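# A note on the URL constants introduced below: appending `?raw=true` to a
# GitHub blob URL redirects to the raw file bytes, which is how
# get_example_data pulls down the gzipped datasets. A minimal standalone
# fetch (a sketch; the repo, tag and file name are illustrative only):
#
#   import requests, zlib
#   url = ('https://github.com/apache-superset/examples-data/'
#          'blob/master/countries.json.gz?raw=true')
#   raw = requests.get(url).content
#   data = zlib.decompress(raw, zlib.MAX_WBITS | 16)  # 16 selects gzip framing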
"""Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W +import csv from io import BytesIO import json import os +import sys import zlib import requests @@ -27,8 +29,6 @@ from superset.connectors.connector_registry import ConnectorRegistry from superset.models import core as models -BASE_URL = 'https://github.com/apache-superset/examples-data/blob/master/' - # Shortcuts DB = models.Database Slice = models.Slice @@ -38,6 +38,11 @@ config = app.config +BLOB_BASE_URL = f'https://github.com/rjurney/examples-data/blob/{ config.get("EXAMPLES_GIT_TAG") }/' +RAW_BASE_URL = f'https://github.com/rjurney/examples-data/raw/{ config.get("EXAMPLES_GIT_TAG") }/' +LIST_URL = f'https://api.github.com/repos/rjurney/examples-data/contents/?ref={ config.get("EXAMPLES_GIT_TAG") }' +RAW_BASE_URL = f'https://github.com/rjurney/examples-data/raw/{ config.get("EXAMPLES_GIT_TAG") }/' + DATA_FOLDER = os.path.join(config.get('BASE_DIR'), 'data') misc_dash_slices = set() # slices assembled in a 'Misc Chart' dashboard @@ -69,9 +74,39 @@ def get_slice_json(defaults, **kwargs): def get_example_data(filepath, is_gzip=True, make_bytes=False): - content = requests.get(f'{BASE_URL}{filepath}?raw=true').content + content = requests.get(f'{BLOB_BASE_URL}{filepath}?raw=true').content if is_gzip: content = zlib.decompress(content, zlib.MAX_WBITS|16) if make_bytes: content = BytesIO(content) return content + + +def list_examples(tag='master'): + """Use the Github Get contents API to list available examples""" + content = json.loads(requests.get(LIST_URL).content) + dirs = [x for x in content if x['type'] == 'dir'] + + # Write CSV to stdout + csv_writer = csv.DictWriter(sys.stdout, + fieldnames=['Title', 'Description', 'Total Size (MB)', 'Total Rows', + 'File Count', 'Created Date', 'Updated Date'], + delimiter="\t") + csv_writer.writeheader() + + for _dir in dirs: + link = _dir['_links']['self'] + sub_content = json.loads(requests.get(link).content) + dashboard = list(filter(lambda x: x['name'] == 'dashboard.json', sub_content))[0] + files = filter(lambda x: x['name'] != 'dashboard.json', sub_content) + + bio = dashboard['bibliography'] + csv_writer.writerow({ + 'Title': bio['title'], + 'Description': bio['description'], + 'Total Size (MB)': bio['total_size_mb'], + 'Total Rows': bio['total_rows'], + 'File Count': bio['file_count'], + 'Created Date': bio['created_at'], + 'Updated Date': bio['updated_at']}) + diff --git a/superset/models/core.py b/superset/models/core.py index 680cb7a92485..83a4b5b45458 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -677,18 +677,16 @@ def export_dashboards(cls, dashboard_ids, export_data=False, row_count = len(df.index) file_name = f'{data_table.name}.csv.gz' - file_dir = f'{export_data_dir}/{data_table.name}' - file_path = f'{file_dir}/{file_name}' + file_path = f'{export_data_dir}/{file_name}' - if not os.path.exists(file_dir): - os.makedirs(file_dir) + if not os.path.exists(export_data_dir): + os.makedirs(export_data_dir) df.to_csv(file_path) file_size = os.path.getsize(file_path) table_record = { 'file_name': file_name, - 'file_path': file_path, 'rows': row_count, 'size': file_size, 'table_name': data_table.name, diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 52c3e0044415..ea83a111c1d5 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -60,7 +60,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, 
logging.info('Starting export') export_dashboard_ids = [] - session = db.session() + session = db.session() if not session else session query = session.query(Dashboard) if dashboard_ids or dashboard_titles: query = query.filter(Dashboard.id.in_(dashboard_ids) | @@ -77,9 +77,22 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data, export_data_dir) if export_title: - data['title'] = export_title + data['description']['title'] = export_title if description: - data['description'] = description - data['license'] = _license + data['description']['description'] = description + data['description']['license'] = _license return json.dumps(data, cls=DashboardEncoder, indent=4) + + +def get_slug(session, dashboard_id=None, dashboard_title=None): + """Get the slug for the name of the directory inside the tarballed example""" + session = db.session() if not session else session + query = session.query(Dashboard) + slug = None + if dashboard_id or dashboard_title: + query = query.filter((Dashboard.id == dashboard_id) | + (Dashboard.dashboard_title == dashboard_title)) + dashboard = query.first() + slug = getattr(dashboard, 'slug', None) + return slug \ No newline at end of file diff --git a/tests/cli_tests.py b/tests/cli_tests.py index d00c9fb76d9b..9b6d3c136f06 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -20,15 +20,12 @@ def test_version(self): """Test `superset version`""" version_result = self.runner.invoke(app.cli, ['version']) # Version result should contain version string - logging.error(config.get('VERSION_STRING')) - logging.error(version_result.output) self.assertTrue(config.get('VERSION_STRING') in version_result.output) def test_export_all_test_dashboards(self): """Test `superset export_dashboards`""" self.runner.invoke(app.cli, ['load_examples']) result = self.runner.invoke(app.cli, ['export_dashboards']) - logging.error(result.output) data = json.loads(result.output) # Should export at least all 5 test dashboards @@ -94,7 +91,12 @@ def test_examples_remove(self): def test_examples_create(self): """Test `superset examples create`""" - pass - -if __name__ == '__main__': - unittest.main() + self.runner.invoke(app.cli, ['load_examples']) + result = self.runner.invoke( + app.cli, + ['examples', 'create', '--dashboard-title', 'World\'s Bank Data', '--description', + 'World Bank Data example about world health populations from 1960-2010.', + '--example-title', 'World Bank Health Information'] + ) + logging.info(result.output) + From ea67d98531bd3fececbd7274d22b075595e0d700 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 20 May 2019 17:04:48 -0700 Subject: [PATCH 025/107] Prints table for examples --- superset/data/helpers.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 39390ddf3d1b..51f6bddb137d 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -23,6 +23,7 @@ import sys import zlib +from prettytable import PrettyTable import requests from superset import app, db @@ -82,31 +83,24 @@ def get_example_data(filepath, is_gzip=True, make_bytes=False): return content -def list_examples(tag='master'): +def list_examples_table(tag='master'): """Use the Github Get contents API to list available examples""" content = json.loads(requests.get(LIST_URL).content) dirs = [x for x in content if x['type'] == 'dir'] # Write CSV to stdout - csv_writer = csv.DictWriter(sys.stdout, - fieldnames=['Title', 'Description', 'Total 
Size (MB)', 'Total Rows', - 'File Count', 'Created Date', 'Updated Date'], - delimiter="\t") - csv_writer.writeheader() + t = PrettyTable(field_names=['Title', 'Description', 'Total Size (MB)', 'Total Rows', + 'File Count', 'Created Date']) for _dir in dirs: link = _dir['_links']['self'] sub_content = json.loads(requests.get(link).content) - dashboard = list(filter(lambda x: x['name'] == 'dashboard.json', sub_content))[0] - files = filter(lambda x: x['name'] != 'dashboard.json', sub_content) - - bio = dashboard['bibliography'] - csv_writer.writerow({ - 'Title': bio['title'], - 'Description': bio['description'], - 'Total Size (MB)': bio['total_size_mb'], - 'Total Rows': bio['total_rows'], - 'File Count': bio['file_count'], - 'Created Date': bio['created_at'], - 'Updated Date': bio['updated_at']}) - + dashboard_info = list(filter(lambda x: x['name'] == 'dashboard.json', sub_content))[0] + #file_urls = filter(lambda x: x['name'] != 'dashboard.json', sub_content) + + d = json.loads(requests.get(dashboard_info['download_url']).content)['description'] + t.add_row([ + d['title'], d['description'], d['total_size_mb'], d['total_rows'], + d['file_count'], d['created_at']]) + + return t \ No newline at end of file From 404819b050af776aecab621a9d692ac0cb42d862 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 21 May 2019 14:38:23 -0700 Subject: [PATCH 026/107] Lots of new options added to CLI --- superset/cli.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index f2f72470bba0..c14d43a9934f 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -204,19 +204,39 @@ def create_example(dashboard_id, dashboard_title, description, example_title, exit(1) @examples.command('list') -def _list_examples(): +@click.option( + '--examples-revision', '-r', help='Revision of examples to list', + default=config.get('EXAMPLES_GIT_TAG') +) +def _list_examples(revision): """List example Slices/Dashboards/datasets""" click.echo( - list_examples_table(config.get('EXAMPLES_GIT_TAG'))) + list_examples_table(revision)) pass @examples.command('load') -def load_example(): +@click.option( + '--database-uri', '-d', help='Database URI to load example to', + default=config.get('SQLALCHEMY_EXAMPLES_URI') +) +@click.option( + '--examples-revision', '-r', help='Revision of examples to list', + default=config.get('EXAMPLES_GIT_TAG') +) +@click.option( + '--example-title', '-e', help='Title of example to load', required=True) +def load_example(example_title, database_uri): """Load an example Slice/Dashboard/dataset""" pass @examples.command('remove') -def remove_example(): +@click.option( + '--example-title', '-e', help='Title of example to remove', required=True) +@click.option( + '--database-uri', '-d', help='Database URI to load example to', + default=config.get('SQLALCHEMY_EXAMPLES_URI') +) +def remove_example(example_title, database_uri): """Remove an example Slice/Dashboard/dataset""" pass @@ -249,7 +269,7 @@ def refresh_druid(datasource, merge): @app.cli.command() @click.option( - '--path', '-p', + '--path', '-p', required=True, help='Path to a single JSON file or path containing multiple JSON files' 'files to import (*.json)') @click.option( From c5402fc39511d61069c4909ac7419521a836a28d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 21 May 2019 14:39:39 -0700 Subject: [PATCH 027/107] Moved from SQLALCHEMY_IMPORT_URI to SQLALCHEMY_EXAMPLES_URI --- superset/config.py | 6 +++--- superset/utils/core.py | 4 ++-- 
 superset/utils/dashboard_import_export.py |  7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index fc61cb00028b..dde02987097a 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -81,9 +81,9 @@
 # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp'
 
 # The SQLAlchemy connection string for incoming examples
-SQLALCHEMY_IMPORT_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'examples.db')
-# SQLALCHEMY_IMPORT_URI = 'mysql://myapp@localhost/examples'
-# SQLALCHEMY_IMPORT_URI = 'postgresql://root:password@localhost/examples'
+SQLALCHEMY_EXAMPLES_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'examples.db')
+# SQLALCHEMY_EXAMPLES_URI = 'mysql://myapp@localhost/examples'
+# SQLALCHEMY_EXAMPLES_URI = 'postgresql://root:password@localhost/examples'
 
 # In order to hook up a custom password store for all SQLALCHEMY connections
 # implement a function that takes a single argument of type 'sqla.engine.url',

diff --git a/superset/utils/core.py b/superset/utils/core.py
index 7b93be706fe9..332e00ec7315 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -893,10 +893,10 @@ def get_or_create_main_db():
     return dbobj
 
 
-def get_or_create_import_db_engine():
+def get_or_create_example_db_engine():
     """Get a SQLAlchemy engine for imported dashboard data"""
     from superset import conf
-    engine = create_engine(conf.get('SQLALCHEMY_IMPORT_URI'))
+    engine = create_engine(conf.get('SQLALCHEMY_EXAMPLES_URI'))
     if not database_exists(engine.url):
         create_database(engine.url)
     return engine

diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index ea83a111c1d5..8bf2f4f14510 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -23,7 +23,8 @@
 from superset import db
 from superset.models.core import Dashboard
 from superset.exceptions import DashboardNotFoundException
-from superset.utils.core import DashboardEncoder, decode_dashboards, get_or_create_import_db_engine
+from superset.utils.core import DashboardEncoder, decode_dashboards, get_or_create_main_db, \
+    get_or_create_example_db_engine
 
 
 def import_dashboards(session, data_stream, import_time=None):
@@ -39,7 +39,7 @@ def import_dashboards(session, data_stream, import_time=None):
             dashboard, import_time=import_time)
 
     if data['data']['includes_data']:
-        engine = get_or_create_import_db_engine()
+        engine = get_or_create_main_db()
         for table in data['data']['tables']:
             df = pd.read_csv(table['file_path'], parse_dates=True,
                              infer_datetime_format=True, compression='infer')
             df.to_sql(
                 table['name'],

From ded42a5d04f7d420b41c9d0a2dc56e5dc161bf8b Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Fri, 24 May 2019 17:35:36 -0700
Subject: [PATCH 028/107] Added EXAMPLE_REPOS_TAGS and GITHUB_AUTH_TOKEN items

---
 superset/config.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index dde02987097a..ece7edd7bd1a 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -634,5 +634,10 @@ class CeleryConfig(object):
     except ImportError:
         pass
 
-# The git tag of examples to load
-EXAMPLES_GIT_TAG = 'v0.0.3' +# Tuple format: Gitub repo full name, tag/branch +EXAMPLE_REPOS_TAGS = [ + ('rjurney/examples-data', 'v0.0.3') +] + +# Github Authorization Token - in case the examples commands exceed rate limits +GITHUB_AUTH_TOKEN = None From f73ca66885d8f490990fefb2a799fd3a6207393e Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 24 May 2019 17:37:55 -0700 Subject: [PATCH 029/107] get_examples_uris expands repo name/tag to content/blog uris, list_examples_table produces a PrettyTable of example metadata, uses multiple repo/tags and an optional Github token --- superset/data/helpers.py | 99 ++++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 29 deletions(-) diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 51f6bddb137d..aff6f869cd1d 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -16,11 +16,10 @@ # under the License. """Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W -import csv +from datetime import datetime from io import BytesIO import json import os -import sys import zlib from prettytable import PrettyTable @@ -39,11 +38,6 @@ config = app.config -BLOB_BASE_URL = f'https://github.com/rjurney/examples-data/blob/{ config.get("EXAMPLES_GIT_TAG") }/' -RAW_BASE_URL = f'https://github.com/rjurney/examples-data/raw/{ config.get("EXAMPLES_GIT_TAG") }/' -LIST_URL = f'https://api.github.com/repos/rjurney/examples-data/contents/?ref={ config.get("EXAMPLES_GIT_TAG") }' -RAW_BASE_URL = f'https://github.com/rjurney/examples-data/raw/{ config.get("EXAMPLES_GIT_TAG") }/' - DATA_FOLDER = os.path.join(config.get('BASE_DIR'), 'data') misc_dash_slices = set() # slices assembled in a 'Misc Chart' dashboard @@ -74,33 +68,80 @@ def get_slice_json(defaults, **kwargs): return json.dumps(d, indent=4, sort_keys=True) +def get_examples_uris(repo_name, tag): + """Given a full Github repo name return the base urls to the contents and blog APIs""" + contents_uri = f'https://api.github.com/repos/{repo_name}/contents/?ref={tag}' + blob_uri = f'https://github.com/{repo_name}/blob/{tag}/' + print(contents_uri, blob_uri) + return contents_uri, blob_uri + + def get_example_data(filepath, is_gzip=True, make_bytes=False): - content = requests.get(f'{BLOB_BASE_URL}{filepath}?raw=true').content + examples_repos_uris = \ + [get_examples_uris(r[0], r[1]) for r in config.get('EXAMPLE_REPOS_TAGS')] + contents_uri, blob_uri = examples_repos_uris[0] + content = requests.get(f'{blob_uri}/{filepath}?raw=true').content if is_gzip: - content = zlib.decompress(content, zlib.MAX_WBITS|16) + content = zlib.decompress(content, zlib.MAX_WBITS | 16) if make_bytes: content = BytesIO(content) return content -def list_examples_table(tag='master'): +def list_examples_table(examples_repo, examples_tag='master'): """Use the Github Get contents API to list available examples""" - content = json.loads(requests.get(LIST_URL).content) - dirs = [x for x in content if x['type'] == 'dir'] - - # Write CSV to stdout - t = PrettyTable(field_names=['Title', 'Description', 'Total Size (MB)', 'Total Rows', - 'File Count', 'Created Date']) - - for _dir in dirs: - link = _dir['_links']['self'] - sub_content = json.loads(requests.get(link).content) - dashboard_info = list(filter(lambda x: x['name'] == 'dashboard.json', sub_content))[0] - #file_urls = filter(lambda x: x['name'] != 'dashboard.json', sub_content) - - d = json.loads(requests.get(dashboard_info['download_url']).content)['description'] - t.add_row([ - d['title'], 
d['description'], d['total_size_mb'], d['total_rows'], - d['file_count'], d['created_at']]) - - return t \ No newline at end of file + # Write a pretty table to stdout + t = PrettyTable(field_names=['Title', 'Description', 'Size (MB)', 'Rows', + 'Files', 'Created Date', 'Repository', 'Tag']) + + # Optionally replace the default examples repo with a specified one + examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) + for r in config.get('EXAMPLE_REPOS_TAGS')] + if examples_repo: + examples_repos_uris = [ + (examples_repo, + examples_tag) + + get_examples_uris(examples_repo, examples_tag), + ] + + def shorten(val, length): + result = val + if len(val) > length: + result = val[0:length] + '...' + return result + + def date_format(iso_date): + dt = datetime.strptime(iso_date, '%Y-%m-%dT%H:%M:%S.%f') + return dt.isoformat(timespec='minutes') + + for (repo_name, repo_tag, contents_url, blob_url) in examples_repos_uris: + + # Github authentication via a Personal Access Token for rate limit problems + headers = None + token = config.get('GITHUB_AUTH_TOKEN') + if token: + headers = {'Authorization': 'token %s' % config.get('GITHUB_AUTH_TOKEN')} + + content = json.loads(requests.get(contents_url, headers=headers).content) + dirs = [x for x in content if x['type'] == 'dir'] + + for _dir in dirs: + link = _dir['_links']['self'] + sub_content = json.loads(requests.get(link, headers=headers).content) + dashboard_info = list(filter( + lambda x: x['name'] == 'dashboard.json', sub_content))[0] + + d = json.loads( + requests.get(dashboard_info['download_url']).content)['description'] + t.add_row([ + d['title'], + shorten(d['description'], 50), + d['total_size_mb'], + d['total_rows'], + d['file_count'], + date_format(d['created_at']), + shorten(repo_name, 30), + shorten(repo_tag, 20), + ]) + + return t From 272cde874e86cc8691808a516b33d2198ce7da17 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 24 May 2019 17:38:40 -0700 Subject: [PATCH 030/107] Removed types field from Dashboard files export, they were always type Null --- superset/models/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 83a4b5b45458..65cf44eea4fa 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -662,7 +662,6 @@ def export_dashboards(cls, dashboard_ids, export_data=False, for data_table in eager_datasources: engine = data_table.database.get_sqla_engine() columns = [c.get_sqla_col() for c in data_table.columns] - types = {c.name:c.type for c in columns} qry = ( select(columns) @@ -690,7 +689,6 @@ def export_dashboards(cls, dashboard_ids, export_data=False, 'rows': row_count, 'size': file_size, 'table_name': data_table.name, - 'types': types, #'uri': pathlib.Path(file_path).as_uri() } From 6c14f1660365a6f16051a9003e3b90cf42cfa349 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 24 May 2019 17:39:03 -0700 Subject: [PATCH 031/107] flake8 fixes --- superset/models/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 6ce8230769d1..9655c6c161a2 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -27,13 +27,13 @@ from flask_appbuilder.models.mixins import AuditMixin import humanize import sqlalchemy as sa -from sqlalchemy import and_, or_, UniqueConstraint, Column +from sqlalchemy import and_, or_, UniqueConstraint from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm.exc import 
MultipleResultsFound +from sqlalchemy_utils.types.uuid import UUIDType import yaml from superset.utils.core import QueryStatus -from sqlalchemy_utils.types.uuid import UUIDType def json_to_dict(json_str): @@ -58,7 +58,7 @@ class ImportMixin(object): # The names of the attributes # that are available for import and export - uuid = sa.Column(UUIDType(binary=False), unique=True, default=uuid.uuid4) + uuid = sa.Column(UUIDType(binary=False), unique=True, default=uuid.uuid4) @classmethod def _parent_foreign_key_mappings(cls): From 18ba41a64a8131163be53e96f0864a0784b96545 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 24 May 2019 17:50:22 -0700 Subject: [PATCH 032/107] Removed refernces to slices from examples command UI, added --examples-uri/--examples-tag to, create->export, load->import, flake8 fixes, implemented `superset examples list` --- superset/cli.py | 100 +++++++++++++++++++++++++-------------------- tests/cli_tests.py | 62 ++++++++++++++-------------- 2 files changed, 86 insertions(+), 76 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index c14d43a9934f..c833600ec390 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -18,10 +18,8 @@ # pylint: disable=C,R,W from datetime import datetime import logging -import os from subprocess import Popen -from sys import stdout, exit -import pkgutil +from sys import exit, stdout import tarfile import tempfile @@ -130,20 +128,23 @@ def load_examples_run(load_test_data): @app.cli.command() @click.option('--load-test-data', '-t', is_flag=True, help='Load additional test data') def load_examples(load_test_data): - """Loads a set of Slices and Dashboards and a supporting dataset""" + """Loads a set of charts and dashboards and a supporting dataset""" load_examples_run(load_test_data) + def exclusive(ctx_params, exclusive_params, error_message): """Provide exclusive option grouping""" if sum([1 if ctx_params[p] else 0 for p in exclusive_params]) > 1: raise click.UsageError(error_message) + @app.cli.group() def examples(): - """Manages example Slices/Dashboards/datasets""" + """Manages example dashboards/datasets""" pass -@examples.command('create') + +@examples.command('export') @click.option( '--dashboard-id', '-i', default=None, type=int, help='Specify dashboard id to export') @@ -158,35 +159,35 @@ def examples(): '--file-name', '-f', default='dashboard.tar.gz', help='Specify export file name. 
Defaults to dashboard.tar.gz')
 @click.option(
     '--license', '-l', '_license', default='Apache 2.0',
     help='License of the example dashboard')
-def create_example(dashboard_id, dashboard_title, description, example_title,
+def export_example(dashboard_id, dashboard_title, description, example_title,
                    file_name, _license):
-    """Create example Slice/Dashboard/datasets"""
+    """Export example dashboard/dataset tarball"""
     if not (dashboard_id or dashboard_title):
         raise click.UsageError('must supply --dashboard-id/-i or --dashboard-title/-t')
     exclusive(
-        click.get_current_context().params,
-        ['dashboard_id', 'dashboard_title'],
+        click.get_current_context().params,
+        ['dashboard_id', 'dashboard_title'],
         'options --dashboard-id/-i and --dashboard-title/-t mutually exclusive')
-
+
     # Export into a temporary directory and then tarball that directory
     with tempfile.TemporaryDirectory() as tmp_dir_name:
 
         try:
             data = dashboard_import_export.export_dashboards(
-                db.session,
-                dashboard_ids=[dashboard_id],
+                db.session,
+                dashboard_ids=[dashboard_id],
                 dashboard_titles=[dashboard_title],
                 export_data=True,
                 export_data_dir=tmp_dir_name,
                 description=description,
                 export_title=example_title,
                 _license=_license)
-
+
             dashboard_slug = dashboard_import_export.get_slug(
                 db.session,
-                dashboard_id=dashboard_id,
+                dashboard_id=dashboard_id,
                 dashboard_title=dashboard_title)
 
             out_path = f'{tmp_dir_name}/dashboard.json'
@@ -194,50 +195,60 @@ def create_example(dashboard_id, dashboard_title, description, example_title,
             with open(out_path, 'w') as data_stream:
                 data_stream.write(data)
 
-            with tarfile.open(file_name, "w:gz") as tar:
+            with tarfile.open(file_name, 'w:gz') as tar:
                 tar.add(tmp_dir_name, arcname=f'{dashboard_slug}')
-
+
             click.echo(f'Exported example to {file_name}')
 
         except DashboardNotFoundException as e:
             click.echo(click.style(str(e), fg='red'))
             exit(1)
 
+
 @examples.command('list')
 @click.option(
-    '--examples-revision', '-r', help='Revision of examples to list',
-    default=config.get('EXAMPLES_GIT_TAG')
-)
-def _list_examples(revision):
-    """List example Slices/Dashboards/datasets"""
+    '--examples-repo', '-r',
+    help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'',
+    default=None)
+@click.option(
+    '--examples-tag', '-r',
+    help='Tag or branch of Github repository containing examples. Defaults to \'master\'',
+    default='master')
+def _list_examples(examples_repo, examples_tag):
+    """List example dashboards/datasets"""
+
     click.echo(
-        list_examples_table(config.get('EXAMPLES_GIT_TAG')))
+        list_examples_table(examples_repo, examples_tag=examples_tag))
     pass
 
-@examples.command('load')
+
+@examples.command('import')
 @click.option(
-    '--database-uri', '-d', help='Database URI to load example to',
-    default=config.get('SQLALCHEMY_EXAMPLES_URI'))
+    '--database-uri', '-d', help='Database URI to import example to',
+    default=config.get('SQLALCHEMY_EXAMPLES_URI'))
 @click.option(
+    '--examples-repo', '-r',
+    help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'',
+    default=None)
+@click.option(
+    '--examples-tag', '-r',
+    help='Tag or branch of Github repository containing examples.
Defaults to \'master\'', + default='master') @click.option( - '--example-title', '-e', help='Title of example to load', required=True) -def load_example(example_title, database_uri): - """Load an example Slice/Dashboard/dataset""" + '--example-title', '-e', help='Title of example to import', required=True) +def import_example(example_title, examples_repo, examples_tag, database_uri): + """Import an example dashboard/dataset""" pass + @examples.command('remove') @click.option( '--example-title', '-e', help='Title of example to remove', required=True) @click.option( - '--database-uri', '-d', help='Database URI to load example to', - default=config.get('SQLALCHEMY_EXAMPLES_URI') -) + '--database-uri', '-d', help='Database URI to remove example from', + default=config.get('SQLALCHEMY_EXAMPLES_URI')) def remove_example(example_title, database_uri): - """Remove an example Slice/Dashboard/dataset""" + """Remove an example dashboard/dataset""" pass @@ -311,19 +322,17 @@ def import_dashboards(path, recursive): help='Specify dashboard title to export') @click.option( '--export-data', '-x', default=None, is_flag=True, - help='Export the dashboard\'s data tables as CSV files.' -) + help='Export the dashboard\'s data tables as CSV files.') @click.option( '--export-data-dir', '-d', default=config.get('DASHBOARD_EXPORT_DIR'), - help='Specify export directory path. Defaults to \'/tmp\'.' -) -def export_dashboards(print_stdout, dashboard_file, dashboard_ids, + help='Specify export directory path. Defaults to \'/tmp\'.') +def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_titles, export_data, export_data_dir): """Export dashboards to JSON and optionally tables to CSV""" try: data = dashboard_import_export.export_dashboards( - db.session, - dashboard_ids=dashboard_ids, + db.session, + dashboard_ids=dashboard_ids, dashboard_titles=dashboard_titles, export_data=export_data, export_data_dir=export_data_dir) @@ -337,6 +346,7 @@ def export_dashboards(print_stdout, dashboard_file, dashboard_ids, with open(dashboard_file, 'w') as data_stream: data_stream.write(data) + @app.cli.command() @click.option( '--path', '-p', diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 9b6d3c136f06..077b6e8b585f 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -1,8 +1,5 @@ -import csv -from io import StringIO import json import logging -import unittest from superset import app, cli from tests.base_tests import SupersetTestCase @@ -13,8 +10,8 @@ class SupersetCliTestCase(SupersetTestCase): @classmethod - def setUp(self): - self.runner = app.test_cli_runner() + def setUp(cls): + cls.runner = app.test_cli_runner() def test_version(self): """Test `superset version`""" @@ -56,47 +53,50 @@ def test_export_dashboard_by_title(self): self.assertEqual(ids[0], "World's Bank Data") def test_examples_menu(self): - """Test `superset examples`""" + """Test `superset examples` menu""" result = self.runner.invoke(app.cli, ['examples']) - self.assertIn('load', result.output) + self.assertIn('import', result.output) self.assertIn('list', result.output) self.assertIn('remove', result.output) + self.assertIn('export', result.output) def test_examples_list(self): """Test `superset examples list`""" result = self.runner.invoke( - app.cli, ['examples', 'list'] - ) - output_f = StringIO(result.output) - csv_reader = csv.DictReader(output_f, delimiter="\t", - fieldnames=['title', 'description', 'total_size_mb', - 'total_rows', 'updated_at']) - examples = [] - for example in csv_reader: - examples.append(example) - 
examples = [e for e in csv_reader] - self.assertGreater(len(examples), 0) - - wb = {'title': "World's Bank Data"} - title_matches = list(filter(lambda x: all(item in x.items() for item in wb.items()) > 0, examples)) - self.assertEqual(len(title_matches), 1) - - def test_examples_load(self): - """Test `superset examples load`""" + app.cli, ['examples', 'list']) + + found = False + for i, line in enumerate(result.output.split('\n')): + # skip header + if i < 3: + continue + # Odd lines have data + if (i % 2) != 1: + row = line[1:-1] + parts = [i.strip() for i in row.split('|')] + if parts[0] == 'World Bank Health Information': + found = True + + # Did we find the example in the list? + self.assertEqual(found, True) + + def test_examples_import(self): + """Test `superset examples import`""" pass def test_examples_remove(self): """Test `superset examples remove`""" pass - def test_examples_create(self): - """Test `superset examples create`""" + def test_examples_export(self): + """Test `superset examples export`""" self.runner.invoke(app.cli, ['load_examples']) result = self.runner.invoke( app.cli, - ['examples', 'create', '--dashboard-title', 'World\'s Bank Data', '--description', + [ + 'examples', 'export', '--dashboard-title', 'World\'s Bank Data', + '--description', 'World Bank Data example about world health populations from 1960-2010.', - '--example-title', 'World Bank Health Information'] - ) + '--example-title', 'World Bank Health Information', + ]) logging.info(result.output) - From b5a9c018e11c74046a745081c5c4266f52a4a26f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sat, 25 May 2019 12:41:00 -0700 Subject: [PATCH 033/107] flake8 cleanup --- superset/cli.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index c53aa3d69802..e55ed18e576e 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -147,10 +147,10 @@ def examples(): @examples.command('export') @click.option( '--dashboard-id', '-i', default=None, type=int, - help='Specify dashboard id to export') + help='Specify a single dashboard id to export') @click.option( '--dashboard-title', '-t', default=None, - help='Specify dashboard title to export') + help='Specify a single dashboard title to export') @click.option( '--description', '-d', help='Description of new example', required=True) @click.option( @@ -207,11 +207,11 @@ def export_example(dashboard_id, dashboard_title, description, example_title, @examples.command('list') @click.option( - '--examples-repo', '-r', + '--examples-repo', '-r', help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'', default=None) @click.option( - '--examples-tag', '-r', + '--examples-tag', '-r', help='Tag or branch of Github repository containing examples. Defaults to \'master\'', default='master') def _list_examples(examples_repo, examples_tag): @@ -227,11 +227,11 @@ def _list_examples(examples_repo, examples_tag): '--database-uri', '-d', help='Database URI to import example to', default=config.get('SQLALCHEMY_EXAMPLES_URI')) @click.option( - '--examples-repo', '-r', + '--examples-repo', '-r', help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'', default=None) @click.option( - '--examples-tag', '-r', + '--examples-tag', '-r', help='Tag or branch of Github repository containing examples. 
Defaults to \'master\'', default='master') @click.option( From 2c72e7ffb7c37f001dbb87f99332c242f864f23d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sat, 25 May 2019 12:41:16 -0700 Subject: [PATCH 034/107] flake8 cleanup --- superset/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index 9baf2a435313..f086f75adce1 100644 --- a/superset/config.py +++ b/superset/config.py @@ -637,7 +637,7 @@ class CeleryConfig(object): # Tuple format: Gitub repo full name, tag/branch EXAMPLE_REPOS_TAGS = [ - ('rjurney/examples-data', 'v0.0.3') + ('rjurney/examples-data', 'v0.0.3'), ] # Github Authorization Token - in case the examples commands exceed rate limits From ed2a9b84cfbfdc42d353fbe874d56246ac91f465 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sat, 25 May 2019 12:42:39 -0700 Subject: [PATCH 035/107] flake8 cleanup --- superset/models/core.py | 18 ++++++++---------- superset/utils/dashboard_import_export.py | 17 +++++++++-------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 65cf44eea4fa..59458de5bd91 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -23,7 +23,6 @@ import json import logging import os -import pathlib import textwrap from flask import escape, g, Markup, request @@ -618,7 +617,7 @@ def alter_positions(dashboard, old_to_new_slc_id_dict): return copied_dash.id @classmethod - def export_dashboards(cls, dashboard_ids, export_data=False, + def export_dashboards(cls, dashboard_ids, export_data=False, export_data_dir=None): copied_dashboards = [] datasource_ids = set() @@ -653,7 +652,7 @@ def export_dashboards(cls, dashboard_ids, export_data=False, ) make_transient(eager_datasource) eager_datasources.append(eager_datasource) - + files = [] total_file_size = 0 total_file_rows = 0 @@ -673,14 +672,14 @@ def export_dashboards(cls, dashboard_ids, export_data=False, ) df = pd.read_sql_query(sql=sql, con=engine) - row_count = len(df.index) + row_count = len(df.index) + 1 # plus one for header file_name = f'{data_table.name}.csv.gz' file_path = f'{export_data_dir}/{file_name}' - + if not os.path.exists(export_data_dir): os.makedirs(export_data_dir) - df.to_csv(file_path) + df.to_csv(file_path, compression='gzip') file_size = os.path.getsize(file_path) @@ -689,9 +688,8 @@ def export_dashboards(cls, dashboard_ids, export_data=False, 'rows': row_count, 'size': file_size, 'table_name': data_table.name, - #'uri': pathlib.Path(file_path).as_uri() } - + total_file_rows += row_count total_file_size += file_size @@ -703,14 +701,14 @@ def export_dashboards(cls, dashboard_ids, export_data=False, 'total_size_mb': round(total_file_size / (1024.0 * 1024.0), 2), 'total_rows': total_file_rows, 'file_count': len(files), - 'created_at': datetime.now().isoformat() + 'created_at': datetime.now().isoformat(), } return { 'description': desc, 'dashboards': copied_dashboards, 'datasources': eager_datasources, - 'files': files + 'files': files, } diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 8bf2f4f14510..39e6e4c8d163 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -20,11 +20,12 @@ import time import pandas as pd + from superset import db -from superset.models.core import Dashboard from superset.exceptions import DashboardNotFoundException -from superset.utils.core import DashboardEncoder, decode_dashboards, get_or_create_main_db, \ - 
get_or_create_example_db_engine +from superset.models.core import Dashboard +from superset.utils.core import DashboardEncoder, decode_dashboards, \ + get_or_create_example_db_engine, get_or_create_main_db def import_dashboards(session, data_stream, import_time=None): @@ -42,7 +43,7 @@ def import_dashboards(session, data_stream, import_time=None): if data['data']['includes_data']: engine = get_or_create_main_db() for table in data['data']['tables']: - df = pd.read_csv(table['file_path'], parse_dates=True, + df = pd.read_csv(table['file_path'], parse_dates=True, infer_datetime_format=True, compression='infer') df.to_sql( table['name'], @@ -54,7 +55,7 @@ def import_dashboards(session, data_stream, import_time=None): session.commit() -def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, +def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False, export_data_dir=None, description=None, export_title=None, _license='Apache 2.0'): """Returns all dashboards metadata as a json dump""" @@ -74,7 +75,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, logging.error('No dashboards found!') raise DashboardNotFoundException('No dashboards found!') else: - data = Dashboard.export_dashboards(export_dashboard_ids, + data = Dashboard.export_dashboards(export_dashboard_ids, export_data, export_data_dir) if export_title: @@ -82,7 +83,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, if description: data['description']['description'] = description data['description']['license'] = _license - + return json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) @@ -96,4 +97,4 @@ def get_slug(session, dashboard_id=None, dashboard_title=None): (Dashboard.dashboard_title == dashboard_title)) dashboard = query.first() slug = getattr(dashboard, 'slug', None) - return slug \ No newline at end of file + return slug From 6cf48511eac1eef156b6e2d75f45a1286934ff8b Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sat, 25 May 2019 12:43:27 -0700 Subject: [PATCH 036/107] Completed test for `superset examples export` which tests tarball size, row count, metadata, etc. 
--- tests/cli_tests.py | 58 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 077b6e8b585f..057938b00ede 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -1,5 +1,10 @@ +import gzip import json import logging +import os +import struct +import tarfile +import tempfile from superset import app, cli from tests.base_tests import SupersetTestCase @@ -13,6 +18,21 @@ class SupersetCliTestCase(SupersetTestCase): def setUp(cls): cls.runner = app.test_cli_runner() + @classmethod + def get_uncompressed_size(cls, file_path): + """Last 4 bytes of a gzip file contain uncompressed size""" + with open(file_path, 'rb') as f: + f.seek(-4, 2) + return struct.unpack('I', f.read(4))[0] + + @classmethod + def gzip_file_line_count(cls, file_path): + """Get the line count of a gzip'd CSV file""" + with gzip.open(file_path, 'r') as f: + for i, l in enumerate(f): + pass + return i + 1 + def test_version(self): """Test `superset version`""" version_result = self.runner.invoke(app.cli, ['version']) @@ -90,7 +110,7 @@ def test_examples_remove(self): def test_examples_export(self): """Test `superset examples export`""" - self.runner.invoke(app.cli, ['load_examples']) + # self.runner.invoke(app.cli, ['load_examples']) result = self.runner.invoke( app.cli, [ @@ -100,3 +120,39 @@ def test_examples_export(self): '--example-title', 'World Bank Health Information', ]) logging.info(result.output) + + # Inspect the tarball + with tarfile.open('dashboard.tar.gz', 'r:gz') as tar: + + # Extract all exported files to a temporary directory + out_d = tempfile.TemporaryDirectory() + + tar.extractall(out_d.name) + world_health_path = f'{out_d.name}{os.path.sep}world_health{os.path.sep}' + + # Check the Dashboard metadata export + json_f = open(f'{world_health_path}/dashboard.json', 'r') + dashboard = json.loads(json_f.read()) + desc = dashboard['description'] + self.assertEqual(desc['title'], 'World Bank Health Information') + self.assertEqual( + desc['description'], + 'World Bank Data example about world health populations from 1960-2010.', + ) + + # Check the data export by writing out the tarball, getting the file size + # and comparing to the metadata size + data_file_path = f'{world_health_path}/wb_health_population.csv.gz' + + file_size = SupersetCliTestCase.get_uncompressed_size(data_file_path) + file_size = os.path.getsize(data_file_path) + self.assertEqual( + desc['total_size'], + file_size) + + # Check the data export row count against the example's description metadata + self.assertEqual( + desc['total_rows'], + SupersetCliTestCase.gzip_file_line_count(data_file_path)) + + out_d.cleanup() From 2132b5d8ce3bebf42ca953382ac1ac0fcf4d4bae Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sat, 25 May 2019 12:44:26 -0700 Subject: [PATCH 037/107] squash me --- superset/models/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/models/core.py b/superset/models/core.py index 59458de5bd91..a325418ac2b2 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -672,7 +672,7 @@ def export_dashboards(cls, dashboard_ids, export_data=False, ) df = pd.read_sql_query(sql=sql, con=engine) - row_count = len(df.index) + 1 # plus one for header + row_count = len(df.index) + 1 # plus one for header file_name = f'{data_table.name}.csv.gz' file_path = f'{export_data_dir}/{file_name}' From b4048c53c0f3f2f86ddf6427aa30619d2bc0e27c Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 28 May 
2019 18:42:01 -0700 Subject: [PATCH 038/107] In progress paranoia commit, listing works, creating works, working on importing --- superset/cli.py | 46 +++++++++++-- superset/data/helpers.py | 84 ++++++++++++++--------- superset/exceptions.py | 7 +- superset/utils/dashboard_import_export.py | 13 +++- 4 files changed, 111 insertions(+), 39 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index e55ed18e576e..985f391320dd 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -17,7 +17,9 @@ # under the License. # pylint: disable=C,R,W from datetime import datetime +import json import logging +import requests from subprocess import Popen from sys import exit, stdout import tarfile @@ -31,8 +33,9 @@ from superset import ( app, appbuilder, data, db, security_manager, ) -from superset.data.helpers import list_examples_table -from superset.exceptions import DashboardNotFoundException +from superset.data.helpers import get_examples_file_list, get_examples_uris, \ + list_examples_table +from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) @@ -183,7 +186,8 @@ def export_example(dashboard_id, dashboard_title, description, example_title, export_data_dir=tmp_dir_name, description=description, export_title=example_title, - _license=_license) + _license=_license, + strip_database=True) dashboard_slug = dashboard_import_export.get_slug( db.session, @@ -216,7 +220,6 @@ def export_example(dashboard_id, dashboard_title, description, example_title, default='master') def _list_examples(examples_repo, examples_tag): """List example dashboards/datasets""" - click.echo( list_examples_table(examples_repo, examples_tag=examples_tag)) pass @@ -238,7 +241,40 @@ def _list_examples(examples_repo, examples_tag): '--example-title', '-e', help='Title of example to import', required=True) def import_example(example_title, examples_repo, examples_tag, database_uri): """Import an example dashboard/dataset""" - pass + + # First fetch the example information from Github + examples_repos = [(examples_repo, examples_tag)] \ + if examples_repo else config.get('EXAMPLE_REPOS_TAGS') + examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) + for r in examples_repos] + examples_files = get_examples_file_list(examples_repos_uris) + + # Github authentication via a Personal Access Token for rate limit problems + headers = None + token = config.get('GITHUB_AUTH_TOKEN') + if token: + headers = {'Authorization': 'token %s' % token} + + download_urls = [x['metadata_file']['download_url'] for x in examples_files] + + import_example_metadata = None + for download_url in download_urls: + example_metadata = json.loads(requests.get(download_url, headers=headers).content) + if example_metadata['description']['title'] == example_title: + import_example_metadata = example_metadata + logging.info('Importing example \'{example_title}\' from {download_url} ...') + + if not import_example_metadata: + raise ExampleNotFoundException(f'Example {example_title} not found!') + + try: + with f.open() as data_stream: + dashboard_import_export.import_dashboards( + db.session, data_stream) + except Exception as e: + logging.error('Error when importing dashboard from file %s', f) + logging.error(e) + @examples.command('remove') diff --git a/superset/data/helpers.py b/superset/data/helpers.py index aff6f869cd1d..849b54ae5c5f 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -72,7 +72,6 @@ def 
get_examples_uris(repo_name, tag): """Given a full Github repo name return the base urls to the contents and blog APIs""" contents_uri = f'https://api.github.com/repos/{repo_name}/contents/?ref={tag}' blob_uri = f'https://github.com/{repo_name}/blob/{tag}/' - print(contents_uri, blob_uri) return contents_uri, blob_uri @@ -88,8 +87,40 @@ def get_example_data(filepath, is_gzip=True, make_bytes=False): return content +def get_examples_file_list(examples_repos_uris, examples_tag='master'): + """Use the Github get contents API to list available examples""" + examples = [] + + for (repo_name, repo_tag, contents_uri, blob_uri) in examples_repos_uris: + + # Github authentication via a Personal Access Token for rate limit problems + headers = None + token = config.get('GITHUB_AUTH_TOKEN') + if token: + headers = {'Authorization': 'token %s' % token} + + content = json.loads(requests.get(contents_uri, headers=headers).content) + dirs = [x for x in content if x['type'] == 'dir'] # examples are in sub-dirs + + for _dir in dirs: + link = _dir['_links']['self'] + sub_content = json.loads(requests.get(link, headers=headers).content) + dashboard_info = list(filter( + lambda x: x['name'] == 'dashboard.json', sub_content))[0] + data_files = list(filter( + lambda x: x['name'].endswith('.csv.gz'), sub_content)) + examples.append({ + 'repo_name': repo_name, + 'repo_tag': repo_tag, + 'metadata_file': dashboard_info, + 'data_files': data_files, + }) + + return examples + + def list_examples_table(examples_repo, examples_tag='master'): - """Use the Github Get contents API to list available examples""" + """Turn a list of available examples into a PrettyTable""" # Write a pretty table to stdout t = PrettyTable(field_names=['Title', 'Description', 'Size (MB)', 'Rows', 'Files', 'Created Date', 'Repository', 'Tag']) @@ -97,6 +128,8 @@ def list_examples_table(examples_repo, examples_tag='master'): # Optionally replace the default examples repo with a specified one examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) for r in config.get('EXAMPLE_REPOS_TAGS')] + + # Replace the configured repos with the examples repo specified if examples_repo: examples_repos_uris = [ (examples_repo, @@ -104,6 +137,8 @@ def list_examples_table(examples_repo, examples_tag='master'): get_examples_uris(examples_repo, examples_tag), ] + file_info_list = get_examples_file_list(examples_repos_uris) + def shorten(val, length): result = val if len(val) > length: @@ -114,34 +149,21 @@ def date_format(iso_date): dt = datetime.strptime(iso_date, '%Y-%m-%dT%H:%M:%S.%f') return dt.isoformat(timespec='minutes') - for (repo_name, repo_tag, contents_url, blob_url) in examples_repos_uris: - - # Github authentication via a Personal Access Token for rate limit problems - headers = None - token = config.get('GITHUB_AUTH_TOKEN') - if token: - headers = {'Authorization': 'token %s' % config.get('GITHUB_AUTH_TOKEN')} - - content = json.loads(requests.get(contents_url, headers=headers).content) - dirs = [x for x in content if x['type'] == 'dir'] - - for _dir in dirs: - link = _dir['_links']['self'] - sub_content = json.loads(requests.get(link, headers=headers).content) - dashboard_info = list(filter( - lambda x: x['name'] == 'dashboard.json', sub_content))[0] - - d = json.loads( - requests.get(dashboard_info['download_url']).content)['description'] - t.add_row([ - d['title'], - shorten(d['description'], 50), - d['total_size_mb'], - d['total_rows'], - d['file_count'], - date_format(d['created_at']), - shorten(repo_name, 30), - shorten(repo_tag, 
20), - ]) + for file_info in file_info_list: + + d = json.loads( + requests.get( + file_info['metadata_file']['download_url']).content)['description'] + row = [ + d['title'], + shorten(d['description'], 50), + d['total_size_mb'], + d['total_rows'], + d['file_count'], + date_format(d['created_at']), + shorten(file_info['repo_name'], 30), + shorten(file_info['repo_tag'], 20), + ] + t.add_row(row) return t diff --git a/superset/exceptions.py b/superset/exceptions.py index 3eafefb8dee2..e0284045b301 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -55,5 +55,10 @@ class SupersetTemplateException(SupersetException): class SpatialException(SupersetException): pass -class DashboardNotFoundException(Exception): + +class DashboardNotFoundException(SupersetException): + pass + + +class ExampleNotFoundException(SupersetException): pass diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 39e6e4c8d163..a5d0998cece0 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -57,7 +57,7 @@ def import_dashboards(session, data_stream, import_time=None): def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False, export_data_dir=None, description=None, - export_title=None, _license='Apache 2.0'): + export_title=None, _license='Apache 2.0', strip_database=False): """Returns all dashboards metadata as a json dump""" logging.info('Starting export') export_dashboard_ids = [] @@ -84,7 +84,16 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data['description']['description'] = description data['description']['license'] = _license - return json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) + export_json = json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) + + # Remove datasources[].__SqlaTable__.database for example export + if strip_database: + parsed_json = json.loads(export_json) + for datasource in parsed_json['datasources']: + datasource['__SqlaTable__']['database'] = None + export_json = json.dumps(parsed_json, indent=4, sort_keys=True) + + return export_json def get_slug(session, dashboard_id=None, dashboard_title=None): From 4edca2d7cdc39fd2f3912eff88a4c72f151409a8 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 28 May 2019 18:43:42 -0700 Subject: [PATCH 039/107] Doc string for get_example_data --- superset/data/helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 849b54ae5c5f..4b106bdd926d 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -76,6 +76,7 @@ def get_examples_uris(repo_name, tag): def get_example_data(filepath, is_gzip=True, make_bytes=False): + """Get the examples data for the legacy examples""" examples_repos_uris = \ [get_examples_uris(r[0], r[1]) for r in config.get('EXAMPLE_REPOS_TAGS')] contents_uri, blob_uri = examples_repos_uris[0] From 8bb7350dfbb9c7fe819cf986fdd9f594ac663765 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 28 May 2019 20:16:51 -0700 Subject: [PATCH 040/107] Implemented CLI for example import --- superset/cli.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 985f391320dd..45031eb59337 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -17,9 +17,9 @@ # under the License. 
# pylint: disable=C,R,W from datetime import datetime +from io import StringIO import json import logging -import requests from subprocess import Popen from sys import exit, stdout import tarfile @@ -28,6 +28,7 @@ import click from colorama import Fore, Style from pathlib2 import Path +import requests import yaml from superset import ( @@ -39,6 +40,8 @@ from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) +logging.getLogger("urllib3").setLevel(logging.WARNING) + config = app.config celery_app = utils.get_celery_app(config) @@ -257,26 +260,30 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): download_urls = [x['metadata_file']['download_url'] for x in examples_files] + import_example_json = None import_example_metadata = None for download_url in download_urls: - example_metadata = json.loads(requests.get(download_url, headers=headers).content) + example_json = requests.get(download_url, headers=headers).content + example_metadata = json.loads(example_json) if example_metadata['description']['title'] == example_title: + import_example_json = example_json import_example_metadata = example_metadata - logging.info('Importing example \'{example_title}\' from {download_url} ...') + logging.info(f'Importing example \'{example_title}\' from {download_url} ...') - if not import_example_metadata: - raise ExampleNotFoundException(f'Example {example_title} not found!') + if not (import_example_json and import_example_metadata): + e = ExampleNotFoundException(f'Example {example_title} not found!') + click.echo(click.style(str(e), fg='red')) + exit(1) + data_stream = StringIO(import_example_json.decode()) try: - with f.open() as data_stream: - dashboard_import_export.import_dashboards( - db.session, data_stream) + dashboard_import_export.import_dashboards( + db.session, data_stream, database_uri=database_uri) except Exception as e: - logging.error('Error when importing dashboard from file %s', f) + logging.error(f'Error importing example dashboard \'{example_title}\'!') logging.error(e) - @examples.command('remove') @click.option( '--example-title', '-e', help='Title of example to remove', required=True) From f9ca92039b8ca63b77a82bdf7b6cfafd5376ee94 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 00:27:41 -0700 Subject: [PATCH 041/107] Moved from reusing dashboard_import_export.import_dashboards for examples import to custom dashboard_import_export.import_example_dashboard --- superset/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 45031eb59337..238b1fa49d8f 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -261,24 +261,23 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): download_urls = [x['metadata_file']['download_url'] for x in examples_files] import_example_json = None - import_example_metadata = None for download_url in download_urls: example_json = requests.get(download_url, headers=headers).content example_metadata = json.loads(example_json) if example_metadata['description']['title'] == example_title: import_example_json = example_json - import_example_metadata = example_metadata logging.info(f'Importing example \'{example_title}\' from {download_url} ...') - if not (import_example_json and import_example_metadata): + if not import_example_json: e = ExampleNotFoundException(f'Example {example_title} not found!') click.echo(click.style(str(e), fg='red')) exit(1) - data_stream = 
StringIO(import_example_json.decode()) try: - dashboard_import_export.import_dashboards( - db.session, data_stream, database_uri=database_uri) + dashboard_import_export.import_example_dashboard( + db.session, + import_example_json, + database_uri) except Exception as e: logging.error(f'Error importing example dashboard \'{example_title}\'!') logging.error(e) From 3ee163fff363d27b13a47052de26abaf8d5523fd Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 00:28:29 -0700 Subject: [PATCH 042/107] Added get_examples_database and get_or_create_example_db at the expense of get_or_create_example_db_engine --- superset/utils/core.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 332e00ec7315..9553ad227629 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -893,15 +893,6 @@ def get_or_create_main_db(): return dbobj -def get_or_create_example_db_engine(): - """Get a SQLAlchemy engine for imported dashboard data""" - from superset import conf - engine = create_engine(conf.get('SQLALCHEMY_EXAMPLE_URI')) - if not database_exists(engine.url): - create_database(engine.url) - return engine - - def get_main_database(session): from superset.models import core as models return ( @@ -911,6 +902,35 @@ def get_main_database(session): ) +def get_or_create_example_db(database_uri=None): + """Get or create the examples Database connection""" + from superset import conf, db + from superset.models import core as models + + logging.info('Creating database reference') + dbobj = get_examples_database(db.session) + if not dbobj: + dbobj = models.Database( + database_name='examples', + allow_csv_upload=True, + expose_in_sqllab=True, + ) + dbobj.set_sqlalchemy_uri( + database_uri or conf.get('SQLALCHEMY_EXAMPLE_URI')) + db.session.add(dbobj) + db.session.commit() + return dbobj + + +def get_examples_database(session): + from superset.models import core as models + return ( + session.query(models.Database) + .filter_by(database_name='examples') + .first() + ) + + def is_adhoc_metric(metric) -> bool: return ( isinstance(metric, dict) and From 8b8eeea8f1463a3f4cbcecadb8cc4ad482790290 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 15:38:01 -0700 Subject: [PATCH 043/107] Added get_uuid method to ensure a string uuid is default value for uuid field in ImportMixin --- superset/models/helpers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 9655c6c161a2..ff2e048277d5 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -45,6 +45,10 @@ def json_to_dict(json_str): return {} +def get_uuid(): + return str(uuid.uuid4()) + + class ImportMixin(object): export_parent = None # The name of the attribute @@ -58,7 +62,7 @@ class ImportMixin(object): # The names of the attributes # that are available for import and export - uuid = sa.Column(UUIDType(binary=False), unique=True, default=uuid.uuid4) + uuid = sa.Column(UUIDType(binary=False), unique=True, default=get_uuid) @classmethod def _parent_foreign_key_mappings(cls): From 004d9172934c2537daef4c184814b64740580c49 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 15:39:21 -0700 Subject: [PATCH 044/107] Now serializing uuid fields in superset.utils.core.DashboardEncoder as strings --- superset/utils/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/superset/utils/core.py 
b/superset/utils/core.py index 9553ad227629..1ca4edd776a3 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -287,11 +287,14 @@ def decode_dashboards(o): class DashboardEncoder(json.JSONEncoder): + """JSONEncoder for Dashboard and their Slice objects""" # pylint: disable=E0202 def default(self, o): try: vals = { k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} + if type(o) == uuid.UUID: + return str(o) return {'__{}__'.format(o.__class__.__name__): vals} except Exception: if type(o) == datetime: From ce09dc835d06d0700500b59c54e00b2c47e3749b Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 15:48:18 -0700 Subject: [PATCH 045/107] Now using tag v0.0.4 of rjurney examples-data --- superset/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index f086f75adce1..4bedf3db55df 100644 --- a/superset/config.py +++ b/superset/config.py @@ -637,7 +637,7 @@ class CeleryConfig(object): # Tuple format: Gitub repo full name, tag/branch EXAMPLE_REPOS_TAGS = [ - ('rjurney/examples-data', 'v0.0.3'), + ('rjurney/examples-data', 'v0.0.4'), ] # Github Authorization Token - in case the examples commands exceed rate limits From d675427ecafdb328f2a1aa03a9e6ea494d0a41cd Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 29 May 2019 15:54:17 -0700 Subject: [PATCH 046/107] Print more than just minutes of the example created timestamp --- superset/data/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 4b106bdd926d..7b86e3867663 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -148,7 +148,7 @@ def shorten(val, length): def date_format(iso_date): dt = datetime.strptime(iso_date, '%Y-%m-%dT%H:%M:%S.%f') - return dt.isoformat(timespec='minutes') + return dt.isoformat() for file_info in file_info_list: From ad74c3c06c0144d708951875903a8c9e4b3b4d2f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 3 Jun 2019 18:03:27 -0700 Subject: [PATCH 047/107] In progress commit --- requirements.txt | 6 +- superset/cli.py | 39 +++++++--- superset/config.py | 16 ++-- superset/data/helpers.py | 21 +++++- superset/exceptions.py | 4 + superset/utils/core.py | 9 ++- superset/utils/dashboard_import_export.py | 92 +++++++++++++++++------ tests/cli_tests.py | 8 +- 8 files changed, 146 insertions(+), 49 deletions(-) diff --git a/requirements.txt b/requirements.txt index ec4c0ee721c1..34356370bc49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ croniter==0.3.29 cryptography==2.4.2 decorator==4.3.0 # via retry defusedxml==0.5.0 # via python3-openid -flask-appbuilder==2.0.0 +Flask-AppBuilder>=2.1.3 flask-babel==0.11.1 # via flask-appbuilder flask-caching==1.4.0 flask-compress==1.4.0 @@ -32,7 +32,7 @@ flask-jwt-extended==3.18.1 # via flask-appbuilder flask-login==0.4.1 # via flask-appbuilder flask-migrate==2.1.1 flask-openid==1.2.5 # via flask-appbuilder -flask-sqlalchemy==2.3.2 # via flask-appbuilder, flask-migrate +flask-sqlalchemy==2.4.0 # via flask-appbuilder, flask-migrate flask-talisman==0.6.0 flask-wtf==0.14.2 flask==1.0.2 @@ -75,7 +75,7 @@ retry==0.9.2 selenium==3.141.0 simplejson==3.15.0 six==1.11.0 # via bleach, cryptography, flask-jwt-extended, flask-talisman, isodate, jsonschema, pathlib2, polyline, prison, pydruid, pyrsistent, python-dateutil, sqlalchemy-utils, wtforms-json -sqlalchemy-utils==0.32.21 +sqlalchemy-utils>=0.33.9 sqlalchemy==1.3.1 sqlparse==0.2.4 unicodecsv==0.14.1 
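A note on the cli.py and helpers.py hunks that follow: 'superset examples import' rewrites each data file's contents-API download_url into a blob-style URL before downloading, which the patch uses for large data files. A minimal standalone sketch of that rewrite (raw_to_blob is an illustrative name; the patch's own helper, download_url_to_blob_url, appears in the helpers.py hunk below, and the sample URL comes from its docstring):

import re

def raw_to_blob(download_url):
    # Turn a raw.githubusercontent.com URL into the github.com/<repo>/raw/<ref>/
    # form used to download large data files from the examples repo.
    hits = re.search(
        r'https://raw.githubusercontent.com/(.+?/.+?)/(.+?)/(.*)', download_url)
    if not hits:  # re.search returns None when the pattern misses
        raise ValueError(f'Bad input url: {download_url}')
    return f'https://github.com/{hits.group(1)}/raw/{hits.group(2)}/{hits.group(3)}'

print(raw_to_blob(
    'https://raw.githubusercontent.com/rjurney/examples-data/'
    'v0.0.4/world_health/wb_health_population.csv.gz'))
# https://github.com/rjurney/examples-data/raw/v0.0.4/world_health/wb_health_population.csv.gz

One difference from the hunk below is the guard: search() returns None on a miss, so checking hits before touching hits.groups() avoids an AttributeError that a len(hits.groups()) < 3 check cannot catch.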
diff --git a/superset/cli.py b/superset/cli.py index 238b1fa49d8f..0d90125154b4 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -17,7 +17,6 @@ # under the License. # pylint: disable=C,R,W from datetime import datetime -from io import StringIO import json import logging from subprocess import Popen @@ -35,7 +34,7 @@ app, appbuilder, data, db, security_manager, ) from superset.data.helpers import get_examples_file_list, get_examples_uris, \ - list_examples_table + list_examples_table, download_url_to_blob_url from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) @@ -59,6 +58,7 @@ def make_shell_context(): def init(): """Inits the Superset application""" utils.get_or_create_main_db() + utils.get_or_create_example_db() appbuilder.add_permissions(update_perms=True) security_manager.sync_role_definitions() @@ -205,7 +205,7 @@ def export_example(dashboard_id, dashboard_title, description, example_title, with tarfile.open(file_name, 'w:gz') as tar: tar.add(tmp_dir_name, arcname=f'{dashboard_slug}') - click.echo(f'Exported example to {file_name}') + click.echo(click.style(str(f'Exported example to {file_name}'), fg='blue')) except DashboardNotFoundException as e: click.echo(click.style(str(e), fg='red')) @@ -258,25 +258,44 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): if token: headers = {'Authorization': 'token %s' % token} - download_urls = [x['metadata_file']['download_url'] for x in examples_files] - import_example_json = None - for download_url in download_urls: - example_json = requests.get(download_url, headers=headers).content - example_metadata = json.loads(example_json) + import_data_info = None + for example_file in examples_files: + + metadata_download_url = example_file['metadata_file']['download_url'] + example_metadata_json = requests.get(metadata_download_url, + headers=headers).content + # Cheaply load json without generating objects + example_metadata = json.loads(example_metadata_json) if example_metadata['description']['title'] == example_title: - import_example_json = example_json - logging.info(f'Importing example \'{example_title}\' from {download_url} ...') + import_example_json = example_metadata_json + import_data_info = example_file['data_files'] + logging.info( + f'Will import example \'{example_title}\' from {metadata_download_url}') + break if not import_example_json: e = ExampleNotFoundException(f'Example {example_title} not found!') click.echo(click.style(str(e), fg='red')) exit(1) + # Parse data to get file download_urls -> blob_urls + example_metadata = json.loads(import_example_json, + object_hook=utils.decode_dashboards) + + # The given download url won't work for data files, need a blob url + data_blob_urls = {} + for ex_file in example_metadata['files']: + github_info = [t for t in import_data_info + if t['name'] == ex_file['file_name']][0] + blob_url = download_url_to_blob_url(github_info['download_url']) + data_blob_urls[github_info['name']] = blob_url + try: dashboard_import_export.import_example_dashboard( db.session, import_example_json, + data_blob_urls, database_uri) except Exception as e: logging.error(f'Error importing example dashboard \'{example_title}\'!') diff --git a/superset/config.py b/superset/config.py index 4bedf3db55df..61f4956976f6 100644 --- a/superset/config.py +++ b/superset/config.py @@ -613,6 +613,14 @@ class CeleryConfig(object): 'force_https_permanent': False, } +# Tuple 
format: Gitub repo full name, tag/branch +EXAMPLE_REPOS_TAGS = [ + ('rjurney/examples-data', 'v0.0.4'), +] + +# Github Authorization Token - in case the examples commands exceed rate limits +GITHUB_AUTH_TOKEN = None + try: if CONFIG_PATH_ENV_VAR in os.environ: # Explicitly import config module that is not in pythonpath; useful @@ -634,11 +642,3 @@ class CeleryConfig(object): superset_config.__file__)) except ImportError: pass - -# Tuple format: Gitub repo full name, tag/branch -EXAMPLE_REPOS_TAGS = [ - ('rjurney/examples-data', 'v0.0.4'), -] - -# Github Authorization Token - in case the examples commands exceed rate limits -GITHUB_AUTH_TOKEN = None diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 7b86e3867663..d8377967959e 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -20,6 +20,7 @@ from io import BytesIO import json import os +import re import zlib from prettytable import PrettyTable @@ -27,6 +28,7 @@ from superset import app, db from superset.connectors.connector_registry import ConnectorRegistry +from superset.exceptions import BadGithubUrlConvertException from superset.models import core as models # Shortcuts @@ -75,6 +77,24 @@ def get_examples_uris(repo_name, tag): return contents_uri, blob_uri +def download_url_to_blob_url(download_url): + """Get a download link for a large file in the examples-data repo + + Example input: + 'https://raw.githubusercontent.com/rjurney/examples-data/v0.0.4/world_health/wb_health_population.csv.gz' + Example output: + 'https://github.com/rjurney/examples-data/raw/v0.0.4/world_health/wb_health_population.csv.gz' + """ + + hits = re.search( + 'https://raw.githubusercontent.com/(.+?/.+?)/(.+?)/(.*)', download_url) + if len(hits.groups()) < 3: + raise BadGithubUrlConvertException(f'Bad input url: {download_url}') + + blob_url = f'https://github.com/{hits.group(1)}/raw/{hits.group(2)}/{hits.group(3)}' + return blob_url + + def get_example_data(filepath, is_gzip=True, make_bytes=False): """Get the examples data for the legacy examples""" examples_repos_uris = \ @@ -91,7 +111,6 @@ def get_example_data(filepath, is_gzip=True, make_bytes=False): def get_examples_file_list(examples_repos_uris, examples_tag='master'): """Use the Github get contents API to list available examples""" examples = [] - for (repo_name, repo_tag, contents_uri, blob_uri) in examples_repos_uris: # Github authentication via a Personal Access Token for rate limit problems diff --git a/superset/exceptions.py b/superset/exceptions.py index e0284045b301..928d785c4b35 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -62,3 +62,7 @@ class DashboardNotFoundException(SupersetException): class ExampleNotFoundException(SupersetException): pass + + +class BadGithubUrlConvertException(SupersetException): + pass diff --git a/superset/utils/core.py b/superset/utils/core.py index 1ca4edd776a3..aba91df907a2 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -56,7 +56,6 @@ from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlalchemy.sql.type_api import Variant from sqlalchemy.types import TEXT, TypeDecorator -from sqlalchemy_utils import database_exists, create_database from superset.exceptions import SupersetException, SupersetTimeoutException from superset.utils.dates import datetime_to_epoch, EPOCH @@ -287,7 +286,7 @@ def decode_dashboards(o): class DashboardEncoder(json.JSONEncoder): - """JSONEncoder for Dashboard and their Slice objects""" + # pylint: disable=E0202 def default(self, o): try: @@ -910,6 +909,9 @@ def 
get_or_create_example_db(database_uri=None): from superset import conf, db from superset.models import core as models + if not database_uri: + database_uri = conf.get('SQLALCHEMY_EXAMPLES_URI') + logging.info('Creating database reference') dbobj = get_examples_database(db.session) if not dbobj: @@ -918,8 +920,7 @@ def get_or_create_example_db(database_uri=None): allow_csv_upload=True, expose_in_sqllab=True, ) - dbobj.set_sqlalchemy_uri( - database_uri or conf.get('SQLALCHEMY_EXAMPLE_URI')) + dbobj.set_sqlalchemy_uri(database_uri) db.session.add(dbobj) db.session.commit() return dbobj diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index a5d0998cece0..66d120f59b77 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -17,22 +17,59 @@ # pylint: disable=C,R,W import json import logging +import os +import shutil +import tempfile import time import pandas as pd +import requests from superset import db -from superset.exceptions import DashboardNotFoundException +from superset.exceptions import SupersetException from superset.models.core import Dashboard -from superset.utils.core import DashboardEncoder, decode_dashboards, \ - get_or_create_example_db_engine, get_or_create_main_db +from superset.utils import core as utils def import_dashboards(session, data_stream, import_time=None): """Imports dashboards from a stream to databases""" current_tt = int(time.time()) import_time = current_tt if import_time is None else import_time - data = json.loads(data_stream.read(), object_hook=decode_dashboards) + + data = json.loads(data_stream.read(), object_hook=utils.decode_dashboards) + + for table in data['datasources']: + type(table).import_obj(table, import_time=import_time) + + # TODO: import DRUID datasources + session.commit() + for dashboard in data['dashboards']: + Dashboard.import_obj( + dashboard, import_time=import_time) + + # Import any files in this exported Dashboard + if 'files' in data: + if len(data['files']) > 0: + examples_engine = utils.get_or_create_main_db() + for table in data['files']: + logging.info(f'Import data from file {table["file_name"]} into table ' + + f'{table["table_name"]}') + df = pd.read_csv(table['file_name'], parse_dates=True, + infer_datetime_format=True, compression='infer') + df.to_sql( + table['table_name'], + examples_engine.get_sqla_engine(), + if_exists='replace', + chunksize=500, + index=False) + + session.commit() + + +def import_example_dashboard(session, import_example_json, data_blob_urls, + database_uri, import_time=None): + """Imports dashboards from a JSON string and data files to databases""" + data = json.loads(import_example_json, object_hook=utils.decode_dashboards) # TODO: import DRUID datasources session.commit() @@ -40,17 +77,28 @@ def import_dashboards(session, data_stream, import_time=None): Dashboard.import_obj( dashboard, import_time=import_time) - if data['data']['includes_data']: - engine = get_or_create_main_db() - for table in data['data']['tables']: - df = pd.read_csv(table['file_path'], parse_dates=True, - infer_datetime_format=True, compression='infer') - df.to_sql( - table['name'], - engine, - if_exists='replace', - chunksize=500, - index=False) + if len(data['files']) > 0: + examples_engine = utils.get_or_create_example_db(database_uri) + + with tempfile.TemporaryDirectory() as tmpdir: + for file_info in data['files']: + # Get the github info for the file + blob_file_path = f'{tmpdir.name}{os.path.sep}{file_info["file_name"]}' + 
blob_url = data_blob_urls[file_info['file_name']] + + response = requests.get(blob_url, stream=True) + with open(blob_file_path, 'wb') as out_file: + shutil.copyfileobj(response.raw, out_file) + del response + + df = pd.read_csv(blob_file_path, parse_dates=True, + infer_datetime_format=True, compression='infer') + df.to_sql( + file_info['table_name'], + examples_engine.get_sqla_engine(), + if_exists='replace', + chunksize=500, + index=False) session.commit() @@ -73,7 +121,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data = {} if not export_dashboard_ids: logging.error('No dashboards found!') - raise DashboardNotFoundException('No dashboards found!') + raise SupersetException('No dashboards found!') else: data = Dashboard.export_dashboards(export_dashboard_ids, export_data, export_data_dir) @@ -84,14 +132,14 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data['description']['description'] = description data['description']['license'] = _license - export_json = json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) + export_json = json.dumps(data, cls=utils.DashboardEncoder, indent=4, sort_keys=True) # Remove datasources[].__SqlaTable__.database for example export - if strip_database: - parsed_json = json.loads(export_json) - for datasource in parsed_json['datasources']: - datasource['__SqlaTable__']['database'] = None - export_json = json.dumps(parsed_json, indent=4, sort_keys=True) + # if strip_database: + # parsed_json = json.loads(export_json) + # for datasource in parsed_json['datasources']: + # datasource['__SqlaTable__']['database'] = None + # export_json = '{}' # json.dumps(parsed_json, indent=4, sort_keys=True) return export_json diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 057938b00ede..0d388c0f1aa9 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -85,6 +85,7 @@ def test_examples_list(self): result = self.runner.invoke( app.cli, ['examples', 'list']) + print("results.output", result.output) found = False for i, line in enumerate(result.output.split('\n')): # skip header @@ -102,7 +103,12 @@ def test_examples_list(self): def test_examples_import(self): """Test `superset examples import`""" - pass + result = self.runner.invoke( + app.cli, + [ + 'examples', 'import', + ] + ) def test_examples_remove(self): """Test `superset examples remove`""" From 2031cd4fb35ac76eaf14f851a8f68a93bd5b2cb6 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 4 Jun 2019 16:46:18 -0700 Subject: [PATCH 048/107] migration now works for mysql --- .../versions/e5200a951e62_add_dashboards_uuid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py index 707e7f3aad49..39f3598e9eab 100644 --- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py +++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py @@ -23,7 +23,7 @@ """ import uuid from alembic import op -from sqlalchemy import Column, Integer +from sqlalchemy import Column, Integer, CHAR from sqlalchemy.ext.declarative import declarative_base from sqlalchemy_utils.types.uuid import UUIDType @@ -106,8 +106,9 @@ def add_uuid_column(col_name, _type): s.uuid = get_uuid() session.merge(s) with op.batch_alter_table(col_name) as batch_op: - batch_op.alter_column('uuid', nullable=False) - batch_op.create_unique_constraint('uq_uuid', 'uuid') + batch_op.alter_column('uuid', 
existing_type=CHAR(32), + new_column_name='uuid', nullable=False) + batch_op.create_unique_constraint('uq_uuid', ['uuid']) session.commit() add_uuid_column('dashboards', Dashboard) From 41789b55f24e9827094f2783b3843890908174ff Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 13:57:20 -0700 Subject: [PATCH 049/107] Use Flask-AppBuilder master --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8a1045292fc8..8e13031cae98 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,8 @@ croniter==0.3.29 cryptography==2.4.2 decorator==4.3.0 # via retry defusedxml==0.5.0 # via python3-openid -Flask-AppBuilder>=2.1.3 +# Flask-AppBuilder>=2.1.4 +-e git+git://github.com/dpgaspar/Flask-AppBuilder.git@master#egg=Flask-AppBuilder flask-babel==0.11.1 # via flask-appbuilder flask-caching==1.4.0 flask-compress==1.4.0 From c713de838a499993479791ea0912a3e5d675913c Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 14:08:13 -0700 Subject: [PATCH 050/107] Debug printing for json serialization --- superset/utils/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index d045345e13d1..57921ca59206 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -290,14 +290,18 @@ class DashboardEncoder(json.JSONEncoder): # pylint: disable=E0202 def default(self, o): try: - vals = { - k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} if type(o) == uuid.UUID: + print(type(o), o, o.__dict__) return str(o) + vals = { + k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} + print(type(o), o, o.__dict__) return {'__{}__'.format(o.__class__.__name__): vals} except Exception: if type(o) == datetime: + print(type(o), o) return {'__datetime__': o.replace(microsecond=0).isoformat()} + print(type(o), o, o.__dict__) return json.JSONEncoder.default(self, o) From 232a2f80698ed9d8154e36c583c567ee2b080cb4 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 14:16:29 -0700 Subject: [PATCH 051/107] Reduced import of superset.utils.core in superset.utils.dashboard_import_export --- superset/utils/dashboard_import_export.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 66d120f59b77..79277a07a064 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -28,7 +28,8 @@ from superset import db from superset.exceptions import SupersetException from superset.models.core import Dashboard -from superset.utils import core as utils +from superset.utils.core import DashboardEncoder, decode_dashboards, \ + get_or_create_example_db, get_or_create_main_db def import_dashboards(session, data_stream, import_time=None): @@ -36,7 +37,7 @@ def import_dashboards(session, data_stream, import_time=None): current_tt = int(time.time()) import_time = current_tt if import_time is None else import_time - data = json.loads(data_stream.read(), object_hook=utils.decode_dashboards) + data = json.loads(data_stream.read(), object_hook=decode_dashboards) for table in data['datasources']: type(table).import_obj(table, import_time=import_time) @@ -50,7 +51,7 @@ def import_dashboards(session, data_stream, import_time=None): # Import any files in this exported Dashboard if 'files' in data: if len(data['files']) > 0: - examples_engine = 
utils.get_or_create_main_db() + examples_engine = get_or_create_main_db() for table in data['files']: logging.info(f'Import data from file {table["file_name"]} into table ' + f'{table["table_name"]}') @@ -69,7 +70,7 @@ def import_dashboards(session, data_stream, import_time=None): def import_example_dashboard(session, import_example_json, data_blob_urls, database_uri, import_time=None): """Imports dashboards from a JSON string and data files to databases""" - data = json.loads(import_example_json, object_hook=utils.decode_dashboards) + data = json.loads(import_example_json, object_hook=decode_dashboards) # TODO: import DRUID datasources session.commit() @@ -78,7 +79,7 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, dashboard, import_time=import_time) if len(data['files']) > 0: - examples_engine = utils.get_or_create_example_db(database_uri) + examples_engine = get_or_create_example_db(database_uri) with tempfile.TemporaryDirectory() as tmpdir: for file_info in data['files']: @@ -132,7 +133,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data['description']['description'] = description data['description']['license'] = _license - export_json = json.dumps(data, cls=utils.DashboardEncoder, indent=4, sort_keys=True) + export_json = json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) # Remove datasources[].__SqlaTable__.database for example export # if strip_database: From 83ca4ec2a7396a2bd869a76a8c793b6e0eb4dcbe Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 14:37:05 -0700 Subject: [PATCH 052/107] Error in example import tempdir name fixed --- superset/cli.py | 2 +- superset/utils/dashboard_import_export.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 943da7a8b0fe..9ed1e24dd4c5 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -299,7 +299,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): database_uri) except Exception as e: logging.error(f'Error importing example dashboard \'{example_title}\'!') - logging.error(e) + logging.exception(e) @examples.command('remove') diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 79277a07a064..6e038b548ed5 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -84,7 +84,7 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, with tempfile.TemporaryDirectory() as tmpdir: for file_info in data['files']: # Get the github info for the file - blob_file_path = f'{tmpdir.name}{os.path.sep}{file_info["file_name"]}' + blob_file_path = f'{tmpdir}{os.path.sep}{file_info["file_name"]}' blob_url = data_blob_urls[file_info['file_name']] response = requests.get(blob_url, stream=True) From 3e270a2c63a6086a04ec0d15089417b1df0bde83 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 14:42:30 -0700 Subject: [PATCH 053/107] Set default example export title to dashboard title --- superset/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 9ed1e24dd4c5..a9d0c9e31b2b 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -160,7 +160,7 @@ def examples(): @click.option( '--description', '-d', help='Description of new example', required=True) @click.option( - '--example-title', '-e', help='Title for new example', required=True) + '--example-title', '-e', help='Title for new example', 
required=False) @click.option( '--file-name', '-f', default='dashboard.tar.gz', help='Specify export file name. Defaults to dashboard.tar.gz') @@ -188,7 +188,7 @@ def export_example(dashboard_id, dashboard_title, description, example_title, export_data=True, export_data_dir=tmp_dir_name, description=description, - export_title=example_title, + export_title=example_title or dashboard_title, _license=_license, strip_database=True) From ed5b3084186c1977c568b15a0522047bce5067d6 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 5 Jun 2019 17:15:33 -0700 Subject: [PATCH 054/107] Debug DashboardEncoder, cleanup --- superset/utils/core.py | 52 ++++++++++++++++++++--- superset/utils/dashboard_import_export.py | 2 +- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 57921ca59206..3cbad5ca5cda 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -27,6 +27,7 @@ import functools import json import logging +import marshmallow as m import os import signal import smtplib @@ -52,7 +53,7 @@ import parsedatetime from pydruid.utils.having import Having import sqlalchemy as sa -from sqlalchemy import create_engine, event, exc, select, Text +from sqlalchemy import event, exc, select, Text from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlalchemy.sql.type_api import Variant from sqlalchemy.types import TEXT, TypeDecorator @@ -287,21 +288,62 @@ def decode_dashboards(o): class DashboardEncoder(json.JSONEncoder): + def date_handler(self, o): + print('date_handler', self, o) + sys.stdout.flush() + if hasattr(o, 'replace') and hasattr(o, 'isoformat'): + print('hasattr(o, \'replace\') and hasattr(o, \'isoformat\')') + sys.stdout.flush() + try: + short_d = o.replace(microsecond=0) + print('short_d', short_d) + sys.stdout.flush() + if hasattr(short_d, 'isoformat'): + iso_d = short_d.isoformat() + print('iso_d', iso_d) + sys.stdout.flush() + a = {'__datetime__': str(iso_d)} + print('a', a) + sys.stdout.flush() + return a + except Exception as e: + print('Exception!') + logging.exception(e) + sys.stdout.flush() + sys.exit(1) + else: + print("Error! 
Error serializing datetime!") + sys.stdout.flush() + sys.exit(1) + try: + json.JSONEncoder.default(self, o) + except Exception as e: + print('Exception!') + logging.exception(e) + sys.stdout.flush() + # pylint: disable=E0202 def default(self, o): try: if type(o) == uuid.UUID: - print(type(o), o, o.__dict__) + print(type(o), o, str(o)) + sys.stdout.flush() return str(o) vals = { k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} print(type(o), o, o.__dict__) + sys.stdout.flush() return {'__{}__'.format(o.__class__.__name__): vals} - except Exception: + except Exception as e: + print('Caught Exception!') + logging.exception(e) + sys.stdout.flush() if type(o) == datetime: print(type(o), o) - return {'__datetime__': o.replace(microsecond=0).isoformat()} - print(type(o), o, o.__dict__) + sys.stdout.flush() + return self.date_handler(o) + print(type(o), o) + sys.stdout.flush() return json.JSONEncoder.default(self, o) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 6e038b548ed5..71084af17318 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -29,7 +29,7 @@ from superset.exceptions import SupersetException from superset.models.core import Dashboard from superset.utils.core import DashboardEncoder, decode_dashboards, \ - get_or_create_example_db, get_or_create_main_db + get_or_create_example_db, get_or_create_main_db def import_dashboards(session, data_stream, import_time=None): From e2f9a5e849e83a23ef05998f1295ea3dc5579b6d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 6 Jun 2019 14:55:49 -0700 Subject: [PATCH 055/107] Fixes model serialization by using ImportMixin.export_to_dict on the eager_datasources in Dashboard.export_dashboards --- superset/models/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/models/core.py b/superset/models/core.py index 7d7b49db9055..7d9e41c2f276 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -709,7 +709,7 @@ def export_dashboards(cls, dashboard_ids, export_data=False, return { 'description': desc, 'dashboards': copied_dashboards, - 'datasources': eager_datasources, + 'datasources': [o.export_to_dict() for o in eager_datasources], 'files': files, } From 0a9c607aad25c04b5c06a83cf4c6912908949b64 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 6 Jun 2019 15:25:47 -0700 Subject: [PATCH 056/107] Revert "Debug DashboardEncoder, cleanup" This reverts commit ed5b3084186c1977c568b15a0522047bce5067d6. 
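For context on what this revert restores: the pre-debug DashboardEncoder is a plain json.JSONEncoder subclass that serializes model objects as {'__ClassName__': fields} (skipping SQLAlchemy's _sa_instance_state), UUIDs as strings, and datetimes as {'__datetime__': <iso>} via the exception fallback. A minimal self-contained illustration, with RestoredEncoder and FakeSlice as illustrative stand-ins rather than Superset code as-is:

import json
import uuid
from datetime import datetime

class RestoredEncoder(json.JSONEncoder):
    # Mirrors the shape of the restored DashboardEncoder.default()
    def default(self, o):
        try:
            if type(o) == uuid.UUID:
                return str(o)
            vals = {k: v for k, v in o.__dict__.items()
                    if k != '_sa_instance_state'}
            return {'__{}__'.format(o.__class__.__name__): vals}
        except Exception:
            # datetime has no __dict__, so it lands in this fallback
            if type(o) == datetime:
                return {'__datetime__': o.replace(microsecond=0).isoformat()}
            return json.JSONEncoder.default(self, o)

class FakeSlice:
    def __init__(self):
        self.uuid = uuid.uuid4()
        self.changed_on = datetime.now()

print(json.dumps(FakeSlice(), cls=RestoredEncoder))
# {"__FakeSlice__": {"uuid": "<uuid4>", "changed_on": {"__datetime__": "<iso>"}}}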
--- superset/utils/core.py | 52 +++-------------------- superset/utils/dashboard_import_export.py | 2 +- 2 files changed, 6 insertions(+), 48 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 3cbad5ca5cda..57921ca59206 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -27,7 +27,6 @@ import functools import json import logging -import marshmallow as m import os import signal import smtplib @@ -53,7 +52,7 @@ import parsedatetime from pydruid.utils.having import Having import sqlalchemy as sa -from sqlalchemy import event, exc, select, Text +from sqlalchemy import create_engine, event, exc, select, Text from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlalchemy.sql.type_api import Variant from sqlalchemy.types import TEXT, TypeDecorator @@ -288,62 +287,21 @@ def decode_dashboards(o): class DashboardEncoder(json.JSONEncoder): - def date_handler(self, o): - print('date_handler', self, o) - sys.stdout.flush() - if hasattr(o, 'replace') and hasattr(o, 'isoformat'): - print('hasattr(o, \'replace\') and hasattr(o, \'isoformat\')') - sys.stdout.flush() - try: - short_d = o.replace(microsecond=0) - print('short_d', short_d) - sys.stdout.flush() - if hasattr(short_d, 'isoformat'): - iso_d = short_d.isoformat() - print('iso_d', iso_d) - sys.stdout.flush() - a = {'__datetime__': str(iso_d)} - print('a', a) - sys.stdout.flush() - return a - except Exception as e: - print('Exception!') - logging.exception(e) - sys.stdout.flush() - sys.exit(1) - else: - print("Error! Error serializing datetime!") - sys.stdout.flush() - sys.exit(1) - try: - json.JSONEncoder.default(self, o) - except Exception as e: - print('Exception!') - logging.exception(e) - sys.stdout.flush() - # pylint: disable=E0202 def default(self, o): try: if type(o) == uuid.UUID: - print(type(o), o, str(o)) - sys.stdout.flush() + print(type(o), o, o.__dict__) return str(o) vals = { k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} print(type(o), o, o.__dict__) - sys.stdout.flush() return {'__{}__'.format(o.__class__.__name__): vals} - except Exception as e: - print('Caught Exception!') - logging.exception(e) - sys.stdout.flush() + except Exception: if type(o) == datetime: print(type(o), o) - sys.stdout.flush() - return self.date_handler(o) - print(type(o), o) - sys.stdout.flush() + return {'__datetime__': o.replace(microsecond=0).isoformat()} + print(type(o), o, o.__dict__) return json.JSONEncoder.default(self, o) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 71084af17318..6e038b548ed5 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -29,7 +29,7 @@ from superset.exceptions import SupersetException from superset.models.core import Dashboard from superset.utils.core import DashboardEncoder, decode_dashboards, \ - get_or_create_example_db, get_or_create_main_db + get_or_create_example_db, get_or_create_main_db def import_dashboards(session, data_stream, import_time=None): From fb6dbbac6ea21b7bc0b4d7e193aeceaf3b4f2e20 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 7 Jun 2019 14:30:15 -0700 Subject: [PATCH 057/107] Remove debug from DashboardEncoder --- superset/utils/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 57921ca59206..c97209f4418f 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -291,17 +291,13 @@ class DashboardEncoder(json.JSONEncoder): def default(self, o): try: 
if type(o) == uuid.UUID: - print(type(o), o, o.__dict__) return str(o) vals = { k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} - print(type(o), o, o.__dict__) return {'__{}__'.format(o.__class__.__name__): vals} except Exception: if type(o) == datetime: - print(type(o), o) return {'__datetime__': o.replace(microsecond=0).isoformat()} - print(type(o), o, o.__dict__) return json.JSONEncoder.default(self, o) From 38fbac14a15de00d4d067eb123d1d99a5164aab6 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 7 Jun 2019 16:33:20 -0700 Subject: [PATCH 058/107] Changed name of superset.models.helpers.ImportMixin to ImportExportMixin as it does both --- superset/connectors/base/models.py | 8 ++++---- superset/connectors/druid/models.py | 4 ++-- .../versions/e5200a951e62_add_dashboards_uuid.py | 2 +- superset/models/core.py | 8 ++++---- superset/models/helpers.py | 2 +- superset/models/schedules.py | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index 2e8bc25adff0..a25212143fa7 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -24,11 +24,11 @@ from sqlalchemy.orm import foreign, relationship from superset.models.core import Slice -from superset.models.helpers import AuditMixinNullable, ImportMixin +from superset.models.helpers import AuditMixinNullable, ImportExportMixin from superset.utils import core as utils -class BaseDatasource(AuditMixinNullable, ImportMixin): +class BaseDatasource(AuditMixinNullable, ImportExportMixin): """A common interface to objects that are queryable (tables and datasources)""" @@ -341,7 +341,7 @@ def update_from_object(self, obj): obj.get('columns'), self.columns, self.column_class, 'column_name') -class BaseColumn(AuditMixinNullable, ImportMixin): +class BaseColumn(AuditMixinNullable, ImportExportMixin): """Interface for column""" __tablename__ = None # {connector_name}_column @@ -404,7 +404,7 @@ def data(self): return {s: getattr(self, s) for s in attrs if hasattr(self, s)} -class BaseMetric(AuditMixinNullable, ImportMixin): +class BaseMetric(AuditMixinNullable, ImportExportMixin): """Interface for Metrics""" diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index c71bc8061962..def02689da49 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -51,7 +51,7 @@ from superset.connectors.base.models import BaseColumn, BaseDatasource, BaseMetric from superset.exceptions import MetricPermException, SupersetException from superset.models.helpers import ( - AuditMixinNullable, ImportMixin, QueryResult, + AuditMixinNullable, ImportExportMixin, QueryResult, ) from superset.utils import core as utils, import_datasource from superset.utils.core import ( @@ -87,7 +87,7 @@ def __init__(self, name, post_aggregator): self.post_aggregator = post_aggregator -class DruidCluster(Model, AuditMixinNullable, ImportMixin): +class DruidCluster(Model, AuditMixinNullable, ImportExportMixin): """ORM object referencing the Druid clusters""" diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py index 39f3598e9eab..0e46763c59d4 100644 --- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py +++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py @@ -14,7 +14,7 @@ # KIND, either express or implied.
See the License for the # specific language governing permissions and limitations # under the License. -"""Adds uuid columns to all classes with ImportMixin: dashboards, datasources, dbs, slices, tables, dashboard_email_schedules, slice_email_schedules +"""Adds uuid columns to all classes with ImportExportMixin: dashboards, datasources, dbs, slices, tables, dashboard_email_schedules, slice_email_schedules Revision ID: e5200a951e62 Revises: e9df189e5c7e diff --git a/superset/models/core.py b/superset/models/core.py index 7d9e41c2f276..ea73878d6cb5 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -50,7 +50,7 @@ from superset import app, db, db_engine_specs, security_manager from superset.connectors.connector_registry import ConnectorRegistry from superset.legacy import update_time_range -from superset.models.helpers import AuditMixinNullable, ImportMixin +from superset.models.helpers import AuditMixinNullable, ImportExportMixin from superset.models.tags import ChartUpdater, DashboardUpdater, FavStarUpdater from superset.models.user_attributes import UserAttribute from superset.utils import ( @@ -146,7 +146,7 @@ class CssTemplate(Model, AuditMixinNullable): Column('slice_id', Integer, ForeignKey('slices.id'))) -class Slice(Model, AuditMixinNullable, ImportMixin): +class Slice(Model, AuditMixinNullable, ImportExportMixin): """A slice is essentially a report or a view on data""" @@ -396,7 +396,7 @@ def url(self): ) -class Dashboard(Model, AuditMixinNullable, ImportMixin): +class Dashboard(Model, AuditMixinNullable, ImportExportMixin): """The dashboard object!""" @@ -714,7 +714,7 @@ def export_dashboards(cls, dashboard_ids, export_data=False, } -class Database(Model, AuditMixinNullable, ImportMixin): +class Database(Model, AuditMixinNullable, ImportExportMixin): """An ORM object that stores Database related information""" diff --git a/superset/models/helpers.py b/superset/models/helpers.py index ff2e048277d5..1294913bab48 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -49,7 +49,7 @@ def get_uuid(): return str(uuid.uuid4()) -class ImportMixin(object): +class ImportExportMixin(object): export_parent = None # The name of the attribute # with the SQL Alchemy back reference diff --git a/superset/models/schedules.py b/superset/models/schedules.py index fdd6636699f4..0dcae0b74ae8 100644 --- a/superset/models/schedules.py +++ b/superset/models/schedules.py @@ -27,7 +27,7 @@ from sqlalchemy.orm import relationship from superset import security_manager -from superset.models.helpers import AuditMixinNullable, ImportMixin +from superset.models.helpers import AuditMixinNullable, ImportExportMixin metadata = Model.metadata # pylint: disable=no-member @@ -77,7 +77,7 @@ def user(self): class DashboardEmailSchedule(Model, AuditMixinNullable, - ImportMixin, + ImportExportMixin, EmailSchedule): __tablename__ = 'dashboard_email_schedules' dashboard_id = Column(Integer, ForeignKey('dashboards.id')) @@ -90,7 +90,7 @@ class DashboardEmailSchedule(Model, class SliceEmailSchedule(Model, AuditMixinNullable, - ImportMixin, + ImportExportMixin, EmailSchedule): __tablename__ = 'slice_email_schedules' slice_id = Column(Integer, ForeignKey('slices.id')) From ebb77dd0a0e06663f798ca498cc8868d8b75ad3d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 7 Jun 2019 22:08:12 -0700 Subject: [PATCH 059/107] Nearly working json encoding --- superset/models/core.py | 4 +-- superset/models/helpers.py | 7 +++- superset/utils/core.py | 43 +++++++++++++++++------ 
superset/utils/dashboard_import_export.py | 6 ++-- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index ea73878d6cb5..1f54febd05c0 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -708,8 +708,8 @@ def export_dashboards(cls, dashboard_ids, export_data=False, return { 'description': desc, - 'dashboards': copied_dashboards, - 'datasources': [o.export_to_dict() for o in eager_datasources], + 'dashboards': [o.export_to_json() for o in copied_dashboards], + 'datasources': [o.export_to_json() for o in eager_datasources], 'files': files, } diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 1294913bab48..96fc02b1035a 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -33,7 +33,7 @@ from sqlalchemy_utils.types.uuid import UUIDType import yaml -from superset.utils.core import QueryStatus +from superset.utils.core import DashboardEncoder, QueryStatus def json_to_dict(json_str): @@ -191,6 +191,10 @@ def import_from_dict(cls, session, dict_rep, parent=None, return obj + def export_to_json(self, recursive=True): + """Export obj to json""" + return DashboardEncoder.encode(self) + def export_to_dict(self, recursive=True, include_parent_ref=False, include_defaults=False): """Export obj to dictionary""" @@ -200,6 +204,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False, parent_ref = cls.__mapper__.relationships.get(cls.export_parent) if parent_ref: parent_excludes = {c.name for c in parent_ref.local_columns} + dict_rep = {c.name: getattr(self, c.name) for c in cls.__table__.columns if (c.name in self.export_fields and diff --git a/superset/utils/core.py b/superset/utils/core.py index c97209f4418f..51e4443b8d6f 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -25,7 +25,8 @@ from email.utils import formatdate import errno import functools -import json +# import json +from json.encoder import JSONEncoder import logging import os import signal @@ -285,20 +286,42 @@ def decode_dashboards(o): return o -class DashboardEncoder(json.JSONEncoder): +class DashboardEncoder(): # pylint: disable=E0202 - def default(self, o): + @classmethod + def encode(cls, o): + j = JSONEncoder() try: - if type(o) == uuid.UUID: + print(type(o)) + if isinstance(o, uuid.UUID): + logging.debug('UUID') return str(o) - vals = { - k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'} - return {'__{}__'.format(o.__class__.__name__): vals} - except Exception: - if type(o) == datetime: + if isinstance(o, datetime): + logging.debug('datetime') return {'__datetime__': o.replace(microsecond=0).isoformat()} - return json.JSONEncoder.default(self, o) + if isinstance(o, list): + logging.debug('list') + return [DashboardEncoder.encode(i) for i in o] + if hasattr(o, '__dict__'): + logging.debug('__dict__') + vals = {} + for k, v in o.__dict__.items(): + if k == '_sa_instance_state': + logging.debug('skipping _sa_instance_state') + continue + elif k.startswith('json') or k.endswith('json'): + logging.debug(f'found json... 
{k}') + vals[k] = v + else: + vals[k] = DashboardEncoder.encode(v) + return {'__{}__'.format(o.__class__.__name__): vals} + else: + logging.debug('else JSONEncoder().encode(o)') + return j.encode(o) + except Exception as e: + logging.exception(e) + return j.encode(o) def parse_human_timedelta(s: str): diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 6e038b548ed5..ef0881162b3e 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -28,8 +28,8 @@ from superset import db from superset.exceptions import SupersetException from superset.models.core import Dashboard -from superset.utils.core import DashboardEncoder, decode_dashboards, \ - get_or_create_example_db, get_or_create_main_db +from superset.utils.core import decode_dashboards, get_or_create_example_db, \ + get_or_create_main_db def import_dashboards(session, data_stream, import_time=None): @@ -133,7 +133,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data['description']['description'] = description data['description']['license'] = _license - export_json = json.dumps(data, cls=DashboardEncoder, indent=4, sort_keys=True) + export_json = json.dumps(data, indent=4, sort_keys=True) # Remove datasources[].__SqlaTable__.database for example export # if strip_database: From b340fcb9c886ad5c31c0184955f917fb5a7b603d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 7 Jun 2019 22:18:30 -0700 Subject: [PATCH 060/107] Removed JSONEncoder from DashboardEncoder completely --- superset/utils/core.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 51e4443b8d6f..a8a7cf4cae5e 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -25,8 +25,6 @@ from email.utils import formatdate import errno import functools -# import json -from json.encoder import JSONEncoder import logging import os import signal @@ -53,7 +51,7 @@ import parsedatetime from pydruid.utils.having import Having import sqlalchemy as sa -from sqlalchemy import create_engine, event, exc, select, Text +from sqlalchemy import event, exc, select, Text from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlalchemy.sql.type_api import Variant from sqlalchemy.types import TEXT, TypeDecorator @@ -291,37 +289,28 @@ class DashboardEncoder(): # pylint: disable=E0202 @classmethod def encode(cls, o): - j = JSONEncoder() try: - print(type(o)) if isinstance(o, uuid.UUID): - logging.debug('UUID') return str(o) if isinstance(o, datetime): - logging.debug('datetime') return {'__datetime__': o.replace(microsecond=0).isoformat()} if isinstance(o, list): - logging.debug('list') return [DashboardEncoder.encode(i) for i in o] if hasattr(o, '__dict__'): - logging.debug('__dict__') vals = {} for k, v in o.__dict__.items(): if k == '_sa_instance_state': - logging.debug('skipping _sa_instance_state') continue elif k.startswith('json') or k.endswith('json'): - logging.debug(f'found json... 
{k}') vals[k] = v else: vals[k] = DashboardEncoder.encode(v) return {'__{}__'.format(o.__class__.__name__): vals} else: - logging.debug('else JSONEncoder().encode(o)') - return j.encode(o) + return o except Exception as e: logging.exception(e) - return j.encode(o) + return o def parse_human_timedelta(s: str): From c31bba5923a31da5803a6ff53b164e195cc882b9 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 7 Jun 2019 23:39:27 -0700 Subject: [PATCH 061/107] Added --url option to examples export and --full-fields to examples list --- superset/cli.py | 17 ++-- superset/config.py | 2 +- superset/data/helpers.py | 18 ++-- superset/dev.py | 103 ++++++++++++++++++++++ superset/utils/dashboard_import_export.py | 5 +- 5 files changed, 132 insertions(+), 13 deletions(-) create mode 100644 superset/dev.py diff --git a/superset/cli.py b/superset/cli.py index a9d0c9e31b2b..d7a9a2989d15 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -33,8 +33,8 @@ from superset import ( app, appbuilder, data, db, security_manager, ) -from superset.data.helpers import get_examples_file_list, get_examples_uris, \ - list_examples_table, download_url_to_blob_url +from superset.data.helpers import download_url_to_blob_url, get_examples_file_list, \ + get_examples_uris, list_examples_table from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) @@ -167,8 +167,10 @@ def examples(): @click.option( '--license', '-l', '_license', default='Apache 2.0', help='License of the example dashboard') +@click.option( + '--url', '-u', default=None, help='URL of dataset home page') def export_example(dashboard_id, dashboard_title, description, example_title, - file_name, _license): + file_name, _license, url): """Exmport example dashboard/datasets tarball""" if not (dashboard_id or dashboard_title): raise click.UsageError('must supply --dashboard-id/-i or --dashboard-title/-t') @@ -190,6 +192,7 @@ def export_example(dashboard_id, dashboard_title, description, example_title, description=description, export_title=example_title or dashboard_title, _license=_license, + url=url, strip_database=True) dashboard_slug = dashboard_import_export.get_slug( @@ -221,11 +224,13 @@ def export_example(dashboard_id, dashboard_title, description, example_title, '--examples-tag', '-r', help='Tag or branch of Github repository containing examples. 
Defaults to \'master\'', default='master') -def _list_examples(examples_repo, examples_tag): +@click.option( + '--full-fields', '-ff', is_flag=True, default=False, help='Print full length fields') +def _list_examples(examples_repo, examples_tag, full_fields): """List example dashboards/datasets""" click.echo( - list_examples_table(examples_repo, examples_tag=examples_tag)) - pass + list_examples_table(examples_repo, examples_tag=examples_tag, + full_fields=full_fields)) @examples.command('import') diff --git a/superset/config.py b/superset/config.py index 43ef4956a01b..dd6c3d5fe616 100644 --- a/superset/config.py +++ b/superset/config.py @@ -638,7 +638,7 @@ class CeleryConfig(object): # Tuple format: Gitub repo full name, tag/branch EXAMPLE_REPOS_TAGS = [ - ('rjurney/examples-data', 'v0.0.4'), + ('rjurney/examples-data', 'v0.0.6'), ] # Github Authorization Token - in case the examples commands exceed rate limits diff --git a/superset/data/helpers.py b/superset/data/helpers.py index d8377967959e..8ea2efaee444 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -139,11 +139,11 @@ def get_examples_file_list(examples_repos_uris, examples_tag='master'): return examples -def list_examples_table(examples_repo, examples_tag='master'): +def list_examples_table(examples_repo, examples_tag='master', full_fields=True): """Turn a list of available examples into a PrettyTable""" # Write a pretty table to stdout t = PrettyTable(field_names=['Title', 'Description', 'Size (MB)', 'Rows', - 'Files', 'Created Date', 'Repository', 'Tag']) + 'Files', 'URL', 'Created Date', 'Repository', 'Tag']) # Optionally replace the default examples repo with a specified one examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) @@ -174,15 +174,23 @@ def date_format(iso_date): d = json.loads( requests.get( file_info['metadata_file']['download_url']).content)['description'] + + if not full_fields: + file_info['repo_name'] = shorten(file_info['repo_name'], 30) + file_info['repo_tag'] = shorten(file_info['repo_tag'], 20) + d['description'] = shorten(d['description'], 50) + d['url'] = shorten(d['url'], 30) + row = [ d['title'], - shorten(d['description'], 50), + d['description'], d['total_size_mb'], d['total_rows'], d['file_count'], + d['url'], date_format(d['created_at']), - shorten(file_info['repo_name'], 30), - shorten(file_info['repo_tag'], 20), + file_info['repo_name'], + file_info['repo_tag'], ] t.add_row(row) diff --git a/superset/dev.py b/superset/dev.py new file mode 100644 index 000000000000..02c611a27824 --- /dev/null +++ b/superset/dev.py @@ -0,0 +1,103 @@ +# superset.core.models imports +"""A collection of ORM sqlalchemy models for Superset""" +from contextlib import closing +from copy import copy, deepcopy +from datetime import datetime +import functools +import json +import logging +import os +import sys +import textwrap +from typing import List + +from flask import escape, g, Markup, request +from flask_appbuilder import Model +from flask_appbuilder.models.decorators import renders +from flask_appbuilder.security.sqla.models import User +import numpy +import pandas as pd +import sqlalchemy as sqla +from sqlalchemy import ( + Boolean, Column, create_engine, DateTime, ForeignKey, Integer, + MetaData, String, Table, Text, +) +from sqlalchemy.engine import url +from sqlalchemy.engine.url import make_url +from sqlalchemy.orm import relationship, sessionmaker, subqueryload +from sqlalchemy.orm.session import make_transient +from sqlalchemy.pool import NullPool +from sqlalchemy.schema 
import UniqueConstraint +from sqlalchemy.sql import select, text +from sqlalchemy_utils import EncryptedType +import sqlparse + +from superset import app, db, db_engine_specs, security_manager +from superset.connectors.connector_registry import ConnectorRegistry +from superset.legacy import update_time_range +from superset.models.helpers import AuditMixinNullable, ImportExportMixin +from superset.models.tags import ChartUpdater, DashboardUpdater, FavStarUpdater +from superset.models.user_attributes import UserAttribute +from superset.utils import ( + cache as cache_util, + core as utils, +) +from superset.viz import viz_types +from urllib import parse # noqa + +config = app.config +custom_password_store = config.get('SQLALCHEMY_CUSTOM_PASSWORD_STORE') +stats_logger = config.get('STATS_LOGGER') +log_query = config.get('QUERY_LOGGER') +metadata = Model.metadata # pylint: disable=no-member + +PASSWORD_MASK = 'X' * 10 + + +# My setup +dashboard_ids=[1] +dashboard_titles=[] +export_data=True +export_data_dir='.' + +session = db.session() + + +# superset.utils.dashboard_import_export imports +from superset.models.core import Dashboard + + +# superset.models.core.export_dashboards internals +copied_dashboards = [] +datasource_ids = set() +for dashboard_id in dashboard_ids: + # make sure that dashboard_id is an integer + dashboard_id = int(dashboard_id) + copied_dashboard = ( + db.session.query(Dashboard) + .options(subqueryload(Dashboard.slices)) + .filter_by(id=dashboard_id).first() + ) + make_transient(copied_dashboard) + for slc in copied_dashboard.slices: + datasource_ids.add((slc.datasource_id, slc.datasource_type)) + # add extra params for the import + slc.alter_params( + remote_id=slc.id, + datasource_name=slc.datasource.name, + schema=slc.datasource.name, + database_name=slc.datasource.database.name, + ) + copied_dashboard.alter_params(remote_id=dashboard_id) + copied_dashboards.append(copied_dashboard) + + eager_datasources = [] + for dashboard_id, dashboard_type in datasource_ids: + eager_datasource = ConnectorRegistry.get_eager_datasource( + db.session, dashboard_type, dashboard_id) + eager_datasource.alter_params( + remote_id=eager_datasource.id, + database_name=eager_datasource.database.name, + ) + make_transient(eager_datasource) + eager_datasources.append(eager_datasource) \ No newline at end of file diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index ef0881162b3e..878cf4f2bfe2 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -106,7 +106,8 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False, export_data_dir=None, description=None, - export_title=None, _license='Apache 2.0', strip_database=False): + export_title=None, _license='Apache 2.0', url=None, + strip_database=False): """Returns all dashboards metadata as a json dump""" logging.info('Starting export') export_dashboard_ids = [] @@ -131,6 +132,8 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data['description']['title'] = export_title if description: data['description']['description'] = description + if url: + data['description']['url'] = url data['description']['license'] = _license export_json = json.dumps(data, indent=4, sort_keys=True) From 1ba0bf65e1ddaa7a441db0d50d9a32c85a077ec7 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 15:42:25 -0700 Subject: 
[PATCH 062/107] Changed config from examples db to main/superset --- superset/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index dd6c3d5fe616..0ddb0fd7ea67 100644 --- a/superset/config.py +++ b/superset/config.py @@ -81,7 +81,7 @@ # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp' # The SQLAlchemy connection string for incoming examples -SQLALCHEMY_EXAMPLES_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'examples.db') +SQLALCHEMY_EXAMPLES_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'superset.db') # SQLALCHEMY_EXAMPLES_URI = 'mysql://myapp@localhost/examples' # SQLALCHEMY_EXAMPLES_URI = 'postgresql://root:password@localhost/examples' From a7b1e9218b2856ca1599af7c1d853cbb7c1779f7 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 15:42:58 -0700 Subject: [PATCH 063/107] Substitute example db utilities added --- superset/utils/dashboard_import_export.py | 31 ++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 878cf4f2bfe2..e549df39febc 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -18,16 +18,18 @@ import json import logging import os +import re import shutil import tempfile import time import pandas as pd import requests +from sqlalchemy.engine.url import make_url -from superset import db +from superset import app, db from superset.exceptions import SupersetException -from superset.models.core import Dashboard +from superset.models.core import Dashboard, Database from superset.utils.core import decode_dashboards, get_or_create_example_db, \ get_or_create_main_db @@ -67,12 +69,35 @@ def import_dashboards(session, data_stream, import_time=None): session.commit() +def get_db_name(uri): + """Get the DB name from the URI string""" + db_name = make_url(uri).database + if uri.startswith('sqlite'): + db_name = re.match('(?s:.*)/(.+?).db$', db_name).group(1) + return db_name + + +def get_default_example_db(): + """Get the optional substitute database for example import""" + uri = app.config.get('SQLALCHEMY_EXAMPLES_URI') + db_name = get_db_name(uri) + + return db.session.query(Database).filter_by( + database_name=db_name).one() + + def import_example_dashboard(session, import_example_json, data_blob_urls, database_uri, import_time=None): """Imports dashboards from a JSON string and data files to databases""" data = json.loads(import_example_json, object_hook=decode_dashboards) - # TODO: import DRUID datasources + substitute_db_name = get_db_name(database_uri) or \ + get_default_example_db().database_name + + for table in data['datasources']: + type(table).import_obj(table, import_time=import_time, + substitute_db_name=substitute_db_name) + session.commit() for dashboard in data['dashboards']: Dashboard.import_obj( From 44d79e4cfecd720b4fc129c53013ebd83796b70d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 15:46:08 -0700 Subject: [PATCH 064/107] SqlaTable.import_obj() accepts a substitute_db_name --- superset/connectors/sqla/models.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 138b0e5d5cd6..b152cad388ca 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -934,13 +934,14 @@ def fetch_metadata(self): db.session.commit() @classmethod - def 
import_obj(cls, i_datasource, import_time=None): + def import_obj(cls, i_datasource, import_time=None, substitute_db_name=None): """Imports the datasource from the object to the database. Metrics and columns and datasource will be overrided if exists. This function can be used to import/export dashboards between multiple superset instances. Audit metadata isn't copies over. """ + def lookup_sqlatable(table): return db.session.query(SqlaTable).join(Database).filter( SqlaTable.table_name == table.table_name, @@ -949,11 +950,13 @@ def lookup_sqlatable(table): ).first() def lookup_database(table): + db_name = substitute_db_name or table.params_dict['database_name'] return db.session.query(Database).filter_by( - database_name=table.params_dict['database_name']).one() + database_name=db_name).one() + return import_datasource.import_datasource( - db.session, i_datasource, lookup_database, lookup_sqlatable, - import_time) + db.session, i_datasource, lookup_database, + lookup_sqlatable, import_time) @classmethod def query_datasources_by_name( From e8ba1973dfa7924b6839aaec44e1cf5007b5aee8 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 15:55:18 -0700 Subject: [PATCH 065/107] Substitute/debug "main" for superset/examples DB --- superset/utils/dashboard_import_export.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index e549df39febc..b42d4943bb4f 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -74,6 +74,8 @@ def get_db_name(uri): db_name = make_url(uri).database if uri.startswith('sqlite'): db_name = re.match('(?s:.*)/(.+?).db$', db_name).group(1) + if db_name == 'superset': + db_name = 'main' return db_name @@ -93,8 +95,10 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, substitute_db_name = get_db_name(database_uri) or \ get_default_example_db().database_name - + for table in data['datasources']: + logging.debug( + f'Importing table: {table} in substitute_db_name: {substitute_db_name}') type(table).import_obj(table, import_time=import_time, substitute_db_name=substitute_db_name) From 578806b79934cddfec75dc6ae509b40fd776e1e1 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 16:20:57 -0700 Subject: [PATCH 066/107] Rolled back datasource substitution from main->examples db --- superset/connectors/sqla/models.py | 3 ++- superset/utils/dashboard_import_export.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index b152cad388ca..fecb8a2f39d6 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -950,7 +950,8 @@ def lookup_sqlatable(table): ).first() def lookup_database(table): - db_name = substitute_db_name or table.params_dict['database_name'] + # db_name = substitute_db_name or table.params_dict['database_name'] + db_name = table.params_dict['database_name'] return db.session.query(Database).filter_by( database_name=db_name).one() diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index b42d4943bb4f..4bf65c6caf63 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -74,8 +74,6 @@ def get_db_name(uri): db_name = make_url(uri).database if uri.startswith('sqlite'): db_name = re.match('(?s:.*)/(.+?).db$', db_name).group(1) - if db_name == 
'superset': - db_name = 'main' return db_name From c50dbe153019b38f974cc7c50af4a5db6c914f96 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Sun, 9 Jun 2019 16:43:41 -0700 Subject: [PATCH 067/107] Flask-AppBuilder>=2.1.4 --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8e13031cae98..467a6ff1eadb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,8 +23,7 @@ croniter==0.3.29 cryptography==2.4.2 decorator==4.3.0 # via retry defusedxml==0.5.0 # via python3-openid -# Flask-AppBuilder>=2.1.4 --e git+git://github.com/dpgaspar/Flask-AppBuilder.git@master#egg=Flask-AppBuilder +Flask-AppBuilder>=2.1.4 flask-babel==0.11.1 # via flask-appbuilder flask-caching==1.4.0 flask-compress==1.4.0 From e4197dccb0bd26c44d4534ff2d035fc8553c3e77 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 10 Jun 2019 11:58:38 -0700 Subject: [PATCH 068/107] Using main db instead of examples db --- superset/utils/dashboard_import_export.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 4bf65c6caf63..f590a5ee4560 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -91,8 +91,10 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, """Imports dashboards from a JSON string and data files to databases""" data = json.loads(import_example_json, object_hook=decode_dashboards) + # substitute_db_name = get_db_name(database_uri) or \ + # get_default_example_db().database_name substitute_db_name = get_db_name(database_uri) or \ - get_default_example_db().database_name + get_or_create_main_db().database_name for table in data['datasources']: logging.debug( @@ -106,7 +108,8 @@ def import_example_dashboard(session, import_example_json, data_blob_urls, dashboard, import_time=import_time) if len(data['files']) > 0: - examples_engine = get_or_create_example_db(database_uri) + # examples_engine = get_or_create_example_db(database_uri) + examples_engine = get_or_create_main_db() with tempfile.TemporaryDirectory() as tmpdir: for file_info in data['files']: From f86f302a73e1e11d38b02c48dbbc5ac9336b051f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 10 Jun 2019 18:56:16 -0700 Subject: [PATCH 069/107] Added "superset export remove" and associated unit test. 
Also unit test for "superset example import" --- superset/cli.py | 68 ++++++++++++- superset/utils/dashboard_import_export.py | 51 +++++++++- tests/cli_tests.py | 112 ++++++++++++++++++---- 3 files changed, 206 insertions(+), 25 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index d7a9a2989d15..9ddcf8b08654 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -33,9 +33,11 @@ from superset import ( app, appbuilder, data, db, security_manager, ) +from superset.connectors.connector_registry import ConnectorRegistry from superset.data.helpers import download_url_to_blob_url, get_examples_file_list, \ get_examples_uris, list_examples_table from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException +from superset.models.core import Dashboard from superset.utils import ( core as utils, dashboard_import_export, dict_import_export) @@ -58,7 +60,7 @@ def make_shell_context(): def init(): """Inits the Superset application""" utils.get_or_create_main_db() - utils.get_or_create_example_db() + # utils.get_or_create_example_db() appbuilder.add_permissions(update_perms=True) security_manager.sync_role_definitions() @@ -313,9 +315,69 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): @click.option( '--database-uri', '-d', help='Database URI to remove example from', default=config.get('SQLALCHEMY_EXAMPLES_URI')) -def remove_example(example_title, database_uri): +@click.option( + '--examples-repo', '-r', + help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'', + default=None) +@click.option( + '--examples-tag', '-r', + help='Tag or branch of Github repository containing examples. Defaults to \'master\'', + default='master') +def remove_example(example_title, database_uri, examples_repo, examples_tag): """Remove an example dashboard/dataset""" - pass + + # First fetch the example information from Github + examples_repos = [(examples_repo, examples_tag)] \ + if examples_repo else config.get('EXAMPLE_REPOS_TAGS') + examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) + for r in examples_repos] + examples_files = get_examples_file_list(examples_repos_uris) + + # Github authentication via a Personal Access Token for rate limit problems + headers = None + token = config.get('GITHUB_AUTH_TOKEN') + if token: + headers = {'Authorization': 'token %s' % token} + + # temporary - substitute url provided + db_name = 'superset' + + import_example_data = None + for example_file in examples_files: + + metadata_download_url = example_file['metadata_file']['download_url'] + example_metadata_json = requests.get(metadata_download_url, + headers=headers).content + # Cheaply load json without generating objects + example_metadata = json.loads(example_metadata_json) + if example_metadata['description']['title'] == example_title: + import_example_data = json.loads(example_metadata_json) + logging.info( + f"Will remove example '{example_title}' from '{db_name}'") + break + + logging.debug(import_example_data['files']) + + # Get the dashboard and associated records + dashboard_title = \ + import_example_data['dashboards'][0]['__Dashboard__']['dashboard_title'] + logging.debug(f'Got dashboard title {dashboard_title} for removal...') + + utils.get_or_create_main_db() + session = db.session() + + try: + dashboard_import_export.remove_dashboard( + session, + import_example_data, + dashboard_title, + database_uri=database_uri + ) + except DashboardNotFoundException as e: + logging.exception(e) + 
click.echo(click.style( + f'Example {example_title} associated dashboard {dashboard_title} not found!', + fg='red')) @app.cli.command() diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index f590a5ee4560..615518a52d04 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -26,12 +26,16 @@ import pandas as pd import requests from sqlalchemy.engine.url import make_url +from sqlalchemy.exc import ResourceClosedError +from sqlalchemy.orm.exc import NoResultFound from superset import app, db -from superset.exceptions import SupersetException +from superset.connectors.connector_registry import ConnectorRegistry +from superset.exceptions import DashboardNotFoundException from superset.models.core import Dashboard, Database -from superset.utils.core import decode_dashboards, get_or_create_example_db, \ - get_or_create_main_db +from superset.utils.core import ( + decode_dashboards, get_or_create_main_db, +) def import_dashboards(session, data_stream, import_time=None): @@ -153,7 +157,7 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, data = {} if not export_dashboard_ids: logging.error('No dashboards found!') - raise SupersetException('No dashboards found!') + raise DashboardNotFoundException('No dashboards found!') else: data = Dashboard.export_dashboards(export_dashboard_ids, export_data, export_data_dir) @@ -189,3 +193,42 @@ def get_slug(session, dashboard_id=None, dashboard_title=None): dashboard = query.first() slug = getattr(dashboard, 'slug', None) return slug + + +def remove_dashboard(session, import_example_data, dashboard_title, + database_uri=None): + """Remove a dashboard based on id or title""" + + session = db.session() if not session else session + logging.debug(session.query(Dashboard).all()) + + try: + dashboard = session.query(Dashboard).filter( + Dashboard.dashboard_title == dashboard_title, + ).one() + + session.delete(dashboard) + session.commit() + except NoResultFound: + raise DashboardNotFoundException('Dashboard not found!') + + # Remove the associated table metadata + SqlaTable = ConnectorRegistry.sources['table'] + for f in import_example_data['files']: + t = session.query(SqlaTable).filter( + SqlaTable.table_name == f['table_name'] + ).one() + session.delete(t) + session.commit() + + # Now delete the physical data table + # exampled_engine = get_or_create_example_db(database_uri) + examples_engine = get_or_create_main_db() + + try: + pd.read_sql( + f"DROP TABLE {f['table_name']}", + examples_engine.get_sqla_engine(), + ) + except (AttributeError, ResourceClosedError): + pass diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 0d388c0f1aa9..d9424e5048c6 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -6,7 +6,12 @@ import tarfile import tempfile -from superset import app, cli +import pandas as pd + +from superset import app, cli, db +from superset.connectors.connector_registry import ConnectorRegistry +from superset.models.core import Dashboard, Database +from superset.utils.dashboard_import_export import get_or_create_main_db from tests.base_tests import SupersetTestCase config = app.config @@ -82,20 +87,18 @@ def test_examples_menu(self): def test_examples_list(self): """Test `superset examples list`""" - result = self.runner.invoke( - app.cli, ['examples', 'list']) + result = self.runner.invoke(app.cli, ['examples', 'list']) - print("results.output", result.output) found = False for i, line in 
enumerate(result.output.split('\n')): # skip header if i < 3: continue # Odd lines have data - if (i % 2) != 1: + if (i % 2) != 0: row = line[1:-1] parts = [i.strip() for i in row.split('|')] - if parts[0] == 'World Bank Health Information': + if parts[0] == 'World Bank Health Nutrition and Population Stats': found = True # Did we find the example in the list? @@ -103,16 +106,81 @@ def test_examples_list(self): def test_examples_import(self): """Test `superset examples import`""" - result = self.runner.invoke( + self.runner.invoke( app.cli, [ - 'examples', 'import', - ] + 'examples', 'import', '-e', + 'World Bank Health Nutrition and Population Stats', + ], ) + # Did the dashboard get imported to the main DB? + dashboard = db.session.query(Dashboard).filter( + Dashboard.dashboard_title.in_(["World's Bank Data"])).one() + self.assertEqual(dashboard.dashboard_title, "World's Bank Data") + + # Temporary - substitute default + db_name = 'main' + + # Did the data table get imported? + SqlaTable = ConnectorRegistry.sources['table'] + table = ( + db.session.query(SqlaTable) + .join(Database) + .filter( + Database.database_name == db_name and + SqlaTable.table_name == 'wb_health_population') + ).one() + print('table', table) + self.assertEqual(table.name, 'wb_health_population') + + # Did all rows get imported? + df = pd.read_sql('SELECT * FROM wb_health_population', + get_or_create_main_db().get_sqla_engine()) + self.assertEqual(len(df.index), 11770) + def test_examples_remove(self): """Test `superset examples remove`""" - pass + # First add the example... + self.runner.invoke( + app.cli, + [ + 'examples', 'import', '-e', + 'World Bank Health Nutrition and Population Stats', + ], + ) + + # Then remove the example... + self.runner.invoke( + app.cli, + [ + 'examples', 'remove', '-e', + 'World Bank Health Nutrition and Population Stats', + ], + ) + + # Is the dashboard still in the main db? + total = db.session.query(Dashboard).filter( + Dashboard.dashboard_title.in_(["World's Bank Data"])).count() + logging.debug('total 1') + logging.debug(total) + self.assertEqual(total, 0) + + # Is the data table gone? + db_name = 'main' + + # Did the data table get removed? + SqlaTable = ConnectorRegistry.sources['table'] + total = ( + db.session.query(SqlaTable) + .join(Database) + .filter( + Database.database_name == db_name and + SqlaTable.table_name == 'wb_health_population') + ).count() + logging.debug('total 2') + logging.debug(total) + self.assertEqual(total, 0) def test_examples_export(self): """Test `superset examples export`""" @@ -120,10 +188,19 @@ def test_examples_export(self): result = self.runner.invoke( app.cli, [ - 'examples', 'export', '--dashboard-title', 'World\'s Bank Data', - '--description', - 'World Bank Data example about world health populations from 1960-2010.', - '--example-title', 'World Bank Health Information', + 'examples', 'export', '-e', + 'World Bank Health Nutrition and Population Stats', '-t', + "World's Bank Data", '-d', + 'Health Nutrition and Population Statistics database provides key ' + + 'health, nutrition and population statistics gathered from a ' + + 'variety of international and national sources. 
Themes include ' + + 'global surgery, health financing, HIV/AIDS, immunization, ' + + 'infectious diseases, medical resources and usage, noncommunicable ' + + 'diseases, nutrition, population dynamics, reproductive health, ' + + 'universal health coverage, and water and sanitation.', + '-l', 'Apache 2.0', '-u', + 'https://datacatalog.worldbank.org/dataset/' + + 'health-nutrition-and-population-statistics', ]) logging.info(result.output) @@ -140,11 +217,10 @@ def test_examples_export(self): json_f = open(f'{world_health_path}/dashboard.json', 'r') dashboard = json.loads(json_f.read()) desc = dashboard['description'] - self.assertEqual(desc['title'], 'World Bank Health Information') self.assertEqual( - desc['description'], - 'World Bank Data example about world health populations from 1960-2010.', - ) + desc['title'], 'World Bank Health Nutrition and Population Stats') + self.assertEqual( + desc['description'][0:30], 'Health Nutrition and Populatio') # Check the data export by writing out the tarball, getting the file size # and comparing to the metadata size From eeec06bb059932fddc636888924b04433b0edf2d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 07:46:51 -0700 Subject: [PATCH 070/107] From PTable --> tabulate for "superset examples list" --- requirements.txt | 2 +- setup.py | 1 + superset/data/helpers.py | 13 ++++++++----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 242d85f0a819..6e98ede74741 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,7 +55,6 @@ pandas==0.23.4 parsedatetime==2.0.0 pathlib2==2.3.0 polyline==1.3.2 -PTable==0.9.2 prison==0.1.0 # via flask-appbuilder py==1.7.0 # via retry pycparser==2.19 # via cffi @@ -77,6 +76,7 @@ six==1.11.0 # via bleach, cryptography, flask-jwt-extended, flask- sqlalchemy-utils==0.33.11 sqlalchemy==1.3.1 sqlparse==0.2.4 +tabulate==0.8.3 urllib3==1.24.3 # via requests, selenium vine==1.1.4 # via amqp webencodings==0.5.1 # via bleach diff --git a/setup.py b/setup.py index e109c6362a1a..cade8bc15479 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,7 @@ def get_git_sha(): 'sqlalchemy>=1.3.1,<2.0', 'sqlalchemy-utils>=0.33.2', 'sqlparse', + 'tabulate>=0.8.3', 'wtforms-json', ], extras_require={ diff --git a/superset/data/helpers.py b/superset/data/helpers.py index 8ea2efaee444..1092827bf826 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
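For anyone trying the PTable-to-tabulate swap in this patch in isolation, here is a minimal
self-contained sketch of the call pattern list_examples_table() converges on; the row values
are illustrative placeholders rather than real catalog data:

    from tabulate import tabulate  # tabulate>=0.8.3, as pinned above

    headers = ['Title', 'Description', 'Size (MB)', 'Rows',
               'Files', 'URL', 'Created Date', 'Repository', 'Tag']
    rows = [[
        'World Bank Health Nutrition and Population Stats',
        'Key health, nutrition and population statistics...',
        7.6, 11770, 1,
        'https://datacatalog.worldbank.org/...',
        '2019-06-11T00:00:00',
        'apache-superset/examples-data',
        'v0.0.6',
    ]]
    # tabulate() returns a plain string, so there is no PrettyTable object to manage
    print('\n' + tabulate(rows, headers=headers) + '\n')
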
+ """Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W from datetime import datetime @@ -23,8 +24,8 @@ import re import zlib -from prettytable import PrettyTable import requests +from tabulate import tabulate from superset import app, db from superset.connectors.connector_registry import ConnectorRegistry @@ -141,9 +142,10 @@ def get_examples_file_list(examples_repos_uris, examples_tag='master'): def list_examples_table(examples_repo, examples_tag='master', full_fields=True): """Turn a list of available examples into a PrettyTable""" + # Write a pretty table to stdout - t = PrettyTable(field_names=['Title', 'Description', 'Size (MB)', 'Rows', - 'Files', 'URL', 'Created Date', 'Repository', 'Tag']) + headers = ['Title', 'Description', 'Size (MB)', 'Rows', + 'Files', 'URL', 'Created Date', 'Repository', 'Tag'] # Optionally replace the default examples repo with a specified one examples_repos_uris = [(r[0], r[1]) + get_examples_uris(r[0], r[1]) @@ -169,6 +171,7 @@ def date_format(iso_date): dt = datetime.strptime(iso_date, '%Y-%m-%dT%H:%M:%S.%f') return dt.isoformat() + rows = [] for file_info in file_info_list: d = json.loads( @@ -192,6 +195,6 @@ def date_format(iso_date): file_info['repo_name'], file_info['repo_tag'], ] - t.add_row(row) + rows.append(row) - return t + return '\n' + tabulate(rows, headers=headers) + '\n' From 23ea4aa127f2385c98ba9aebfe3f47e78599ff94 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 09:33:38 -0700 Subject: [PATCH 071/107] Now using a SQLAlchemy Model class to remove data table, assuming an "id" column. --- superset/utils/dashboard_import_export.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 615518a52d04..b91b1e2cc470 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -23,10 +23,12 @@ import tempfile import time +from flask_appbuilder import Model import pandas as pd import requests +from sqlalchemy import Column, Integer, MetaData from sqlalchemy.engine.url import make_url -from sqlalchemy.exc import ResourceClosedError +from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm.exc import NoResultFound from superset import app, db @@ -195,8 +197,8 @@ def get_slug(session, dashboard_id=None, dashboard_title=None): return slug -def remove_dashboard(session, import_example_data, dashboard_title, - database_uri=None): +def remove_dashboard(session, import_example_data, dashboard_title, database_uri=None, + primary_key=Column('id', Integer, primary_key=True)): """Remove a dashboard based on id or title""" session = db.session() if not session else session @@ -224,11 +226,12 @@ def remove_dashboard(session, import_example_data, dashboard_title, # Now delete the physical data table # exampled_engine = get_or_create_example_db(database_uri) examples_engine = get_or_create_main_db() + sqla_engine = examples_engine.get_sqla_engine() - try: - pd.read_sql( - f"DROP TABLE {f['table_name']}", - examples_engine.get_sqla_engine(), - ) - except (AttributeError, ResourceClosedError): - pass + # Create a model class on the fly to do a cross-platform table drop + class DropTable(Model): + __tablename__ = f['table_name'] + id = primary_key + + table = DropTable() + table.__table__.drop(sqla_engine) From ff5d44d79f0ff88cc6a9138d9a2fb1f7d3b66c5c Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 
09:40:07 -0700 Subject: [PATCH 072/107] Fixed head for examples database migration --- .../migrations/versions/e5200a951e62_add_dashboards_uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py index 0e46763c59d4..c8200eb38ab0 100644 --- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py +++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py @@ -31,7 +31,7 @@ # revision identifiers, used by Alembic. revision = 'e5200a951e62' -down_revision = 'afc69274c25a' +down_revision = 'd7c1a0d6f2da' Base = declarative_base() From 2f0a26071aefeb30e0fe61d33507bd01bb77ab81 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 12:38:17 -0700 Subject: [PATCH 073/107] Made migration work for postgresql with its builtin UUID type --- .../e5200a951e62_add_dashboards_uuid.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py index c8200eb38ab0..0ed98af3956a 100644 --- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py +++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py @@ -22,8 +22,9 @@ """ import uuid + from alembic import op -from sqlalchemy import Column, Integer, CHAR +from sqlalchemy import CHAR, Column, Integer from sqlalchemy.ext.declarative import declarative_base from sqlalchemy_utils.types.uuid import UUIDType @@ -36,58 +37,70 @@ Base = declarative_base() -get_uuid = lambda: str(uuid.uuid4()) +def get_uuid(): + return str(uuid.uuid4()) + class Dashboard(Base): __tablename__ = 'dashboards' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class Datasource(Base): __tablename__ = 'datasources' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class Database(Base): __tablename__ = 'dbs' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class DruidCluster(Base): __tablename__ = 'clusters' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class DruidMetric(Base): __tablename__ = 'metrics' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) - + + class Slice(Base): __tablename__ = 'slices' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class SqlaTable(Base): __tablename__ = 'tables' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class SqlMetric(Base): __tablename__ = 'sql_metrics' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class TableColumn(Base): __tablename__ = 'table_columns' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class DashboardEmailSchedule(Base): __tablename__ = 'dashboard_email_schedules' id = Column(Integer, primary_key=True) uuid = Column(UUIDType(binary=False), default=get_uuid) + class SliceEmailSchedule(Base): __tablename__ = 'slice_email_schedules' id = Column(Integer, primary_key=True) @@ -97,6 +110,7 @@ class SliceEmailSchedule(Base): def upgrade(): bind = op.get_bind() session = db.Session(bind=bind) + db_type = session.bind.dialect.name def 
add_uuid_column(col_name, _type): """Add a uuid column to a given table""" @@ -105,10 +119,13 @@ def add_uuid_column(col_name, _type): for s in session.query(_type): s.uuid = get_uuid() session.merge(s) - with op.batch_alter_table(col_name) as batch_op: - batch_op.alter_column('uuid', existing_type=CHAR(32), - new_column_name='uuid', nullable=False) - batch_op.create_unique_constraint('uq_uuid', ['uuid']) + + if db_type != 'postgresql': + with op.batch_alter_table(col_name) as batch_op: + batch_op.alter_column('uuid', existing_type=CHAR(32), + new_column_name='uuid', nullable=False) + batch_op.create_unique_constraint('uq_uuid', ['uuid']) + session.commit() add_uuid_column('dashboards', Dashboard) @@ -125,6 +142,7 @@ def add_uuid_column(col_name, _type): session.close() + def downgrade(): with op.batch_alter_table('dashboards') as batch_op: batch_op.drop_column('uuid') @@ -134,19 +152,19 @@ def downgrade(): with op.batch_alter_table('dbs') as batch_op: batch_op.drop_column('uuid') - + with op.batch_alter_table('clusters') as batch_op: batch_op.drop_column('uuid') - + with op.batch_alter_table('metrics') as batch_op: batch_op.drop_column('uuid') with op.batch_alter_table('slices') as batch_op: batch_op.drop_column('uuid') - + with op.batch_alter_table('sql_metrics') as batch_op: batch_op.drop_column('uuid') - + with op.batch_alter_table('tables') as batch_op: batch_op.drop_column('uuid') From 9d46442aa3adcda754c685e1c575d40a7c350043 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 12:49:20 -0700 Subject: [PATCH 074/107] typo fix --- superset/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/cli.py b/superset/cli.py index 9ddcf8b08654..c5b5ca956cb4 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -173,7 +173,7 @@ def examples(): '--url', '-u', default=None, help='URL of dataset home page') def export_example(dashboard_id, dashboard_title, description, example_title, file_name, _license, url): - """Exmport example dashboard/datasets tarball""" + """Export example dashboard/datasets tarball""" if not (dashboard_id or dashboard_title): raise click.UsageError('must supply --dashboard-id/-i or --dashboard-title/-t') exclusive( From d4adbce639c6ee5c1ce37b8d34d1e13bdb9e72de Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 12:54:47 -0700 Subject: [PATCH 075/107] flake8 fixes --- superset/utils/dashboard_import_export.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index b91b1e2cc470..f7816199c405 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -26,9 +26,8 @@ from flask_appbuilder import Model import pandas as pd import requests -from sqlalchemy import Column, Integer, MetaData +from sqlalchemy import Column, Integer from sqlalchemy.engine.url import make_url -from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm.exc import NoResultFound from superset import app, db From 4f134dccf07b370f5f04fb8e61576164aa71c673 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 12:55:52 -0700 Subject: [PATCH 076/107] flake8 fixes --- superset/cli.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index c5b5ca956cb4..e03176a81739 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -33,13 +33,14 @@ from superset import ( app, appbuilder, data, db, security_manager, ) -from 
superset.connectors.connector_registry import ConnectorRegistry -from superset.data.helpers import download_url_to_blob_url, get_examples_file_list, \ - get_examples_uris, list_examples_table +from superset.data.helpers import ( + download_url_to_blob_url, get_examples_file_list, get_examples_uris, + list_examples_table, +) from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException -from superset.models.core import Dashboard from superset.utils import ( - core as utils, dashboard_import_export, dict_import_export) + core as utils, dashboard_import_export, dict_import_export, +) logging.getLogger("urllib3").setLevel(logging.WARNING) From 7af3337eee7e309f874b469afe53ea027a1a2045 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 14:18:34 -0700 Subject: [PATCH 077/107] Changed ImportExportMixin.export_to_json to export_to_json_serializable --- superset/models/core.py | 4 ++-- superset/models/helpers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 1f54febd05c0..834ed902235b 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -708,8 +708,8 @@ def export_dashboards(cls, dashboard_ids, export_data=False, return { 'description': desc, - 'dashboards': [o.export_to_json() for o in copied_dashboards], - 'datasources': [o.export_to_json() for o in eager_datasources], + 'dashboards': [o.export_to_json_serializable() for o in copied_dashboards], + 'datasources': [o.export_to_json_serializable() for o in eager_datasources], 'files': files, } diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 96fc02b1035a..1f1cc9d49b13 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -191,8 +191,8 @@ def import_from_dict(cls, session, dict_rep, parent=None, return obj - def export_to_json(self, recursive=True): - """Export obj to json""" + def export_to_json_serializable(self, recursive=True): + """Export obj to be serializable in json""" return DashboardEncoder.encode(self) def export_to_dict(self, recursive=True, include_parent_ref=False, From 19960dc730de51b9f33d43f834f7405a44b66d4d Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 14:22:14 -0700 Subject: [PATCH 078/107] Moved repo from rjurney/examples-data to apache-superset/examples-data --- superset/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index 907ff9318d77..786d36fbbb27 100644 --- a/superset/config.py +++ b/superset/config.py @@ -644,7 +644,7 @@ class CeleryConfig(object): # Tuple format: Gitub repo full name, tag/branch EXAMPLE_REPOS_TAGS = [ - ('rjurney/examples-data', 'v0.0.6'), + ('apache-superset/examples-data', 'v0.0.6'), ] # Github Authorization Token - in case the examples commands exceed rate limits From 33cd5942d9c76abc26bc50d45a7eef573918ec8f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 14:25:08 -0700 Subject: [PATCH 079/107] Removes exception handling in ImportExportMixin.DashboardEncoder --- superset/utils/core.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index a8a7cf4cae5e..8318f5aabf1a 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -289,27 +289,23 @@ class DashboardEncoder(): # pylint: disable=E0202 @classmethod def encode(cls, o): - try: - if isinstance(o, uuid.UUID): - return str(o) - if isinstance(o, 
datetime): - return {'__datetime__': o.replace(microsecond=0).isoformat()} - if isinstance(o, list): - return [DashboardEncoder.encode(i) for i in o] - if hasattr(o, '__dict__'): - vals = {} - for k, v in o.__dict__.items(): - if k == '_sa_instance_state': - continue - elif k.startswith('json') or k.endswith('json'): - vals[k] = v - else: - vals[k] = DashboardEncoder.encode(v) - return {'__{}__'.format(o.__class__.__name__): vals} - else: - return o - except Exception as e: - logging.exception(e) + if isinstance(o, uuid.UUID): + return str(o) + if isinstance(o, datetime): + return {'__datetime__': o.replace(microsecond=0).isoformat()} + if isinstance(o, list): + return [DashboardEncoder.encode(i) for i in o] + if hasattr(o, '__dict__'): + vals = {} + for k, v in o.__dict__.items(): + if k == '_sa_instance_state': + continue + elif k.startswith('json') or k.endswith('json'): + vals[k] = v + else: + vals[k] = DashboardEncoder.encode(v) + return {'__{}__'.format(o.__class__.__name__): vals} + else: return o From d0a7b2426cf4a19cd5cda3cb593f88c6515a7667 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 14:59:10 -0700 Subject: [PATCH 080/107] Removed duplicate code in dashoard_import_export.py between example import/dashboard import --- superset/cli.py | 13 ++- superset/utils/dashboard_import_export.py | 134 ++++++++++------------ 2 files changed, 69 insertions(+), 78 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index e03176a81739..cf8dc2740079 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -42,7 +42,7 @@ core as utils, dashboard_import_export, dict_import_export, ) -logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger('urllib3').setLevel(logging.WARNING) config = app.config celery_app = utils.get_celery_app(config) @@ -300,13 +300,14 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): data_blob_urls[github_info['name']] = blob_url try: - dashboard_import_export.import_example_dashboard( + dashboard_import_export.import_dashboards( db.session, import_example_json, - data_blob_urls, - database_uri) + is_example=True, + data_blob_urls=data_blob_urls, + database_uri=database_uri) except Exception as e: - logging.error(f'Error importing example dashboard \'{example_title}\'!') + logging.error(f"Error importing example dashboard '{example_title}'!") logging.exception(e) @@ -430,7 +431,7 @@ def import_dashboards(path, recursive): try: with f.open() as data_stream: dashboard_import_export.import_dashboards( - db.session, data_stream) + db.session, data_stream.read()) except Exception as e: logging.error('Error when importing dashboard from file %s', f) logging.error(e) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index f7816199c405..cfbbf043b4e7 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -39,38 +39,76 @@ ) -def import_dashboards(session, data_stream, import_time=None): +def import_dashboard(session, data, import_time): + """Import a Dashboard from exported data""" + for dashboard in data['dashboards']: + Dashboard.import_obj( + dashboard, import_time=import_time) + + +def import_datasources(data, import_time, substitute_db_name=None): + """Import any data sources in Dashboard file""" + for table in data['datasources']: + type(table).import_obj(table, import_time=import_time, + substitute_db_name=substitute_db_name) + + +def table_to_sql(path, table_name, engine): + """Take a file and load it into 
a table""" + logging.info(f'Import data from file {path} into table {table_name}') + + df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True, + compression='infer') + df.to_sql( + table_name, + engine.get_sqla_engine(), + if_exists='replace', + chunksize=500, + index=False) + + +def import_files_to_table(data, is_example=False, data_blob_urls=None): + """Import any files in this exported Dashboard""" + if isinstance(data, dict) and 'files' in data and len(data['files']) > 0: + engine = get_or_create_main_db() + + if is_example: + with tempfile.TemporaryDirectory() as tmpdir: + for file_info in data['files']: + + # Get the github info for the file + blob_file_path = f'{tmpdir}{os.path.sep}{file_info["file_name"]}' + blob_url = data_blob_urls[file_info['file_name']] + + response = requests.get(blob_url, stream=True) + with open(blob_file_path, 'wb') as out_file: + shutil.copyfileobj(response.raw, out_file) + del response + + table_to_sql(blob_file_path, file_info['table_name'], engine) + else: + for table in data['files']: + table_to_sql(table['file_name'], table['table_name'], engine) + + +def import_dashboards(session, data, is_example=False, data_blob_urls=None, + database_uri=None, import_time=None): """Imports dashboards from a stream to databases""" current_tt = int(time.time()) import_time = current_tt if import_time is None else import_time - data = json.loads(data_stream.read(), object_hook=decode_dashboards) + data = json.loads(data, object_hook=decode_dashboards) - for table in data['datasources']: - type(table).import_obj(table, import_time=import_time) + # substitute_db_name = get_db_name(database_uri) or \ + # get_default_example_db().database_name + substitute_db_name = get_db_name(database_uri) if database_uri else \ + get_or_create_main_db().database_name - # TODO: import DRUID datasources + import_dashboard(session, data, import_time) + import_datasources(data, import_time, substitute_db_name=substitute_db_name) session.commit() - for dashboard in data['dashboards']: - Dashboard.import_obj( - dashboard, import_time=import_time) - - # Import any files in this exported Dashboard - if 'files' in data: - if len(data['files']) > 0: - examples_engine = get_or_create_main_db() - for table in data['files']: - logging.info(f'Import data from file {table["file_name"]} into table ' + - f'{table["table_name"]}') - df = pd.read_csv(table['file_name'], parse_dates=True, - infer_datetime_format=True, compression='infer') - df.to_sql( - table['table_name'], - examples_engine.get_sqla_engine(), - if_exists='replace', - chunksize=500, - index=False) + import_files_to_table(data, is_example=True, data_blob_urls=data_blob_urls) session.commit() @@ -91,54 +129,6 @@ def get_default_example_db(): database_name=db_name).one() -def import_example_dashboard(session, import_example_json, data_blob_urls, - database_uri, import_time=None): - """Imports dashboards from a JSON string and data files to databases""" - data = json.loads(import_example_json, object_hook=decode_dashboards) - - # substitute_db_name = get_db_name(database_uri) or \ - # get_default_example_db().database_name - substitute_db_name = get_db_name(database_uri) or \ - get_or_create_main_db().database_name - - for table in data['datasources']: - logging.debug( - f'Importing table: {table} in substitute_db_name: {substitute_db_name}') - type(table).import_obj(table, import_time=import_time, - substitute_db_name=substitute_db_name) - - session.commit() - for dashboard in data['dashboards']: - Dashboard.import_obj( - 
dashboard, import_time=import_time) - - if len(data['files']) > 0: - # examples_engine = get_or_create_example_db(database_uri) - examples_engine = get_or_create_main_db() - - with tempfile.TemporaryDirectory() as tmpdir: - for file_info in data['files']: - # Get the github info for the file - blob_file_path = f'{tmpdir}{os.path.sep}{file_info["file_name"]}' - blob_url = data_blob_urls[file_info['file_name']] - - response = requests.get(blob_url, stream=True) - with open(blob_file_path, 'wb') as out_file: - shutil.copyfileobj(response.raw, out_file) - del response - - df = pd.read_csv(blob_file_path, parse_dates=True, - infer_datetime_format=True, compression='infer') - df.to_sql( - file_info['table_name'], - examples_engine.get_sqla_engine(), - if_exists='replace', - chunksize=500, - index=False) - - session.commit() - - def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_data=False, export_data_dir=None, description=None, export_title=None, _license='Apache 2.0', url=None, From e7a650968700ebdfe3aa0d18d79a7ed3fafadac8 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 15:30:29 -0700 Subject: [PATCH 081/107] Replace click.echo(click.style()) with click.secho() --- superset/cli.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index cf8dc2740079..3513af7cf74a 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -211,10 +211,10 @@ def export_example(dashboard_id, dashboard_title, description, example_title, with tarfile.open(file_name, 'w:gz') as tar: tar.add(tmp_dir_name, arcname=f'{dashboard_slug}') - click.echo(click.style(str(f'Exported example to {file_name}'), fg='blue')) + click.secho(str(f'Exported example to {file_name}'), fg='blue') except DashboardNotFoundException as e: - click.echo(click.style(str(e), fg='red')) + click.secho(str(e), fg='red') exit(1) @@ -284,7 +284,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): if not import_example_json: e = ExampleNotFoundException(f'Example {example_title} not found!') - click.echo(click.style(str(e), fg='red')) + click.secho(str(e), fg='red') exit(1) # Parse data to get file download_urls -> blob_urls @@ -377,9 +377,9 @@ def remove_example(example_title, database_uri, examples_repo, examples_tag): ) except DashboardNotFoundException as e: logging.exception(e) - click.echo(click.style( + click.secho( f'Example {example_title} associated dashboard {dashboard_title} not found!', - fg='red')) + fg='red') @app.cli.command() @@ -467,7 +467,7 @@ def export_dashboards(print_stdout, dashboard_file, dashboard_ids, export_data=export_data, export_data_dir=export_data_dir) except DashboardNotFoundException as e: - click.echo(click.style(str(e), fg='red')) + click.secho(str(e), fg='red') exit(1) if print_stdout or not dashboard_file: print(data) From d3e6b5231004ff9a520c450c5404819cf960286f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 15:34:38 -0700 Subject: [PATCH 082/107] From single to double quotes when quotes appear --- superset/cli.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 3513af7cf74a..e1f1b18a6316 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -221,11 +221,12 @@ def export_example(dashboard_id, dashboard_title, description, example_title, @examples.command('list') @click.option( '--examples-repo', '-r', - help='Full name of Github repository containing examples, ex: 
\'apache-superset/examples-data\'', + help="Full name of Github repository containing examples, ex: " + + "'apache-superset/examples-data'", default=None) @click.option( '--examples-tag', '-r', - help='Tag or branch of Github repository containing examples. Defaults to \'master\'', + help="Tag or branch of Github repository containing examples. Defaults to 'master'", default='master') @click.option( '--full-fields', '-ff', is_flag=True, default=False, help='Print full length fields') @@ -242,11 +243,12 @@ def _list_examples(examples_repo, examples_tag, full_fields): default=config.get('SQLALCHEMY_EXAMPLES_URI')) @click.option( '--examples-repo', '-r', - help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'', + help="Full name of Github repository containing examples, ex: " + + "'apache-superset/examples-data'", default=None) @click.option( '--examples-tag', '-r', - help='Tag or branch of Github repository containing examples. Defaults to \'master\'', + help="Tag or branch of Github repository containing examples. Defaults to 'master'", default='master') @click.option( '--example-title', '-e', help='Title of example to import', required=True) @@ -279,7 +281,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): import_example_json = example_metadata_json import_data_info = example_file['data_files'] logging.info( - f'Will import example \'{example_title}\' from {metadata_download_url}') + f"Will import example '{example_title}' from {metadata_download_url}") break if not import_example_json: @@ -319,11 +321,12 @@ def import_example(example_title, examples_repo, examples_tag, database_uri): default=config.get('SQLALCHEMY_EXAMPLES_URI')) @click.option( '--examples-repo', '-r', - help='Full name of Github repository containing examples, ex: \'apache-superset/examples-data\'', + help="Full name of Github repository containing examples, ex: " + + "'apache-superset/examples-data'", default=None) @click.option( '--examples-tag', '-r', - help='Tag or branch of Github repository containing examples. Defaults to \'master\'', + help="Tag or branch of Github repository containing examples. Defaults to 'master'", default='master') def remove_example(example_title, database_uri, examples_repo, examples_tag): """Remove an example dashboard/dataset""" @@ -452,10 +455,10 @@ def import_dashboards(path, recursive): help='Specify dashboard title to export') @click.option( '--export-data', '-x', default=None, is_flag=True, - help='Export the dashboard\'s data tables as CSV files.') + help="Export the dashboard's data tables as CSV files.") @click.option( '--export-data-dir', '-d', default=config.get('DASHBOARD_EXPORT_DIR'), - help='Specify export directory path. Defaults to \'/tmp\'.') + help="Specify export directory path. 
Defaults to '/tmp'") def export_dashboards(print_stdout, dashboard_file, dashboard_ids, dashboard_titles, export_data, export_data_dir): """Export dashboards to JSON and optionally tables to CSV""" From eef467554eb3804df6d59e3982e5f08379f1cf21 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Tue, 11 Jun 2019 15:36:09 -0700 Subject: [PATCH 083/107] removed unneeded print from unit test for cli --- tests/cli_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index d9424e5048c6..de74af1eb826 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -131,7 +131,6 @@ def test_examples_import(self): Database.database_name == db_name and SqlaTable.table_name == 'wb_health_population') ).one() - print('table', table) self.assertEqual(table.name, 'wb_health_population') # Did all rows get imported? From 86dc7e4909719b107316ed60654ff11c84ea7116 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 13:30:45 -0700 Subject: [PATCH 084/107] Removed temporary file --- superset/dev.py | 103 ------------------------------------------------ 1 file changed, 103 deletions(-) delete mode 100644 superset/dev.py diff --git a/superset/dev.py b/superset/dev.py deleted file mode 100644 index 02c611a27824..000000000000 --- a/superset/dev.py +++ /dev/null @@ -1,103 +0,0 @@ -# superset.core.models imports -"""A collection of ORM sqlalchemy models for Superset""" -from contextlib import closing -from copy import copy, deepcopy -from datetime import datetime -import functools -import json -import logging -import os -import sys -import textwrap -from typing import List - -from flask import escape, g, Markup, request -from flask_appbuilder import Model -from flask_appbuilder.models.decorators import renders -from flask_appbuilder.security.sqla.models import User -import numpy -import pandas as pd -import sqlalchemy as sqla -from sqlalchemy import ( - Boolean, Column, create_engine, DateTime, ForeignKey, Integer, - MetaData, String, Table, Text, -) -from sqlalchemy.engine import url -from sqlalchemy.engine.url import make_url -from sqlalchemy.orm import relationship, sessionmaker, subqueryload -from sqlalchemy.orm.session import make_transient -from sqlalchemy.pool import NullPool -from sqlalchemy.schema import UniqueConstraint -from sqlalchemy.sql import select, text -from sqlalchemy_utils import EncryptedType -import sqlparse - -from superset import app, db, db_engine_specs, security_manager -from superset.connectors.connector_registry import ConnectorRegistry -from superset.legacy import update_time_range -from superset.models.helpers import AuditMixinNullable, ImportExportMixin -from superset.models.tags import ChartUpdater, DashboardUpdater, FavStarUpdater -from superset.models.user_attributes import UserAttribute -from superset.utils import ( - cache as cache_util, - core as utils, -) -from superset.viz import viz_types -from urllib import parse # noqa - -config = app.config -custom_password_store = config.get('SQLALCHEMY_CUSTOM_PASSWORD_STORE') -stats_logger = config.get('STATS_LOGGER') -log_query = config.get('QUERY_LOGGER') -metadata = Model.metadata # pylint: disable=no-member - -PASSWORD_MASK = 'X' * 10 - - -# My setup -dashboard_ids=[1] -dashboard_titles=[] -export_data=True -export_data_dir='.' 
- -session = db.session() - - -# superset.utils.dashboard_import_export imports -from superset.models.core import Dashboard - - -# superset.models.core.export_dashboards internals -copied_dashboards = [] -datasource_ids = set() -for dashboard_id in dashboard_ids: - # make sure that dashboard_id is an integer - dashboard_id = int(dashboard_id) - copied_dashboard = ( - db.session.query(Dashboard) - .options(subqueryload(Dashboard.slices)) - .filter_by(id=dashboard_id).first() - ) - make_transient(copied_dashboard) - for slc in copied_dashboard.slices: - datasource_ids.add((slc.datasource_id, slc.datasource_type)) - # add extra params for the import - slc.alter_params( - remote_id=slc.id, - datasource_name=slc.datasource.name, - schema=slc.datasource.name, - database_name=slc.datasource.database.name, - ) - copied_dashboard.alter_params(remote_id=dashboard_id) - copied_dashboards.append(copied_dashboard) - - eager_datasources = [] - for dashboard_id, dashboard_type in datasource_ids: - eager_datasource = ConnectorRegistry.get_eager_datasource( - db.session, dashboard_type, dashboard_id) - eager_datasource.alter_params( - remote_id=eager_datasource.id, - database_name=eager_datasource.database.name, - ) - make_transient(eager_datasource) - eager_datasources.append(eager_datasource) \ No newline at end of file From 306dacb5ed7e478c2bd7d6882508849ae1d696ee Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 13:38:54 -0700 Subject: [PATCH 085/107] Changed name of DashboardEncoder to SQLAJsonEncoder as it serializes different types beyond Dashboard --- superset/migrations/README | 2 +- superset/models/helpers.py | 4 ++-- superset/utils/core.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/superset/migrations/README b/superset/migrations/README index 98e4f9c44eff..2500aa1bcf72 100755 --- a/superset/migrations/README +++ b/superset/migrations/README @@ -1 +1 @@ -Generic single-database configuration. \ No newline at end of file +Generic single-database configuration. 
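For context on the rename: the encoder wraps any SQLAlchemy model instance under a '__ClassName__' key, handling datetimes and lists along the way, so the old DashboardEncoder name undersold its scope. A simplified, standalone sketch of the scheme, illustrative only (the real class, shown below in superset/utils/core.py, also passes json-typed fields through unencoded):

    from datetime import datetime

    class SQLAJsonEncoderSketch:
        """Simplified stand-in for superset.utils.core.SQLAJsonEncoder."""

        @classmethod
        def encode(cls, o):
            if isinstance(o, datetime):
                return {'__datetime__': o.replace(microsecond=0).isoformat()}
            if isinstance(o, list):
                return [cls.encode(i) for i in o]
            if hasattr(o, '__dict__'):
                # Tag each object with its class name so a decoder can
                # rebuild the right model type on import.
                vals = {k: cls.encode(v) for k, v in o.__dict__.items()
                        if k != '_sa_instance_state'}
                return {'__{}__'.format(o.__class__.__name__): vals}
            return o

    class Fake:
        def __init__(self):
            self.title = 'example'
            self.created_on = datetime(2019, 6, 12)

    print(SQLAJsonEncoderSketch.encode(Fake()))
    # {'__Fake__': {'title': 'example',
    #               'created_on': {'__datetime__': '2019-06-12T00:00:00'}}}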
diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 1f1cc9d49b13..4d79082c36db 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -33,7 +33,7 @@ from sqlalchemy_utils.types.uuid import UUIDType import yaml -from superset.utils.core import DashboardEncoder, QueryStatus +from superset.utils.core import SQLAJsonEncoder, QueryStatus def json_to_dict(json_str): @@ -193,7 +193,7 @@ def import_from_dict(cls, session, dict_rep, parent=None, def export_to_json_serializable(self, recursive=True): """Export obj to be serializable in json""" - return DashboardEncoder.encode(self) + return SQLAJsonEncoder.encode(self) def export_to_dict(self, recursive=True, include_parent_ref=False, include_defaults=False): diff --git a/superset/utils/core.py b/superset/utils/core.py index 8318f5aabf1a..f0aa10708463 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -284,7 +284,7 @@ def decode_dashboards(o): return o -class DashboardEncoder(): +class SQLAJsonEncoder(): # pylint: disable=E0202 @classmethod @@ -294,7 +294,7 @@ def encode(cls, o): if isinstance(o, datetime): return {'__datetime__': o.replace(microsecond=0).isoformat()} if isinstance(o, list): - return [DashboardEncoder.encode(i) for i in o] + return [SQLAJsonEncoder.encode(i) for i in o] if hasattr(o, '__dict__'): vals = {} for k, v in o.__dict__.items(): @@ -303,7 +303,7 @@ def encode(cls, o): elif k.startswith('json') or k.endswith('json'): vals[k] = v else: - vals[k] = DashboardEncoder.encode(v) + vals[k] = SQLAJsonEncoder.encode(v) return {'__{}__'.format(o.__class__.__name__): vals} else: return o From e0cf1d48f17ec9c675d2799c333d15aa0e719230 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 13:48:40 -0700 Subject: [PATCH 086/107] Using data.get in conditional checks for files in import data --- superset/utils/dashboard_import_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index cfbbf043b4e7..d1fa90be4f3b 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -69,7 +69,7 @@ def table_to_sql(path, table_name, engine): def import_files_to_table(data, is_example=False, data_blob_urls=None): """Import any files in this exported Dashboard""" - if isinstance(data, dict) and 'files' in data and len(data['files']) > 0: + if isinstance(data, dict) and data.get('files'): engine = get_or_create_main_db() if is_example: From c5c13538c586ab8dc0efde6a1fb6a38519e92f8e Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 14:00:54 -0700 Subject: [PATCH 087/107] Check for table datasources and throw exception on druid datasources on import --- superset/utils/dashboard_import_export.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index d1fa90be4f3b..29d97b4022a8 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -32,7 +32,7 @@ from superset import app, db from superset.connectors.connector_registry import ConnectorRegistry -from superset.exceptions import DashboardNotFoundException +from superset.exceptions import DashboardNotFoundException, SupersetException from superset.models.core import Dashboard, Database from superset.utils.core import ( decode_dashboards, get_or_create_main_db, @@ -49,8 +49,11 @@ def import_dashboard(session, data, 
import_time): def import_datasources(data, import_time, substitute_db_name=None): """Import any data sources in Dashboard file""" for table in data['datasources']: - type(table).import_obj(table, import_time=import_time, - substitute_db_name=substitute_db_name) + if table.type == 'table': + type(table).import_obj(table, import_time=import_time, + substitute_db_name=substitute_db_name) + else: + raise SupersetException('Druid datasources not supported!') def table_to_sql(path, table_name, engine): From 7b1fa128176477f62ecb73d7677420b4a2b753a3 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 14:08:10 -0700 Subject: [PATCH 088/107] Added error handling to pd.read_csv/to_sql in example import --- superset/utils/dashboard_import_export.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 29d97b4022a8..91ebf4f605d1 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -60,14 +60,19 @@ def table_to_sql(path, table_name, engine): """Take a file and load it into a table""" logging.info(f'Import data from file {path} into table {table_name}') - df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True, - compression='infer') - df.to_sql( - table_name, - engine.get_sqla_engine(), - if_exists='replace', - chunksize=500, - index=False) + try: + df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True, + compression='infer') + df.to_sql( + table_name, + engine.get_sqla_engine(), + if_exists='replace', + chunksize=500, + index=False) + except (pd.errors.ParserError, pd.errors.OutOfBoundsDatetime, + pd.errors.EmptyDataError) as e: + logging.exception(e) + raise SupersetException('Error reading table into database!') def import_files_to_table(data, is_example=False, data_blob_urls=None): From f93fa1a9d61b6287eba8af9fe96c504dbf8787e3 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 15:28:49 -0700 Subject: [PATCH 089/107] Changed method of db initialization in superset.utils.dashboard_import_export.get_slug --- superset/utils/dashboard_import_export.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 91ebf4f605d1..517e92189f7c 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -171,19 +171,12 @@ def export_dashboards(session, dashboard_ids=None, dashboard_titles=None, export_json = json.dumps(data, indent=4, sort_keys=True) - # Remove datasources[].__SqlaTable__.database for example export - # if strip_database: - # parsed_json = json.loads(export_json) - # for datasource in parsed_json['datasources']: - # datasource['__SqlaTable__']['database'] = None - # export_json = '{}' # json.dumps(parsed_json, indent=4, sort_keys=True) - return export_json def get_slug(session, dashboard_id=None, dashboard_title=None): """Get the slug for the name of the directory inside the tarballed example""" - session = db.session() if not session else session + session = session or db.session() query = session.query(Dashboard) slug = None if dashboard_id or dashboard_title: From 008a146da373419cc9185cef616765577b4066ea Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 15:29:57 -0700 Subject: [PATCH 090/107] Set SQLALCHEMY_EXAMPLES_URI to default to SQLALCHEMY_DATABASE_URI --- superset/config.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index 786d36fbbb27..1b33b7bba5cb 100644 --- a/superset/config.py +++ b/superset/config.py @@ -81,7 +81,7 @@ # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp' # The SQLAlchemy connection string for incoming examples -SQLALCHEMY_EXAMPLES_URI = 'sqlite:///' + os.path.join(DATA_DIR, 'superset.db') +SQLALCHEMY_EXAMPLES_URI = SQLALCHEMY_DATABASE_URI # SQLALCHEMY_EXAMPLES_URI = 'mysql://myapp@localhost/examples' # SQLALCHEMY_EXAMPLES_URI = 'postgresql://root:password@localhost/examples' From afc7ade79943ce402f00120543f697d4f84ce339 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 15:31:11 -0700 Subject: [PATCH 091/107] removed substitute_db_name code for now --- superset/connectors/sqla/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 16ba39f528b9..7b36d54fb64e 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -950,7 +950,6 @@ def lookup_sqlatable(table): ).first() def lookup_database(table): - # db_name = substitute_db_name or table.params_dict['database_name'] db_name = table.params_dict['database_name'] return db.session.query(Database).filter_by( database_name=db_name).one() From e4c4efc35f669b8b48d469373b9c83cabc33f1b5 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 12 Jun 2019 16:35:53 -0700 Subject: [PATCH 092/107] Removed uuid from export_fields in core model and made SQLAJsonEncoder look for and add a uuid field if present --- superset/models/core.py | 6 +++--- superset/utils/core.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 834ed902235b..107b0d4bcfca 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -164,7 +164,7 @@ class Slice(Model, AuditMixinNullable, ImportExportMixin): owners = relationship(security_manager.user_model, secondary=slice_user) export_fields = ('slice_name', 'datasource_type', 'datasource_name', - 'viz_type', 'params', 'cache_timeout', 'uuid') + 'viz_type', 'params', 'cache_timeout') def __repr__(self): return self.slice_name or str(self.id) @@ -413,7 +413,7 @@ class Dashboard(Model, AuditMixinNullable, ImportExportMixin): owners = relationship(security_manager.user_model, secondary=dashboard_user) export_fields = ('dashboard_title', 'position_json', 'json_metadata', - 'description', 'css', 'slug', 'uuid') + 'description', 'css', 'slug') def __repr__(self): return self.dashboard_title or str(self.id) @@ -749,7 +749,7 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): impersonate_user = Column(Boolean, default=False) export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', 'allow_run_async', - 'allow_ctas', 'allow_csv_upload', 'extra', 'uuid') + 'allow_ctas', 'allow_csv_upload', 'extra') export_children = ['tables'] def __repr__(self): diff --git a/superset/utils/core.py b/superset/utils/core.py index f0aa10708463..7da24b2cca25 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -297,6 +297,8 @@ def encode(cls, o): return [SQLAJsonEncoder.encode(i) for i in o] if hasattr(o, '__dict__'): vals = {} + if 'uuid' in o.__dict__.keys(): + vals['uuid'] = o.__dict__['uuid'] for k, v in o.__dict__.items(): if k == '_sa_instance_state': continue From 091b399dacfff33c2ea24689000a2525ff52ce87 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: 
Thu, 13 Jun 2019 11:00:42 -0700 Subject: [PATCH 093/107] Refactored get_or_create_main_db/get_or_create_examples_db to get_or_create_db_by_name(db_name="main") --- superset/cli.py | 8 +++--- superset/data/bart_lines.py | 4 +-- superset/data/birth_names.py | 4 +-- superset/data/country_map.py | 2 +- superset/data/energy.py | 2 +- superset/data/flights.py | 2 +- superset/data/long_lat.py | 2 +- superset/data/multiformat_time_series.py | 2 +- superset/data/paris.py | 2 +- superset/data/random_time_series.py | 2 +- superset/data/sf_population_polygons.py | 2 +- superset/data/unicode_test_data.py | 2 +- superset/data/world_bank.py | 2 +- superset/utils/core.py | 34 ++++++----------------- superset/utils/dashboard_import_export.py | 14 +++++----- 15 files changed, 33 insertions(+), 51 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index e1f1b18a6316..bd9ca6cf6082 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -60,8 +60,8 @@ def make_shell_context(): @app.cli.command() def init(): """Inits the Superset application""" - utils.get_or_create_main_db() - # utils.get_or_create_example_db() + utils.get_or_create_db_by_name(db_name='main') + # utils.get_or_create_db_by_name(db_name='examples') appbuilder.add_permissions(update_perms=True) security_manager.sync_role_definitions() @@ -368,7 +368,7 @@ def remove_example(example_title, database_uri, examples_repo, examples_tag): import_example_data['dashboards'][0]['__Dashboard__']['dashboard_title'] logging.debug(f'Got dashboard title {dashboard_title} for removal...') - utils.get_or_create_main_db() + utils.get_or_create_db_by_name(db_name='main') session = db.session() try: @@ -648,7 +648,7 @@ def load_test_users_run(): gamma_sqllab_role = security_manager.add_role('gamma_sqllab') for perm in security_manager.find_role('Gamma').permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) - utils.get_or_create_main_db() + utils.get_or_create_db_by_name(db_name='main') db_perm = utils.get_main_database(security_manager.get_session).perm security_manager.add_permission_view_menu('database_access', db_perm) db_pvm = security_manager.find_permission_view_menu( diff --git a/superset/data/bart_lines.py b/superset/data/bart_lines.py index f4e0b1f09cfc..b9ce8173d715 100644 --- a/superset/data/bart_lines.py +++ b/superset/data/bart_lines.py @@ -21,7 +21,7 @@ from sqlalchemy import String, Text from superset import db -from superset.utils.core import get_or_create_main_db +from superset.utils.core import get_or_create_db_by_name from .helpers import TBL, get_example_data @@ -50,7 +50,7 @@ def load_bart_lines(): if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'BART lines' - tbl.database = get_or_create_main_db() + tbl.database = get_or_create_db_by_name(db_name='main') db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/data/birth_names.py b/superset/data/birth_names.py index 4f11ac5b7dd9..5ec54e30a2d2 100644 --- a/superset/data/birth_names.py +++ b/superset/data/birth_names.py @@ -22,7 +22,7 @@ from superset import db, security_manager from superset.connectors.sqla.models import SqlMetric, TableColumn -from superset.utils.core import get_or_create_main_db +from superset.utils.core import get_or_create_db_by_name from .helpers import ( config, Dash, @@ -60,7 +60,7 @@ def load_birth_names(): if not obj: obj = TBL(table_name='birth_names') obj.main_dttm_col = 'ds' - obj.database = get_or_create_main_db() + obj.database = get_or_create_db_by_name(db_name='main') obj.filter_select_enabled = 
True if not any(col.column_name == 'num_california' for col in obj.columns): diff --git a/superset/data/country_map.py b/superset/data/country_map.py index e74638bfbc71..b093277a4744 100644 --- a/superset/data/country_map.py +++ b/superset/data/country_map.py @@ -67,7 +67,7 @@ def load_country_map_data(): if not obj: obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' - obj.database = utils.get_or_create_main_db() + obj.database = utils.get_or_create_db_by_name(db_name='main') if not any(col.metric_name == 'avg__2004' for col in obj.metrics): obj.metrics.append(SqlMetric( metric_name='avg__2004', diff --git a/superset/data/energy.py b/superset/data/energy.py index e1d48e76a75a..f4c88fd2f5d6 100644 --- a/superset/data/energy.py +++ b/superset/data/energy.py @@ -51,7 +51,7 @@ def load_energy(): if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Energy consumption' - tbl.database = utils.get_or_create_main_db() + tbl.database = utils.get_or_create_db_by_name(db_name='main') if not any(col.metric_name == 'sum__value' for col in tbl.metrics): tbl.metrics.append(SqlMetric( diff --git a/superset/data/flights.py b/superset/data/flights.py index 25112444fc29..556b8336fc7b 100644 --- a/superset/data/flights.py +++ b/superset/data/flights.py @@ -54,7 +54,7 @@ def load_flights(): if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Random set of flights in the US' - tbl.database = utils.get_or_create_main_db() + tbl.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/data/long_lat.py b/superset/data/long_lat.py index 18f477cfa404..e1ae194d3758 100644 --- a/superset/data/long_lat.py +++ b/superset/data/long_lat.py @@ -79,7 +79,7 @@ def load_long_lat_data(): if not obj: obj = TBL(table_name='long_lat') obj.main_dttm_col = 'datetime' - obj.database = utils.get_or_create_main_db() + obj.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(obj) db.session.commit() obj.fetch_metadata() diff --git a/superset/data/multiformat_time_series.py b/superset/data/multiformat_time_series.py index 58ff7fbb0d32..98b78760eed2 100644 --- a/superset/data/multiformat_time_series.py +++ b/superset/data/multiformat_time_series.py @@ -61,7 +61,7 @@ def load_multiformat_time_series(): if not obj: obj = TBL(table_name='multiformat_time_series') obj.main_dttm_col = 'ds' - obj.database = utils.get_or_create_main_db() + obj.database = utils.get_or_create_db_by_name(db_name='main') dttm_and_expr_dict = { 'ds': [None, None], 'ds2': [None, None], diff --git a/superset/data/paris.py b/superset/data/paris.py index 2ed3f8eaea07..f5fcbd7892e4 100644 --- a/superset/data/paris.py +++ b/superset/data/paris.py @@ -48,7 +48,7 @@ def load_paris_iris_geojson(): if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Map of Paris' - tbl.database = utils.get_or_create_main_db() + tbl.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/data/random_time_series.py b/superset/data/random_time_series.py index ee7450a63405..4c149a3bf622 100644 --- a/superset/data/random_time_series.py +++ b/superset/data/random_time_series.py @@ -52,7 +52,7 @@ def load_random_time_series_data(): if not obj: obj = TBL(table_name='random_time_series') obj.main_dttm_col = 'ds' - obj.database = utils.get_or_create_main_db() + obj.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(obj) 
db.session.commit() obj.fetch_metadata() diff --git a/superset/data/sf_population_polygons.py b/superset/data/sf_population_polygons.py index 2248a48dafec..7ed5c8816d88 100644 --- a/superset/data/sf_population_polygons.py +++ b/superset/data/sf_population_polygons.py @@ -48,7 +48,7 @@ def load_sf_population_polygons(): if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Population density of San Francisco' - tbl.database = utils.get_or_create_main_db() + tbl.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/data/unicode_test_data.py b/superset/data/unicode_test_data.py index 03c00a7b07fc..e978c4df920e 100644 --- a/superset/data/unicode_test_data.py +++ b/superset/data/unicode_test_data.py @@ -64,7 +64,7 @@ def load_unicode_test_data(): if not obj: obj = TBL(table_name='unicode_test') obj.main_dttm_col = 'dttm' - obj.database = utils.get_or_create_main_db() + obj.database = utils.get_or_create_db_by_name(db_name='main') db.session.merge(obj) db.session.commit() obj.fetch_metadata() diff --git a/superset/data/world_bank.py b/superset/data/world_bank.py index 94aa468ccd3c..34f932cd906e 100644 --- a/superset/data/world_bank.py +++ b/superset/data/world_bank.py @@ -66,7 +66,7 @@ def load_world_bank_health_n_pop(): tbl = TBL(table_name=tbl_name) tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md')) tbl.main_dttm_col = 'year' - tbl.database = utils.get_or_create_main_db() + tbl.database = utils.get_or_create_db_by_name(db_name='main') tbl.filter_select_enabled = True metrics = [ diff --git a/superset/utils/core.py b/superset/utils/core.py index 7da24b2cca25..f6763ba4f824 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -887,19 +887,23 @@ def user_label(user: User) -> Optional[str]: return None -def get_or_create_main_db(): +def get_or_create_db_by_name(db_name='main', database_uri=None): from superset import conf, db from superset.models import core as models - logging.info('Creating database reference') + logging.info(f'Creating database reference {db_name}') dbobj = get_main_database(db.session) if not dbobj: dbobj = models.Database( - database_name='main', + database_name=db_name, allow_csv_upload=True, expose_in_sqllab=True, ) - dbobj.set_sqlalchemy_uri(conf.get('SQLALCHEMY_DATABASE_URI')) + if db_name == 'examples': + database_uri = database_uri or conf.get('SQLALCHEMY_EXAMPLES_URI') + else: + database_uri = conf.get('SQLALCHEMY_DATABASE_URI') + dbobj.set_sqlalchemy_uri(database_uri) db.session.add(dbobj) db.session.commit() return dbobj @@ -914,28 +918,6 @@ def get_main_database(session): ) -def get_or_create_example_db(database_uri=None): - """Get or create the examples Database connection""" - from superset import conf, db - from superset.models import core as models - - if not database_uri: - database_uri = conf.get('SQLALCHEMY_EXAMPLES_URI') - - logging.info('Creating database reference') - dbobj = get_examples_database(db.session) - if not dbobj: - dbobj = models.Database( - database_name='examples', - allow_csv_upload=True, - expose_in_sqllab=True, - ) - dbobj.set_sqlalchemy_uri(database_uri) - db.session.add(dbobj) - db.session.commit() - return dbobj - - def get_examples_database(session): from superset.models import core as models return ( diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 517e92189f7c..6209ad49f3dd 100644 --- a/superset/utils/dashboard_import_export.py +++ 
b/superset/utils/dashboard_import_export.py @@ -35,7 +35,7 @@ from superset.exceptions import DashboardNotFoundException, SupersetException from superset.models.core import Dashboard, Database from superset.utils.core import ( - decode_dashboards, get_or_create_main_db, + decode_dashboards, get_or_create_db_by_name, ) @@ -78,7 +78,7 @@ def table_to_sql(path, table_name, engine): def import_files_to_table(data, is_example=False, data_blob_urls=None): """Import any files in this exported Dashboard""" if isinstance(data, dict) and data.get('files'): - engine = get_or_create_main_db() + engine = get_or_create_db_by_name(db_name='main') if is_example: with tempfile.TemporaryDirectory() as tmpdir: @@ -107,10 +107,10 @@ def import_dashboards(session, data, is_example=False, data_blob_urls=None, data = json.loads(data, object_hook=decode_dashboards) - # substitute_db_name = get_db_name(database_uri) or \ - # get_default_example_db().database_name + # substitute_db_name = get_db_name(database_uri) if database_uri else \ + # get_or_create_db_by_name(db_name='examples').database_name substitute_db_name = get_db_name(database_uri) if database_uri else \ - get_or_create_main_db().database_name + get_or_create_db_by_name(db_name='main').database_name import_dashboard(session, data, import_time) import_datasources(data, import_time, substitute_db_name=substitute_db_name) @@ -214,8 +214,8 @@ def remove_dashboard(session, import_example_data, dashboard_title, database_uri session.commit() # Now delete the physical data table - # exampled_engine = get_or_create_example_db(database_uri) - examples_engine = get_or_create_main_db() + # examples_engine = get_or_create_db_by_name(db_name='examples') + examples_engine = get_or_create_db_by_name(db_name='main') sqla_engine = examples_engine.get_sqla_engine() # Create a model class on the fly to do a cross-platform table drop From c93d81da50ff7aa4c658c7555b5d1eb3b661615f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 13 Jun 2019 11:02:31 -0700 Subject: [PATCH 094/107] ImportExportMixin(object) - the object was not needed in py3 --- superset/models/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 4d79082c36db..6db778f410f8 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -49,7 +49,7 @@ def get_uuid(): return str(uuid.uuid4()) -class ImportExportMixin(object): +class ImportExportMixin(): export_parent = None # The name of the attribute # with the SQL Alchemy back reference From 5f84f11865f48888ac63867f7fb50c2d4e5c1c82 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 13 Jun 2019 11:08:20 -0700 Subject: [PATCH 095/107] Add default empty list to data.get("files") in dashboard_import_export.import_files_to_table --- superset/utils/dashboard_import_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 6209ad49f3dd..823f34bcacb6 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -77,7 +77,7 @@ def table_to_sql(path, table_name, engine): def import_files_to_table(data, is_example=False, data_blob_urls=None): """Import any files in this exported Dashboard""" - if isinstance(data, dict) and data.get('files'): + if isinstance(data, dict) and data.get('files', []): engine = get_or_create_db_by_name(db_name='main') if is_example: From af52e8fc6ad6bf0c5ac358f6f12496cf247b8d67 Mon 
Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 13 Jun 2019 11:19:44 -0700 Subject: [PATCH 096/107] Removed druid tests in dict_import_export_tests as it is deprecated. --- tests/dict_import_export_tests.py | 133 ------------------------------ 1 file changed, 133 deletions(-) diff --git a/tests/dict_import_export_tests.py b/tests/dict_import_export_tests.py index f1f93fa64f89..504ce88596e9 100644 --- a/tests/dict_import_export_tests.py +++ b/tests/dict_import_export_tests.py @@ -45,9 +45,6 @@ def delete_imports(cls): for table in session.query(SqlaTable): if DBREF in table.params_dict: session.delete(table) - for datasource in session.query(DruidDatasource): - if DBREF in datasource.params_dict: - session.delete(datasource) session.commit() @classmethod @@ -87,36 +84,6 @@ def create_table( table.metrics.append(SqlMetric(metric_name=metric_name, expression='')) return table, dict_rep - def create_druid_datasource( - self, name, id=0, cols_names=[], metric_names=[]): - name = '{0}{1}'.format(NAME_PREFIX, name) - cluster_name = 'druid_test' - params = {DBREF: id, 'database_name': cluster_name} - dict_rep = { - 'cluster_name': cluster_name, - 'datasource_name': name, - 'id': id, - 'params': json.dumps(params), - 'columns': [{'column_name': c} for c in cols_names], - 'metrics': [{'metric_name': c, 'json': '{}'} for c in metric_names], - } - - datasource = DruidDatasource( - id=id, - datasource_name=name, - cluster_name=cluster_name, - params=json.dumps(params), - ) - for col_name in cols_names: - datasource.columns.append(DruidColumn(column_name=col_name)) - for metric_name in metric_names: - datasource.metrics.append(DruidMetric(metric_name=metric_name)) - return datasource, dict_rep - - def get_datasource(self, datasource_id): - return db.session.query(DruidDatasource).filter_by( - id=datasource_id).first() - def get_table_by_name(self, name): return db.session.query(SqlaTable).filter_by( table_name=name).first() @@ -257,106 +224,6 @@ def test_import_table_override_identical(self): self.yaml_compare(imported_copy_table.export_to_dict(), imported_table.export_to_dict()) - def test_import_druid_no_metadata(self): - datasource, dict_datasource = self.create_druid_datasource( - 'pure_druid', id=ID_PREFIX + 1) - imported_cluster = DruidDatasource.import_from_dict(db.session, - dict_datasource) - db.session.commit() - imported = self.get_datasource(imported_cluster.id) - self.assert_datasource_equals(datasource, imported) - - def test_import_druid_1_col_1_met(self): - datasource, dict_datasource = self.create_druid_datasource( - 'druid_1_col_1_met', id=ID_PREFIX + 2, - cols_names=['col1'], metric_names=['metric1']) - imported_cluster = DruidDatasource.import_from_dict(db.session, - dict_datasource) - db.session.commit() - imported = self.get_datasource(imported_cluster.id) - self.assert_datasource_equals(datasource, imported) - self.assertEquals( - {DBREF: ID_PREFIX + 2, 'database_name': 'druid_test'}, - json.loads(imported.params)) - - def test_import_druid_2_col_2_met(self): - datasource, dict_datasource = self.create_druid_datasource( - 'druid_2_col_2_met', id=ID_PREFIX + 3, cols_names=['c1', 'c2'], - metric_names=['m1', 'm2']) - imported_cluster = DruidDatasource.import_from_dict(db.session, - dict_datasource) - db.session.commit() - imported = self.get_datasource(imported_cluster.id) - self.assert_datasource_equals(datasource, imported) - - def test_import_druid_override_append(self): - datasource, dict_datasource = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, 
cols_names=['col1'], - metric_names=['m1']) - imported_cluster = DruidDatasource.import_from_dict(db.session, - dict_datasource) - db.session.commit() - table_over, table_over_dict = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, - cols_names=['new_col1', 'col2', 'col3'], - metric_names=['new_metric1']) - imported_over_cluster = DruidDatasource.import_from_dict( - db.session, - table_over_dict) - db.session.commit() - imported_over = self.get_datasource(imported_over_cluster.id) - self.assertEquals(imported_cluster.id, imported_over.id) - expected_datasource, _ = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, - metric_names=['new_metric1', 'm1'], - cols_names=['col1', 'new_col1', 'col2', 'col3']) - self.assert_datasource_equals(expected_datasource, imported_over) - - def test_import_druid_override_sync(self): - datasource, dict_datasource = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, cols_names=['col1'], - metric_names=['m1']) - imported_cluster = DruidDatasource.import_from_dict( - db.session, - dict_datasource) - db.session.commit() - table_over, table_over_dict = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, - cols_names=['new_col1', 'col2', 'col3'], - metric_names=['new_metric1']) - imported_over_cluster = DruidDatasource.import_from_dict( - session=db.session, - dict_rep=table_over_dict, - sync=['metrics', 'columns']) # syncing metrics and columns - db.session.commit() - imported_over = self.get_datasource(imported_over_cluster.id) - self.assertEquals(imported_cluster.id, imported_over.id) - expected_datasource, _ = self.create_druid_datasource( - 'druid_override', id=ID_PREFIX + 3, - metric_names=['new_metric1'], - cols_names=['new_col1', 'col2', 'col3']) - self.assert_datasource_equals(expected_datasource, imported_over) - - def test_import_druid_override_identical(self): - datasource, dict_datasource = self.create_druid_datasource( - 'copy_cat', id=ID_PREFIX + 4, - cols_names=['new_col1', 'col2', 'col3'], - metric_names=['new_metric1']) - imported = DruidDatasource.import_from_dict(session=db.session, - dict_rep=dict_datasource) - db.session.commit() - copy_datasource, dict_cp_datasource = self.create_druid_datasource( - 'copy_cat', id=ID_PREFIX + 4, - cols_names=['new_col1', 'col2', 'col3'], - metric_names=['new_metric1']) - imported_copy = DruidDatasource.import_from_dict(db.session, - dict_cp_datasource) - db.session.commit() - - self.assertEquals(imported.id, imported_copy.id) - self.assert_datasource_equals( - copy_datasource, self.get_datasource(imported.id)) - if __name__ == '__main__': unittest.main() From a1e86018d186440ff5919c97a01c9e9fd1e00254 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 13 Jun 2019 11:22:11 -0700 Subject: [PATCH 097/107] Changed debug outputs to info --- superset/cli.py | 4 ++-- superset/utils/dashboard_import_export.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index bd9ca6cf6082..17ecd4265dec 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -361,12 +361,12 @@ def remove_example(example_title, database_uri, examples_repo, examples_tag): f"Will remove example '{example_title}' from '{db_name}'") break - logging.debug(import_example_data['files']) + logging.info(import_example_data['files']) # Get the dashboard and associated records dashboard_title = \ import_example_data['dashboards'][0]['__Dashboard__']['dashboard_title'] - logging.debug(f'Got dashboard title {dashboard_title} for 
removal...') + logging.info(f'Got dashboard title {dashboard_title} for removal...') utils.get_or_create_db_by_name(db_name='main') session = db.session() diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index 823f34bcacb6..b51eef29905e 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -192,7 +192,7 @@ def remove_dashboard(session, import_example_data, dashboard_title, database_uri """Remove a dashboard based on id or title""" session = db.session() if not session else session - logging.debug(session.query(Dashboard).all()) + logging.info(session.query(Dashboard).all()) try: dashboard = session.query(Dashboard).filter( From b19059fe1f00bc369fa1180c9f106256dd49b98f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 14 Jun 2019 13:20:41 -0700 Subject: [PATCH 098/107] Removed Druid datasource import --- superset/utils/dict_import_export.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/superset/utils/dict_import_export.py b/superset/utils/dict_import_export.py index ca57c6b177d4..374a575684df 100644 --- a/superset/utils/dict_import_export.py +++ b/superset/utils/dict_import_export.py @@ -72,11 +72,6 @@ def import_from_dict(session, data, sync=[]): for database in data.get(DATABASES_KEY, []): Database.import_from_dict(session, database, sync=sync) - logging.info('Importing %d %s', - len(data.get(DRUID_CLUSTERS_KEY, [])), - DRUID_CLUSTERS_KEY) - for datasource in data.get(DRUID_CLUSTERS_KEY, []): - DruidCluster.import_from_dict(session, datasource, sync=sync) session.commit() else: logging.info('Supplied object is not a dictionary.') From 8f5b685055312b5aa443ac9d8f4efd571d792b68 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 17 Jun 2019 13:28:10 -0700 Subject: [PATCH 099/107] Moved uuid column to helpers functions and away from core models --- superset/models/core.py | 6 +++--- superset/models/helpers.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 107b0d4bcfca..834ed902235b 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -164,7 +164,7 @@ class Slice(Model, AuditMixinNullable, ImportExportMixin): owners = relationship(security_manager.user_model, secondary=slice_user) export_fields = ('slice_name', 'datasource_type', 'datasource_name', - 'viz_type', 'params', 'cache_timeout') + 'viz_type', 'params', 'cache_timeout', 'uuid') def __repr__(self): return self.slice_name or str(self.id) @@ -413,7 +413,7 @@ class Dashboard(Model, AuditMixinNullable, ImportExportMixin): owners = relationship(security_manager.user_model, secondary=dashboard_user) export_fields = ('dashboard_title', 'position_json', 'json_metadata', - 'description', 'css', 'slug') + 'description', 'css', 'slug', 'uuid') def __repr__(self): return self.dashboard_title or str(self.id) @@ -749,7 +749,7 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): impersonate_user = Column(Boolean, default=False) export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', 'allow_run_async', - 'allow_ctas', 'allow_csv_upload', 'extra') + 'allow_ctas', 'allow_csv_upload', 'extra', 'uuid') export_children = ['tables'] def __repr__(self): diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 6db778f410f8..067eff777718 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -58,7 +58,7 @@ class ImportExportMixin(): # List of (str) names 
of attributes # with the SQL Alchemy forward references - export_fields = [] + export_fields = ['uuid'] # The names of the attributes # that are available for import and export @@ -94,7 +94,7 @@ def formatter(c): str(c.type), c.default.arg) if c.default else str(c.type)) schema = {c.name: formatter(c) for c in cls.__table__.columns - if (c.name in cls.export_fields and + if (c.name in cls.export_fields + ['uuid'] and c.name not in parent_excludes)} if recursive: for c in cls.export_children: @@ -108,7 +108,7 @@ def import_from_dict(cls, session, dict_rep, parent=None, recursive=True, sync=[]): """Import obj from a dictionary""" parent_refs = cls._parent_foreign_key_mappings() - export_fields = set(cls.export_fields) | set(parent_refs.keys()) + export_fields = set(cls.export_fields + ['uuid']) | set(parent_refs.keys()) new_children = {c: dict_rep.get(c) for c in cls.export_children if c in dict_rep} unique_constrains = cls._unique_constrains() @@ -117,7 +117,7 @@ def import_from_dict(cls, session, dict_rep, parent=None, # Remove fields that should not get imported for k in list(dict_rep): - if k not in export_fields: + if k not in export_fields + ['uuid']: del dict_rep[k] if not parent: @@ -207,7 +207,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False, dict_rep = {c.name: getattr(self, c.name) for c in cls.__table__.columns - if (c.name in self.export_fields and + if (c.name in self.export_fields + ['uuid'] and c.name not in parent_excludes and (include_defaults or ( getattr(self, c.name) is not None and @@ -231,7 +231,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False, def override(self, obj): """Overrides the plain fields of the dashboard.""" - for field in obj.__class__.export_fields: + for field in obj.__class__.export_fields + ['uuid']: setattr(self, field, getattr(obj, field)) def copy(self): From 1b80bf2b9f615f20cb1221ba029da3dd3ed44a4e Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 20 Jun 2019 16:29:25 -0700 Subject: [PATCH 100/107] Defined and used ImportExportMixin.export_fields_with_uuid --- superset/models/helpers.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 067eff777718..f06f9df1be8a 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -58,12 +58,16 @@ class ImportExportMixin(): # List of (str) names of attributes # with the SQL Alchemy forward references - export_fields = ['uuid'] + export_fields = [] # The names of the attributes # that are available for import and export uuid = sa.Column(UUIDType(binary=False), unique=True, default=get_uuid) + @classmethod + def export_fields_with_uuid(cls): + return list(cls.export_fields) + ['uuid'] + @classmethod def _parent_foreign_key_mappings(cls): """Get a mapping of foreign name to the local name of foreign keys""" @@ -94,8 +98,8 @@ def formatter(c): str(c.type), c.default.arg) if c.default else str(c.type)) schema = {c.name: formatter(c) for c in cls.__table__.columns - if (c.name in cls.export_fields + ['uuid'] and - c.name not in parent_excludes)} + if (c.name in cls.export_fields_with_uuid() and + c.name not in parent_excludes)} if recursive: for c in cls.export_children: child_class = cls.__mapper__.relationships[c].argument.class_ @@ -108,7 +112,7 @@ def import_from_dict(cls, session, dict_rep, parent=None, recursive=True, sync=[]): """Import obj from a dictionary""" parent_refs = cls._parent_foreign_key_mappings() - export_fields = 
set(cls.export_fields + ['uuid']) | set(parent_refs.keys())
+        export_fields = set(cls.export_fields_with_uuid()) | set(parent_refs.keys())
         new_children = {c: dict_rep.get(c) for c in cls.export_children
                         if c in dict_rep}
         unique_constrains = cls._unique_constrains()
@@ -117,7 +121,7 @@ def import_from_dict(cls, session, dict_rep, parent=None,
 
         # Remove fields that should not get imported
         for k in list(dict_rep):
-            if k not in export_fields + ['uuid']:
+            if k not in export_fields:
                 del dict_rep[k]
 
         if not parent:
@@ -207,7 +211,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False,
 
         dict_rep = {c.name: getattr(self, c.name)
                     for c in cls.__table__.columns
-                    if (c.name in self.export_fields + ['uuid'] and
+                    if (c.name in self.export_fields_with_uuid() and
                         c.name not in parent_excludes and
                         (include_defaults or (
                             getattr(self, c.name) is not None and
@@ -231,7 +235,7 @@ def export_to_dict(self, recursive=True, include_parent_ref=False,
 
     def override(self, obj):
         """Overrides the plain fields of the dashboard."""
-        for field in obj.__class__.export_fields + ['uuid']:
+        for field in obj.__class__.export_fields_with_uuid():
             setattr(self, field, getattr(obj, field))
 
     def copy(self):

From 4b1ae6905fd7b3577a37bb99377f0689bc04036c Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Thu, 20 Jun 2019 16:30:12 -0700
Subject: [PATCH 101/107] Removed uuid from model class export fields and
 finished implementation of uuid-based import

---
 superset/models/core.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/superset/models/core.py b/superset/models/core.py
index 834ed902235b..30f6e5646ccc 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -164,7 +164,7 @@ class Slice(Model, AuditMixinNullable, ImportExportMixin):
     owners = relationship(security_manager.user_model, secondary=slice_user)
 
     export_fields = ('slice_name', 'datasource_type', 'datasource_name',
-                     'viz_type', 'params', 'cache_timeout', 'uuid')
+                     'viz_type', 'params', 'cache_timeout')
 
     def __repr__(self):
         return self.slice_name or str(self.id)
@@ -352,11 +352,11 @@ def import_obj(cls, slc_to_import, slc_to_override, import_time=None):
         session = db.session
         make_transient(slc_to_import)
         slc_to_import.dashboards = []
-        slc_to_import.alter_params(
-            remote_id=slc_to_import.id, import_time=import_time)
+        slc_to_import.alter_params(remote_id=slc_to_import.id, import_time=import_time)
 
-        slc_to_import = slc_to_import.copy()
+        slc_to_import = slc_to_import.copy()  # Resets id to None
         params = slc_to_import.params_dict
+
         slc_to_import.datasource_id = ConnectorRegistry.get_datasource_by_name(
             session, slc_to_import.datasource_type, params['datasource_name'],
             params['schema'], params['database_name']).id
@@ -413,7 +413,7 @@ class Dashboard(Model, AuditMixinNullable, ImportExportMixin):
     owners = relationship(security_manager.user_model, secondary=dashboard_user)
 
     export_fields = ('dashboard_title', 'position_json', 'json_metadata',
-                     'description', 'css', 'slug', 'uuid')
+                     'description', 'css', 'slug')
 
     def __repr__(self):
         return self.dashboard_title or str(self.id)
@@ -553,15 +553,14 @@ def alter_positions(dashboard, old_to_new_slc_id_dict):
         new_timed_refresh_immune_slices = []
         new_expanded_slices = {}
         i_params_dict = dashboard_to_import.params_dict
-        remote_id_slice_map = {
-            slc.params_dict['remote_id']: slc
+        uuid_slice_map = {
+            slc.uuid: slc
             for slc in session.query(Slice).all()
-            if 'remote_id' in slc.params_dict
         }
         for slc in slices:
             logging.info('Importing slice {} from the
dashboard: {}'.format( slc.to_json(), dashboard_to_import.dashboard_title)) - remote_slc = remote_id_slice_map.get(slc.id) + remote_slc = uuid_slice_map.get(slc.uuid) new_slc_id = Slice.import_obj(slc, remote_slc, import_time=import_time) old_to_new_slc_id_dict[slc.id] = new_slc_id # update json metadata that deals with slice ids @@ -582,14 +581,15 @@ def alter_positions(dashboard, old_to_new_slc_id_dict): # override the dashboard existing_dashboard = None for dash in session.query(Dashboard).all(): - if ('remote_id' in dash.params_dict and - dash.params_dict['remote_id'] == - dashboard_to_import.id): + if ('uuid' in dash.params_dict and + dash.params_dict['uuid'] == + dashboard_to_import.uuid): existing_dashboard = dash dashboard_to_import.id = None alter_positions(dashboard_to_import, old_to_new_slc_id_dict) - dashboard_to_import.alter_params(import_time=import_time) + dashboard_to_import.alter_params(import_time=import_time, + uuid=dashboard_to_import.uuid) if new_expanded_slices: dashboard_to_import.alter_params( expanded_slices=new_expanded_slices) @@ -749,7 +749,7 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): impersonate_user = Column(Boolean, default=False) export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', 'allow_run_async', - 'allow_ctas', 'allow_csv_upload', 'extra', 'uuid') + 'allow_ctas', 'allow_csv_upload', 'extra') export_children = ['tables'] def __repr__(self): From 2630293f97cddcb2898b89026b574f8df1b72979 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 20 Jun 2019 16:31:19 -0700 Subject: [PATCH 102/107] Made dashboard_import_export.import_dashboards() do data table import before datasources and dashboards --- superset/utils/dashboard_import_export.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py index b51eef29905e..e8a5839b9c08 100644 --- a/superset/utils/dashboard_import_export.py +++ b/superset/utils/dashboard_import_export.py @@ -112,14 +112,15 @@ def import_dashboards(session, data, is_example=False, data_blob_urls=None, substitute_db_name = get_db_name(database_uri) if database_uri else \ get_or_create_db_by_name(db_name='main').database_name - import_dashboard(session, data, import_time) - import_datasources(data, import_time, substitute_db_name=substitute_db_name) + import_files_to_table(data, is_example=True, data_blob_urls=data_blob_urls) session.commit() - import_files_to_table(data, is_example=True, data_blob_urls=data_blob_urls) + import_datasources(data, import_time, substitute_db_name=substitute_db_name) + import_dashboard(session, data, import_time) session.commit() + def get_db_name(uri): """Get the DB name from the URI string""" db_name = make_url(uri).database From 9996781b444bdfc4de9ed5d926b74d7c845a44b6 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 20 Jun 2019 16:32:23 -0700 Subject: [PATCH 103/107] Cleanup logging. 
Using superset.utils.core.get_or_create_db_by_name(), added test cases
 test_examples_import_duplicate and test_examples_import_duplicate_uuid

---
 tests/cli_tests.py | 67 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/tests/cli_tests.py b/tests/cli_tests.py
index de74af1eb826..b06d06cf7b82 100644
--- a/tests/cli_tests.py
+++ b/tests/cli_tests.py
@@ -10,8 +10,8 @@
 from superset import app, cli, db
 from superset.connectors.connector_registry import ConnectorRegistry
-from superset.models.core import Dashboard, Database
-from superset.utils.dashboard_import_export import get_or_create_main_db
+from superset.models.core import Dashboard, Database, Slice
+from superset.utils.core import get_or_create_db_by_name
 from tests.base_tests import SupersetTestCase
 
 config = app.config
@@ -135,9 +135,66 @@ def test_examples_import(self):
 
         # Did all rows get imported?
         df = pd.read_sql('SELECT * FROM wb_health_population',
-                         get_or_create_main_db().get_sqla_engine())
+                         get_or_create_db_by_name(db_name='main').get_sqla_engine())
         self.assertEqual(len(df.index), 11770)
 
+    def test_examples_import_duplicate(self):
+        """Test `superset examples import` when a dashboard with a different uuid exists"""
+        # Load a pre-existing "World's Bank" Dashboard via `superset load_examples`
+        self.runner.invoke(
+            app.cli,
+            ['load_examples']
+        )
+        # Import the same dashboard; its objects carry different uuids
+        self.runner.invoke(
+            app.cli,
+            [
+                'examples', 'import', '-e',
+                'World Bank Health Nutrition and Population Stats',
+            ],
+        )
+
+        # Did the dashboard get imported to the main DB more than once?
+        dashboards = db.session.query(Dashboard).filter(
+            Dashboard.dashboard_title.in_(["World's Bank Data"])).all()
+        self.assertEqual(len(dashboards), 2)
+
+        # Did the slices get imported to the main DB more than once?
+        slices = db.session.query(Slice).filter(
+            Slice.slice_name.in_(["World's Population"])
+        ).all()
+        self.assertEqual(len(slices), 2)
+
+    def test_examples_import_duplicate_uuid(self):
+        """Test `superset examples import` when a dashboard with the same uuid exists"""
+        # Load a pre-existing "World's Bank" Dashboard
+        self.runner.invoke(
+            app.cli,
+            [
+                'examples', 'import', '-e',
+                'World Bank Health Nutrition and Population Stats',
+            ],
+        )
+        # Import the same dashboard again; this time the uuids match
+        self.runner.invoke(
+            app.cli,
+            [
+                'examples', 'import', '-e',
+                'World Bank Health Nutrition and Population Stats',
+            ],
+        )
+
+        # Did the dashboard get imported to the main DB just once?
+        dashboards = db.session.query(Dashboard).filter(
+            Dashboard.dashboard_title.in_(["World's Bank Data"])).all()
+        self.assertEqual(len(dashboards), 1)
+
+        # Did the slices get imported just once?
+        slices = db.session.query(Slice).filter(
+            Slice.slice_name.in_(["World's Population"])
+        ).all()
+        self.assertEqual(len(slices), 1)
+
     def test_examples_remove(self):
         """Test `superset examples remove`"""
         # First add the example...
@@ -161,8 +218,6 @@ def test_examples_remove(self):
         # Is the dashboard still in the main db?
         total = db.session.query(Dashboard).filter(
             Dashboard.dashboard_title.in_(["World's Bank Data"])).count()
-        logging.debug('total 1')
-        logging.debug(total)
         self.assertEqual(total, 0)
 
         # Is the data table gone?
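These two tests pin down the uuid behavior completed in PATCH 101: re-importing a dashboard whose objects share uuids with rows already in the database overrides those rows, while an import carrying different uuids yields a second copy. A minimal sketch of the lookup the override path depends on, assuming a SQLAlchemy session and model class (hypothetical helper, not the shipped API; the real logic lives in Slice.import_obj and Dashboard.import_obj):

    def find_existing_by_uuid(session, model, incoming):
        """Return the previously imported row sharing incoming.uuid, if any."""
        by_uuid = {row.uuid: row for row in session.query(model).all()}
        # A hit means the import overrides that row in place (count stays 1);
        # a miss means the object is treated as new (count goes to 2).
        return by_uuid.get(incoming.uuid)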
@@ -177,8 +232,6 @@ def test_examples_remove(self): Database.database_name == db_name and SqlaTable.table_name == 'wb_health_population') ).count() - logging.debug('total 2') - logging.debug(total) self.assertEqual(total, 0) def test_examples_export(self): From 67e804325175dc1795643ae7f279ae6eae48718e Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 21 Jun 2019 12:35:31 -0700 Subject: [PATCH 104/107] Removed comment --- superset/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/cli.py b/superset/cli.py index 17ecd4265dec..9fbe365ce64c 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -61,7 +61,6 @@ def make_shell_context(): def init(): """Inits the Superset application""" utils.get_or_create_db_by_name(db_name='main') - # utils.get_or_create_db_by_name(db_name='examples') appbuilder.add_permissions(update_perms=True) security_manager.sync_role_definitions() From ab24559420c41653d896c93616d7ae497b56ee94 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 21 Jun 2019 12:43:46 -0700 Subject: [PATCH 105/107] Removed abstract superset example classes --- superset/data/__init__.py | 63 --------------------------------------- 1 file changed, 63 deletions(-) diff --git a/superset/data/__init__.py b/superset/data/__init__.py index 9390386f1ae7..b36a3002f1f3 100644 --- a/superset/data/__init__.py +++ b/superset/data/__init__.py @@ -31,66 +31,3 @@ from .tabbed_dashboard import load_tabbed_dashboard # noqa from .unicode_test_data import load_unicode_test_data # noqa from .world_bank import load_world_bank_health_n_pop # noqa - -from abc import ABC - -class AbstractSupersetExample(ABC): - """Defines interface through which superset examples load themselves.""" - - def __init__(self, description): - self.description = description - - def load_data(self): - # Task 1: Load file and create pandas.DataFrame - # Task 2: Load data into SQL with pandas.DataFrame.to_sql() - # Task 3: Process through ORM to get back workable Table object from whichever data source the table is in - pass - - def create_metrics(self): - # Task 1: Build any TableColumns - # Task 2: Build Metrics - SQLMetrics - # Task 3: Store metrics in DB via ORM - pass - - def create_charts(self, slices): - # Task 1: Build Slice from config/JSON - # Task 2: Store to DB via - misc_dash_slices.add(slc.slice_name) / merge_slice(slc) - pass - - def create_dashboards(self, name, config): - # Task 1: Instantiate Dash via ORM - # Task 2: Configure Dash via JSON - # Task 3: Store to DB via ORM - pass - - -class SupersetConfigExample(): - """Defines interface through which superset examples define themselves""" - - def __init__(self, description): - self.description = description - - def load_data(self, data_path, data_types='csv', encoding='utf-8', dt_column=None): - # Task 1: Load file and create pandas.DataFrame - # Task 2: Load data into SQL with pandas.DataFrame.to_sql() - # Task 3: Process through ORM to get back workable Table object from whichever data source the table is in - - pass - - def create_metrics(self, metrics): - # Task 1: Build TableColumns - # Task 2: Build Metrics - SQLMetrics - # Task 3: Store metrics in DB via ORM - pass - - def create_charts(self, slices): - # Task 1: Build Slice from config/JSON - # Task 2: Store to DB via - misc_dash_slices.add(slc.slice_name) / merge_slice(slc) - pass - - def create_dashboards(self, name, config): - # Task 1: Instantiate Dash via ORM - # Task 2: Configure Dash via JSON - # Task 3: Store to DB via ORM - pass - From 10e36d3781f50eeed5c5e51f45f62c3dec385a69 Mon Sep 
From 10e36d3781f50eeed5c5e51f45f62c3dec385a69 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Fri, 21 Jun 2019 12:58:51 -0700
Subject: [PATCH 106/107] flake8 fixes

---
 superset/cli.py                           | 14 +++++++-------
 superset/models/helpers.py                |  2 +-
 superset/utils/core.py                    |  1 +
 superset/utils/dashboard_import_export.py | 11 +++++------
 tests/cli_tests.py                        |  8 ++++----
 tests/dict_import_export_tests.py         |  3 ---
 6 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/superset/cli.py b/superset/cli.py
index 9fbe365ce64c..b05471abaf2d 100755
--- a/superset/cli.py
+++ b/superset/cli.py
@@ -34,7 +34,7 @@
     app, appbuilder, data, db, security_manager,
 )
 from superset.data.helpers import (
-    download_url_to_blob_url, get_examples_file_list, get_examples_uris, 
+    download_url_to_blob_url, get_examples_file_list, get_examples_uris,
     list_examples_table,
 )
 from superset.exceptions import DashboardNotFoundException, ExampleNotFoundException
@@ -220,7 +220,7 @@ def export_example(dashboard_id, dashboard_title, description, example_title,
 @examples.command('list')
 @click.option(
     '--examples-repo', '-r',
-    help="Full name of Github repository containing examples, ex: " +
+    help='Full name of Github repository containing examples, ex: ' +
         "'apache-superset/examples-data'",
     default=None)
 @click.option(
@@ -242,7 +242,7 @@ def _list_examples(examples_repo, examples_tag, full_fields):
     default=config.get('SQLALCHEMY_EXAMPLES_URI'))
 @click.option(
     '--examples-repo', '-r',
-    help="Full name of Github repository containing examples, ex: " +
+    help='Full name of Github repository containing examples, ex: ' +
         "'apache-superset/examples-data'",
     default=None)
 @click.option(
@@ -272,7 +272,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri):
 
     for example_file in examples_files:
         metadata_download_url = example_file['metadata_file']['download_url']
-        example_metadata_json = requests.get(metadata_download_url, 
+        example_metadata_json = requests.get(metadata_download_url,
                                              headers=headers).content
         # Cheaply load json without generating objects
         example_metadata = json.loads(example_metadata_json)
@@ -289,7 +289,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri):
             exit(1)
 
     # Parse data to get file download_urls -> blob_urls
-    example_metadata = json.loads(import_example_json, 
+    example_metadata = json.loads(import_example_json,
                                   object_hook=utils.decode_dashboards)
 
     # The given download url won't work for data files, need a blob url
@@ -320,7 +320,7 @@ def import_example(example_title, examples_repo, examples_tag, database_uri):
     default=config.get('SQLALCHEMY_EXAMPLES_URI'))
 @click.option(
     '--examples-repo', '-r',
-    help="Full name of Github repository containing examples, ex: " +
+    help='Full name of Github repository containing examples, ex: ' +
         "'apache-superset/examples-data'",
     default=None)
 @click.option(
@@ -375,7 +375,7 @@ def remove_example(example_title, database_uri, examples_repo, examples_tag):
             session,
             import_example_data,
             dashboard_title,
-            database_uri=database_uri
+            database_uri=database_uri,
         )
     except DashboardNotFoundException as e:
         logging.exception(e)

diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index f06f9df1be8a..6088d22ab470 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -33,7 +33,7 @@
 from sqlalchemy_utils.types.uuid import UUIDType
 import yaml
 
-from superset.utils.core import SQLAJsonEncoder, QueryStatus
+from superset.utils.core import QueryStatus, SQLAJsonEncoder
 
 
 def json_to_dict(json_str):
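A note on the object_hook pattern used in the import_example hunks above: json.loads invokes the callable once per decoded JSON object, which is how a stream of plain dicts can be rehydrated into richer objects (presumably what utils.decode_dashboards does; that function is not shown here). A self-contained sketch with a hypothetical Point class:

    import json


    class Point:
        def __init__(self, x, y):
            self.x = x
            self.y = y


    def decode_point(obj):
        # Called for every decoded JSON object; return a rich object when the
        # dict matches a known shape, otherwise pass the dict through untouched.
        if obj.keys() == {'x', 'y'}:
            return Point(obj['x'], obj['y'])
        return obj


    points = json.loads('[{"x": 1, "y": 2}, {"x": 3, "y": 4}]',
                        object_hook=decode_point)
    assert all(isinstance(p, Point) for p in points)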
diff --git a/superset/utils/core.py b/superset/utils/core.py
index f6763ba4f824..d42b840d4a2e 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -25,6 +25,7 @@
 from email.utils import formatdate
 import errno
 import functools
+import json
 import logging
 import os
 import signal

diff --git a/superset/utils/dashboard_import_export.py b/superset/utils/dashboard_import_export.py
index e8a5839b9c08..7c64b82bd979 100644
--- a/superset/utils/dashboard_import_export.py
+++ b/superset/utils/dashboard_import_export.py
@@ -61,7 +61,7 @@ def table_to_sql(path, table_name, engine):
     logging.info(f'Import data from file {path} into table {table_name}')
 
     try:
-        df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True, 
+        df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True,
                          compression='infer')
         df.to_sql(
             table_name,
@@ -69,7 +69,7 @@ def table_to_sql(path, table_name, engine):
             if_exists='replace',
             chunksize=500,
             index=False)
-    except (pd.errors.ParserError, pd.errors.OutOfBoundsDatetime, 
+    except (pd.errors.ParserError, pd.errors.OutOfBoundsDatetime,
            pd.errors.EmptyDataError) as e:
         logging.exception(e)
         raise SupersetException('Error reading table into database!')
@@ -99,7 +99,7 @@ def import_files_to_table(data, is_example=False, data_blob_urls=None):
         table_to_sql(table['file_name'], table['table_name'], engine)
 
 
-def import_dashboards(session, data, is_example=False, data_blob_urls=None, 
+def import_dashboards(session, data, is_example=False, data_blob_urls=None,
                       database_uri=None, import_time=None):
     """Imports dashboards from a stream to databases"""
     current_tt = int(time.time())
@@ -120,7 +120,6 @@ def import_dashboards(session, data, is_example=False, data_blob_urls=None,
     session.commit()
 
 
-
 def get_db_name(uri):
     """Get the DB name from the URI string"""
     db_name = make_url(uri).database
@@ -188,7 +187,7 @@ def get_slug(session, dashboard_id=None, dashboard_title=None):
     return slug
 
 
-def remove_dashboard(session, import_example_data, dashboard_title, database_uri=None, 
+def remove_dashboard(session, import_example_data, dashboard_title, database_uri=None,
                      primary_key=Column('id', Integer, primary_key=True)):
     """Remove a dashboard based on id or title"""
 
@@ -209,7 +208,7 @@ def remove_dashboard(session, import_example_data, dashboard_title, database_uri
     SqlaTable = ConnectorRegistry.sources['table']
     for f in import_example_data['files']:
         t = session.query(SqlaTable).filter(
-            SqlaTable.table_name == f['table_name']
+            SqlaTable.table_name == f['table_name'],
         ).one()
         session.delete(t)
         session.commit()
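For context on the table_to_sql hunks above: the whole load path is two pandas calls, read_csv into a DataFrame and DataFrame.to_sql into the target database. A standalone sketch under assumed inputs (the in-memory SQLite engine and the file name are stand-ins, not Superset's configuration):

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine('sqlite://')  # throwaway in-memory database
    path, table_name = 'birth_names.csv', 'birth_names'  # hypothetical example file

    # compression='infer' lets one call handle .csv, .csv.gz, .zip, and friends.
    df = pd.read_csv(path, parse_dates=True, infer_datetime_format=True,
                     compression='infer')

    # chunksize batches the INSERTs; if_exists='replace' keeps reloads idempotent.
    df.to_sql(table_name, engine, if_exists='replace', chunksize=500, index=False)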
diff --git a/tests/cli_tests.py b/tests/cli_tests.py
index b06d06cf7b82..7b7022b0de8f 100644
--- a/tests/cli_tests.py
+++ b/tests/cli_tests.py
@@ -8,7 +8,7 @@
 
 import pandas as pd
 
-from superset import app, cli, db
+from superset import app, cli, db  # noqa: F401
 from superset.connectors.connector_registry import ConnectorRegistry
 from superset.models.core import Dashboard, Database, Slice
 from superset.utils.core import get_or_create_db_by_name
@@ -143,7 +143,7 @@ def test_examples_import_duplicate(self):
         # Load a pre-existing "World's Bank" Dashboard via `superset load_examples`
         self.runner.invoke(
             app.cli,
-            ['load_examples']
+            ['load_examples'],
         )
         # Load the same dashboard but different uuids
         self.runner.invoke(
@@ -161,7 +161,7 @@ def test_examples_import_duplicate(self):
 
         # Did the slices get imported to the main DB more than once?
         slices = db.session.query(Slice).filter(
-            Slice.slice_name.in_(["World's Population"])
+            Slice.slice_name.in_(["World's Population"]),
         ).all()
         self.assertEqual(len(slices), 2)
 
@@ -191,7 +191,7 @@ def test_examples_import_duplicate_uuid(self):
 
         # Did the slices get imported just once?
         slices = db.session.query(Slice).filter(
-            Slice.slice_name.in_(["World's Population"])
+            Slice.slice_name.in_(["World's Population"]),
         ).all()
         self.assertEqual(len(slices), 1)
 

diff --git a/tests/dict_import_export_tests.py b/tests/dict_import_export_tests.py
index 504ce88596e9..dba6205841dd 100644
--- a/tests/dict_import_export_tests.py
+++ b/tests/dict_import_export_tests.py
@@ -21,9 +21,6 @@
 import yaml
 
 from superset import db
-from superset.connectors.druid.models import (
-    DruidColumn, DruidDatasource, DruidMetric,
-)
 from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
 from superset.utils.core import get_main_database
 from .base_tests import SupersetTestCase

From 5c5dfeadaa75472bbea639f5bff17c9df9849248 Mon Sep 17 00:00:00 2001
From: Russell Jurney
Date: Sat, 22 Jun 2019 11:39:09 -0700
Subject: [PATCH 107/107] Removed schedulers from uuid field migrations

---
 .../e5200a951e62_add_dashboards_uuid.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
index 0ed98af3956a..b26cabe81c77 100644
--- a/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
+++ b/superset/migrations/versions/e5200a951e62_add_dashboards_uuid.py
@@ -95,18 +95,6 @@ class TableColumn(Base):
     uuid = Column(UUIDType(binary=False), default=get_uuid)
 
 
-class DashboardEmailSchedule(Base):
-    __tablename__ = 'dashboard_email_schedules'
-    id = Column(Integer, primary_key=True)
-    uuid = Column(UUIDType(binary=False), default=get_uuid)
-
-
-class SliceEmailSchedule(Base):
-    __tablename__ = 'slice_email_schedules'
-    id = Column(Integer, primary_key=True)
-    uuid = Column(UUIDType(binary=False), default=get_uuid)
-
-
 def upgrade():
     bind = op.get_bind()
     session = db.Session(bind=bind)
@@ -137,8 +125,6 @@ def add_uuid_column(col_name, _type):
     add_uuid_column('sql_metrics', SqlMetric)
     add_uuid_column('tables', SqlaTable)
     add_uuid_column('table_columns', TableColumn)
-    add_uuid_column('dashboard_email_schedules', DashboardEmailSchedule)
-    add_uuid_column('slice_email_schedules', SliceEmailSchedule)
 
     session.close()
@@ -170,9 +156,3 @@ def downgrade():
 
     with op.batch_alter_table('table_columns') as batch_op:
         batch_op.drop_column('uuid')
-
-    with op.batch_alter_table('dashboard_email_schedules') as batch_op:
-        batch_op.drop_column('uuid')
-
-    with op.batch_alter_table('slice_email_schedules') as batch_op:
-        batch_op.drop_column('uuid')
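A closing note: the add_uuid_column helper invoked in the upgrade hunk is defined earlier in the migration file and never appears in these hunks (the hunk context suggests it is nested inside upgrade() and closes over session rather than taking it as an argument). One plausible shape for such a helper, assuming Alembic batch mode plus an ORM backfill; this is a sketch, not the file's verbatim code:

    import uuid

    from alembic import op
    from sqlalchemy import Column
    from sqlalchemy_utils.types.uuid import UUIDType


    def get_uuid():
        # Assumed to match the default= callable the model stubs above reference.
        return str(uuid.uuid4())


    def add_uuid_column(table_name, _type, session):
        # batch_alter_table makes the ALTER work on SQLite as well as MySQL/Postgres.
        with op.batch_alter_table(table_name) as batch_op:
            batch_op.add_column(
                Column('uuid', UUIDType(binary=False), default=get_uuid))
        # default= fires only on new inserts, so backfill rows that already exist.
        for row in session.query(_type):
            row.uuid = get_uuid()
        session.commit()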