From aa12906e006040d0998ef18d4b6d1ae10ebbb7a6 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sun, 13 Jan 2019 14:39:04 -0800 Subject: [PATCH 1/4] Making thrift, pyhive and tableschema extras_require Looking at the dependency tree for license-related questions, I noticed that tableschema had a huge tree, and only people running Hive really need it. Making this as well as pyhive and thrift optional. Also bumping some Python dependencies --- UPDATING.md | 6 ++++++ setup.py | 14 ++++++++------ superset/db_engine_specs.py | 3 ++- superset/db_engines/hive.py | 9 +++++---- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 97ed71082af3..03ae1da685d7 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -3,6 +3,12 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. +## Superset 0.32.0 +* If you use `Hive` or `Presto`, some dependencies that used to be installed + with the main package are now optional. To get these packages, + run `pip install superset[presto]` and/or `pip install superset[hive]` as + required. + ## Superset 0.31.0 * boto3 / botocore was removed from the dependency list. 
If you use s3 as a place to store your SQL Lab result set or Hive uploads, you may diff --git a/setup.py b/setup.py index e8447de9cf11..4958f23d4c0c 100644 --- a/setup.py +++ b/setup.py @@ -82,9 +82,7 @@ def get_git_sha(): 'flask-compress', 'flask-migrate', 'flask-wtf', - 'flower', # deprecated 'geopy', - 'gsheetsdb>=0.1.9', 'gunicorn', # deprecated 'humanize', 'idna', @@ -95,7 +93,6 @@ def get_git_sha(): 'pathlib2', 'polyline', 'pydruid>=0.4.3', - 'pyhive>=0.4.0', 'python-dateutil', 'python-geohash', 'pyyaml>=3.13', @@ -106,14 +103,19 @@ def get_git_sha(): 'sqlalchemy', 'sqlalchemy-utils', 'sqlparse', - 'tableschema', - 'thrift>=0.9.3', - 'thrift-sasl>=0.2.1', 'unicodecsv', ], extras_require={ 'cors': ['flask-cors>=2.0.0'], 'console_log': ['console_log==0.2.10'], + 'hive': [ + 'pyhive>=0.4.0', + 'tableschema', + 'thrift-sasl>=0.2.1', + 'thrift>=0.9.3', + ], + 'presto': ['pyhive>=0.4.0'], + 'gsheets': ['gsheetsdb>=0.1.9'], }, author='Apache Superset Dev', author_email='dev@superset.incubator.apache.org', diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 1a9bf817a41c..49e587558c95 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -47,7 +47,6 @@ from sqlalchemy.sql import quoted_name, text from sqlalchemy.sql.expression import TextAsFrom import sqlparse -from tableschema import Table from werkzeug.utils import secure_filename from superset import app, conf, db, sql_parse @@ -1071,6 +1070,8 @@ def convert_to_hive_type(col_type): upload_path = config['UPLOAD_FOLDER'] + \ secure_filename(filename) + # Optional dependency + from tableschema import Table # pylint: disable=import-error hive_table_schema = Table(upload_path).infer() column_name_and_type = [] for column_info in hive_table_schema['fields']: diff --git a/superset/db_engines/hive.py b/superset/db_engines/hive.py index f0f8995d9b22..63342577a253 100644 --- a/superset/db_engines/hive.py +++ b/superset/db_engines/hive.py @@ -15,14 +15,11 @@ # specific language 
governing permissions and limitations # under the License. # pylint: disable=C,R,W -from pyhive import hive # pylint: disable=no-name-in-module -from TCLIService import ttypes -from thrift import Thrift # TODO: contribute back to pyhive. def fetch_logs(self, max_rows=1024, - orientation=ttypes.TFetchOrientation.FETCH_NEXT): + orientation=None): """Mocked. Retrieve the logs produced by the execution of the query. Can be called multiple times to fetch the logs produced after the previous call. @@ -31,6 +28,10 @@ def fetch_logs(self, max_rows=1024, .. note:: This is not a part of DB-API. """ + from pyhive import hive + from TCLIService import ttypes + from thrift import Thrift + orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT try: req = ttypes.TGetLogReq(operationHandle=self._operationHandle) logs = self._connection.client.GetLog(req).log From 813efff935ddc2a9bd02dc3337733f3a0d798773 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 18 Jan 2019 17:01:20 -0800 Subject: [PATCH 2/4] Run pip-compile --- requirements.txt | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/requirements.txt b/requirements.txt index c503e2712a1e..9767f2837d4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,11 +7,9 @@ alembic==1.0.0 # via flask-migrate amqp==2.3.2 # via kombu asn1crypto==0.24.0 # via cryptography -babel==2.6.0 # via flask-babel, flower +babel==2.6.0 # via flask-babel billiard==3.5.0.4 # via celery bleach==3.0.2 -cachetools==3.0.0 # via google-auth -cchardet==1.0.0 # via tabulator celery==4.2.0 certifi==2018.8.24 # via requests cffi==1.11.5 # via cryptography @@ -23,7 +21,6 @@ croniter==0.3.26 cryptography==2.4.2 decorator==4.3.0 # via retry defusedxml==0.5.0 # via python3-openid -et-xmlfile==1.0.1 # via openpyxl flask-appbuilder==1.12.1 flask-babel==0.11.1 # via flask-appbuilder flask-caching==1.4.0 @@ -34,67 +31,42 @@ flask-openid==1.2.5 # via flask-appbuilder flask-sqlalchemy==2.3.2 # via 
flask-appbuilder, flask-migrate flask-wtf==0.14.2 flask==1.0.2 -flower==0.9.2 -future==0.16.0 # via pyhive geopy==1.11.0 -google-auth==1.6.1 # via gsheetsdb -gsheetsdb==0.1.9 gunicorn==19.8.0 humanize==0.5.1 idna==2.6 -ijson==2.3 # via tabulator isodate==0.6.0 itsdangerous==0.24 # via flask -jdcal==1.4 # via openpyxl jinja2==2.10 # via flask, flask-babel -jsonlines==1.2.0 # via tabulator -jsonschema==2.6.0 # via tableschema kombu==4.2.1 # via celery -linear-tsv==1.1.0 # via tabulator mako==1.0.7 # via alembic markdown==3.0 markupsafe==1.0 # via jinja2, mako -mo-future==2.20.18317 # via moz-sql-parser -moz-sql-parser==2.19.18318 # via gsheetsdb numpy==1.15.2 # via pandas -openpyxl==2.4.11 # via tabulator pandas==0.23.1 parsedatetime==2.0.0 pathlib2==2.3.0 polyline==1.3.2 py==1.7.0 # via retry -pyasn1-modules==0.2.2 # via google-auth -pyasn1==0.4.4 # via pyasn1-modules, rsa pycparser==2.19 # via cffi pydruid==0.5.0 -pyhive==0.5.1 -pyparsing==2.3.0 # via moz-sql-parser python-dateutil==2.6.1 python-editor==1.0.3 # via alembic python-geohash==0.8.5 python3-openid==3.1.0 # via flask-openid -pytz==2018.5 # via babel, celery, flower, pandas +pytz==2018.5 # via babel, celery, pandas pyyaml==3.13 requests==2.20.0 retry==0.9.2 -rfc3986==1.1.0 # via tableschema -rsa==4.0 # via google-auth -sasl==0.2.1 # via thrift-sasl selenium==3.141.0 simplejson==3.15.0 -six==1.11.0 # via bleach, cryptography, google-auth, gsheetsdb, isodate, jsonlines, linear-tsv, pathlib2, polyline, pydruid, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift +six==1.11.0 # via bleach, cryptography, isodate, pathlib2, polyline, pydruid, python-dateutil, sqlalchemy-utils sqlalchemy-utils==0.32.21 sqlalchemy==1.2.2 sqlparse==0.2.4 -tableschema==1.1.0 -tabulator==1.15.0 # via tableschema -thrift-sasl==0.3.0 -thrift==0.11.0 -tornado==5.1.1 # via flower unicodecsv==0.14.1 urllib3==1.22 # via requests, selenium vine==1.1.4 # via amqp webencodings==0.5.1 # via bleach werkzeug==0.14.1 # via 
flask wtforms==2.2.1 # via flask-wtf -xlrd==1.1.0 # via tabulator From 042433ac8d8ce74bea9d4c95e031a8d2ee14acf1 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 18 Jan 2019 17:50:09 -0800 Subject: [PATCH 3/4] Removing refs to past.builtins (from future lib) --- requirements-dev.txt | 1 + superset/connectors/base/models.py | 3 +-- superset/connectors/sqla/views.py | 3 +-- superset/dataframe.py | 5 ++--- superset/db_engine_specs.py | 5 ++--- superset/utils/core.py | 10 ++++------ superset/viz.py | 5 ++--- tests/celery_tests.py | 4 +--- 8 files changed, 14 insertions(+), 22 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0ac1c570753d..837e7de64c2a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,6 +9,7 @@ mysqlclient==1.3.13 pip-tools==3.1.0 psycopg2-binary==2.7.5 pycodestyle==2.4.0 +pyhive==0.6.1 pylint==1.9.2 python-dotenv==0.10.1 redis==2.10.6 diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index 50ef6d8f54c1..39cc5853d6f6 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -17,7 +17,6 @@ # pylint: disable=C,R,W import json -from past.builtins import basestring from sqlalchemy import ( and_, Boolean, Column, Integer, String, Text, ) @@ -218,7 +217,7 @@ def filter_values_handler( values, target_column_is_numeric=False, is_list_target=False): def handle_single_value(v): # backward compatibility with previous