diff --git a/docs/sqllab.rst b/docs/sqllab.rst
index c69e5848167c..9f9bac07623c 100644
--- a/docs/sqllab.rst
+++ b/docs/sqllab.rst
@@ -103,3 +103,22 @@ it's possible for administrators to expose more more macros in their
environment using the configuration variable ``JINJA_CONTEXT_ADDONS``.
All objects referenced in this dictionary will become available for users
to integrate in their queries in **SQL Lab**.
+
+Query cost estimation
+'''''''''''''''''''''
+
+Some databases support ``EXPLAIN`` queries that allow users to estimate the cost
+of queries before executing them. Currently, only Presto is supported in SQL Lab.
+To enable query cost estimation, add the following keys to the "Extra" field in
+the database configuration:
+
+.. code-block:: json
+
+ {
+ "version": "0.319",
+ "cost_estimate_enabled": true,
+ ...
+ }
+
+Here, "version" should be the version of your Presto cluster. Support for this
+functionality was introduced in Presto 0.319.
diff --git a/superset/assets/src/SqlLab/actions/sqlLab.js b/superset/assets/src/SqlLab/actions/sqlLab.js
index ca364284fddc..e5b11c642be8 100644
--- a/superset/assets/src/SqlLab/actions/sqlLab.js
+++ b/superset/assets/src/SqlLab/actions/sqlLab.js
@@ -70,6 +70,9 @@ export const CHANGE_DATA_PREVIEW_ID = 'CHANGE_DATA_PREVIEW_ID';
export const START_QUERY_VALIDATION = 'START_QUERY_VALIDATION';
export const QUERY_VALIDATION_RETURNED = 'QUERY_VALIDATION_RETURNED';
export const QUERY_VALIDATION_FAILED = 'QUERY_VALIDATION_FAILED';
+// Action types dispatched by estimateQueryCost: request sent, estimate
+// received, and request failed.
+export const COST_ESTIMATE_STARTED = 'COST_ESTIMATE_STARTED';
+export const COST_ESTIMATE_RETURNED = 'COST_ESTIMATE_RETURNED';
+export const COST_ESTIMATE_FAILED = 'COST_ESTIMATE_FAILED';
export const CREATE_DATASOURCE_STARTED = 'CREATE_DATASOURCE_STARTED';
export const CREATE_DATASOURCE_SUCCESS = 'CREATE_DATASOURCE_SUCCESS';
@@ -120,6 +123,27 @@ export function scheduleQuery(query) {
.catch(() => dispatch(addDangerToast(t('Your query could not be scheduled'))));
}
+export function estimateQueryCost(query) {
+ const { dbId, schema, sql, templateParams } = query;
+ const endpoint = schema === null
+ ? `/superset/estimate_query_cost/${dbId}/`
+ : `/superset/estimate_query_cost/${dbId}/${schema}/`;
+ return dispatch => Promise.all([
+ dispatch({ type: COST_ESTIMATE_STARTED, query }),
+ SupersetClient.post({
+ endpoint,
+ postPayload: { sql, templateParams: JSON.parse(templateParams) },
+ })
+ .then(({ json }) => dispatch({ type: COST_ESTIMATE_RETURNED, query, json }))
+ .catch(response =>
+ getClientErrorObject(response).then((error) => {
+ const message = error.error || error.statusText || t('Failed at retrieving results');
+ return dispatch({ type: COST_ESTIMATE_FAILED, query, error: message });
+ }),
+ ),
+ ]);
+}
+
export function startQuery(query) {
Object.assign(query, {
id: query.id ? query.id : shortid.generate(),
diff --git a/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx b/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx
new file mode 100644
index 000000000000..a8e794fffa10
--- /dev/null
+++ b/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import React from 'react';
+import PropTypes from 'prop-types';
+import { Table } from 'reactable-arc';
+import { Alert } from 'react-bootstrap';
+import { t } from '@superset-ui/translation';
+
+import Button from '../../components/Button';
+import Loading from '../../components/Loading';
+import ModalTrigger from '../../components/ModalTrigger';
+
+const propTypes = {
+ dbId: PropTypes.number.isRequired,
+ schema: PropTypes.string.isRequired,
+ sql: PropTypes.string.isRequired,
+ getEstimate: PropTypes.func.isRequired,
+ queryCostEstimate: PropTypes.Object,
+ selectedText: PropTypes.string,
+ tooltip: PropTypes.string,
+ disabled: PropTypes.bool,
+};
+const defaultProps = {
+ queryCostEstimate: [],
+ tooltip: '',
+ disabled: false,
+};
+
+// Button that requests a query cost estimate through the `getEstimate` prop
+// and renders the state held in the `queryCostEstimate` prop.
+class EstimateQueryCostButton extends React.PureComponent {
+ // Creates the modal ref and binds the handlers used as callbacks.
+ constructor(props) {
+ super(props);
+ this.queryCostModal = React.createRef();
+ this.onClick = this.onClick.bind(this);
+ this.renderModalBody = this.renderModalBody.bind(this);
+ }
+
+ // Delegates to the parent-supplied callback that starts the estimation.
+ onClick() {
+ this.props.getEstimate();
+ }
+
+ // Picks the body from the estimate state: the error branch when `error` is
+ // not null, the result branch when `completed` is truthy, otherwise the
+ // fallback return.
+ // NOTE(review): the check is a strict `!== null`, so an estimate object
+ // without an `error` key (undefined) also takes the error branch - confirm
+ // that the reducer always sets `error: null` on non-failed states.
+ renderModalBody() {
+ if (this.props.queryCostEstimate.error !== null) {
+ return (
+
+ {this.props.queryCostEstimate.error}
+
+ );
+ } else if (this.props.queryCostEstimate.completed) {
+ return (
+
+ );
+ }
+ return ;
+ }
+
+ // The button label depends on whether text is selected in the editor.
+ render() {
+ const { disabled, selectedText, tooltip } = this.props;
+ const btnText = selectedText
+ ? t('Estimate Selected Query Cost')
+ : t('Estimate Query Cost');
+ return (
+
+
+ {btnText}
+
+ }
+ bsSize="medium"
+ />
+
+ );
+ }
+}
+
+EstimateQueryCostButton.propTypes = propTypes;
+EstimateQueryCostButton.defaultProps = defaultProps;
+
+export default EstimateQueryCostButton;
diff --git a/superset/assets/src/SqlLab/components/SqlEditor.jsx b/superset/assets/src/SqlLab/components/SqlEditor.jsx
index cb0c6179fa25..9ab0ce9f7300 100644
--- a/superset/assets/src/SqlLab/components/SqlEditor.jsx
+++ b/superset/assets/src/SqlLab/components/SqlEditor.jsx
@@ -39,6 +39,7 @@ import TemplateParamsEditor from './TemplateParamsEditor';
import SouthPane from './SouthPane';
import SaveQuery from './SaveQuery';
import ScheduleQueryButton from './ScheduleQueryButton';
+import EstimateQueryCostButton from './EstimateQueryCostButton';
import ShareSqlLabQuery from './ShareSqlLabQuery';
import Timer from '../../components/Timer';
import Hotkeys from '../../components/Hotkeys';
@@ -109,6 +110,7 @@ class SqlEditor extends React.PureComponent {
this.requestValidation.bind(this),
VALIDATION_DEBOUNCE_MS,
);
+ this.getQueryCostEstimate = this.getQueryCostEstimate.bind(this);
this.handleWindowResize = throttle(
this.handleWindowResize.bind(this),
WINDOW_RESIZE_THROTTLE_MS,
@@ -210,6 +212,19 @@ class SqlEditor extends React.PureComponent {
setQueryLimit(queryLimit) {
this.props.actions.queryEditorSetQueryLimit(this.props.queryEditor, queryLimit);
}
+ // Builds a query descriptor from the current query editor (the selected
+ // text when there is one, otherwise the SQL held in component state) and
+ // passes it to the estimateQueryCost action. Does nothing when the
+ // `database` prop is not set.
+ getQueryCostEstimate() {
+ if (this.props.database) {
+ const qe = this.props.queryEditor;
+ const query = {
+ dbId: qe.dbId,
+ sql: qe.selectedText ? qe.selectedText : this.state.sql,
+ sqlEditorId: qe.id,
+ schema: qe.schema,
+ templateParams: qe.templateParams,
+ };
+ this.props.actions.estimateQueryCost(query);
+ }
+ }
handleWindowResize() {
this.setState({ height: this.getSqlEditorHeight() });
}
@@ -383,6 +398,23 @@ class SqlEditor extends React.PureComponent {
sql={this.state.sql}
/>
+ {
+ isFeatureEnabled(FeatureFlag.ESTIMATE_QUERY_COST) &&
+ this.props.database &&
+ this.props.database.allows_cost_estimate &&
+
+
+
+ }
{isFeatureEnabled(FeatureFlag.SCHEDULED_QUERIES) &&
bool:
+ return False
+
@classmethod
def get_timestamp_expr(
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
@@ -649,6 +654,55 @@ def select_star(
sql = sqlparse.format(sql, reindent=True)
return sql
+ @classmethod
+ def estimate_statement_cost(
+ cls, statement: str, database, cursor, user_name: str
+ ) -> Dict[str, str]:
+ """
+ Estimate the cost of a single SQL statement.
+
+ This base implementation always raises; an engine spec that supports
+ cost estimation is expected to override it.
+
+ :param statement: A single SQL statement
+ :param database: Database instance
+ :param cursor: Cursor instance
+ :param user_name: Effective username
+ :return: Dictionary describing the estimated cost
+ :raises Exception: Always, in this base implementation
+ """
+ raise Exception("Database does not support cost estimation")
+
+ @classmethod
+ def estimate_query_cost(
+ cls, database, schema: str, sql: str, source: str = None
+ ) -> List[Dict[str, str]]:
+ """
+ Estimate the cost of a multiple statement SQL query.
+
+ :param database: Database instance
+ :param schema: Database schema
+ :param sql: SQL query with possibly multiple statements
+ :param source: Source of the query (eg, "sql_lab")
+ :return: One cost dictionary per statement, in statement order
+ :raises Exception: If ``get_allow_cost_estimate`` rejects the version
+ configured for the database
+ """
+ # the version comes from the "version" key of the database extra config
+ database_version = database.get_extra().get("version")
+ if not cls.get_allow_cost_estimate(database_version):
+ raise Exception("Database does not support cost estimation")
+
+ user_name = g.user.username if g.user else None
+ parsed_query = sql_parse.ParsedQuery(sql)
+ statements = parsed_query.get_statements()
+
+ engine = database.get_sqla_engine(
+ schema=schema, nullpool=True, user_name=user_name, source=source
+ )
+
+ costs = []
+ # a single connection and cursor are shared by all statements; both are
+ # closed when the blocks exit, including on error
+ with closing(engine.raw_connection()) as conn:
+ with closing(conn.cursor()) as cursor:
+ for statement in statements:
+ costs.append(
+ cls.estimate_statement_cost(
+ statement, database, cursor, user_name
+ )
+ )
+ return costs
+
@classmethod
def modify_url_for_impersonation(cls, url, impersonate_user: bool, username: str):
"""
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index 186557c978ce..13e574d7cf09 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -25,19 +25,22 @@
from typing import Any, Dict, List, Optional, Set, Tuple
from urllib import parse
+import simplejson as json
from sqlalchemy import Column, literal_column
from sqlalchemy.engine.base import Engine
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.result import RowProxy
from sqlalchemy.sql.expression import ColumnClause, Select
-from superset import is_feature_enabled
+from superset import app, is_feature_enabled, security_manager
from superset.db_engine_specs.base import BaseEngineSpec
from superset.exceptions import SupersetTemplateException
from superset.models.sql_types.presto_sql_types import type_map as presto_type_map
+from superset.sql_parse import ParsedQuery
from superset.utils import core as utils
QueryStatus = utils.QueryStatus
+config = app.config
# map between Presto types and Pandas
pandas_dtype_map = {
@@ -74,6 +77,10 @@ class PrestoEngineSpec(BaseEngineSpec):
"date_add('day', 1, CAST({col} AS TIMESTAMP))))",
}
+ @classmethod
+ def get_allow_cost_estimate(cls, version: str = None) -> bool:
+ return version is not None and StrictVersion(version) >= StrictVersion("0.319")
+
@classmethod
def get_view_names(cls, inspector: Inspector, schema: Optional[str]) -> List[str]:
"""Returns an empty list
@@ -388,6 +395,73 @@ def select_star(
presto_cols,
)
+ @classmethod
+ def estimate_statement_cost(
+ cls, statement: str, database, cursor, user_name: str
+ ) -> Dict[str, str]:
+ """
+ Generate a SQL query that estimates the cost of a given statement.
+
+ :param statement: A single SQL statement
+ :param database: Database instance
+ :param cursor: Cursor instance
+ :param username: Effective username
+ """
+ parsed_query = ParsedQuery(statement)
+ sql = parsed_query.stripped()
+
+ SQL_QUERY_MUTATOR = config.get("SQL_QUERY_MUTATOR")
+ if SQL_QUERY_MUTATOR:
+ sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)
+
+ sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}"
+ cursor.execute(sql)
+
+ # the output from Presto is a single column and a single row containing
+ # JSON:
+ #
+ # {
+ # ...
+ # "estimate" : {
+ # "outputRowCount" : 8.73265878E8,
+ # "outputSizeInBytes" : 3.41425774958E11,
+ # "cpuCost" : 3.41425774958E11,
+ # "maxMemory" : 0.0,
+ # "networkCost" : 3.41425774958E11
+ # }
+ # }
+ result = json.loads(cursor.fetchone()[0])
+ estimate = result["estimate"]
+
+ def humanize(value: Any, suffix: str) -> str:
+ try:
+ value = int(value)
+ except ValueError:
+ return str(value)
+
+ prefixes = ["K", "M", "G", "T", "P", "E", "Z", "Y"]
+ prefix = ""
+ to_next_prefix = 1000
+ while value > to_next_prefix and prefixes:
+ prefix = prefixes.pop(0)
+ value //= to_next_prefix
+
+ return f"{value} {prefix}{suffix}"
+
+ cost = {}
+ columns = [
+ ("outputRowCount", "Output count", " rows"),
+ ("outputSizeInBytes", "Output size", "B"),
+ ("cpuCost", "CPU cost", ""),
+ ("maxMemory", "Max memory", "B"),
+ ("networkCost", "Network cost", ""),
+ ]
+ for key, label, suffix in columns:
+ if key in estimate:
+ cost[label] = humanize(estimate[key], suffix)
+
+ return cost
+
@classmethod
def adjust_database_uri(cls, uri, selected_schema=None):
database = uri.database
diff --git a/superset/models/core.py b/superset/models/core.py
index e3dcafa0a701..b31fb6710c5d 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -773,6 +773,16 @@ def name(self):
def allows_subquery(self):
return self.db_engine_spec.allows_subqueries
+ @property
+ def allows_cost_estimate(self) -> bool:
+ extra = self.get_extra()
+ database_version = extra.get("version")
+ cost_estimate_enabled = extra.get("cost_estimate_enabled")
+ return (
+ self.db_engine_spec.get_allow_cost_estimate(database_version)
+ and cost_estimate_enabled
+ )
+
@property
def data(self):
return {
@@ -781,6 +791,7 @@ def data(self):
"backend": self.backend,
"allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch,
"allows_subquery": self.allows_subquery,
+ "allows_cost_estimate": self.allows_cost_estimate,
}
@property
diff --git a/superset/views/core.py b/superset/views/core.py
index b81656d8c2a1..826f04c068cb 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -71,6 +71,7 @@
DatabaseNotFound,
SupersetException,
SupersetSecurityException,
+ SupersetTimeoutException,
)
from superset.jinja_context import get_template_processor
from superset.legacy import update_time_range
@@ -107,8 +108,12 @@
get_viz,
)
+
config = app.config
CACHE_DEFAULT_TIMEOUT = config.get("CACHE_DEFAULT_TIMEOUT", 0)
+# Timeout, in seconds, for a query cost estimation request; 10 when the
+# setting is absent from the config.
+SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config.get(
+ "SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT", 10
+)
stats_logger = config.get("STATS_LOGGER")
DAR = models.DatasourceAccessRequest
QueryStatus = utils.QueryStatus
@@ -2396,6 +2401,34 @@ def select_star(self, database_id, table_name, schema=None):
mydb.select_star(table_name, schema, latest_partition=True, show_cols=True)
)
+ @has_access_api
+ @expose("/estimate_query_cost//", methods=["POST"])
+ @expose("/estimate_query_cost///", methods=["POST"])
+ @event_logger.log_this
+ def estimate_query_cost(self, database_id: int, schema: str = None) -> Response:
+ mydb = db.session.query(models.Database).filter_by(id=database_id).one_or_none()
+
+ sql = json.loads(request.form.get("sql", '""'))
+ template_params = json.loads(request.form.get("templateParams") or "{}")
+ if template_params:
+ template_processor = get_template_processor(mydb)
+ sql = template_processor.process_template(sql, **template_params)
+
+ timeout = SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT
+ timeout_msg = f"The estimation exceeded the {timeout} seconds timeout."
+ try:
+ with utils.timeout(seconds=timeout, error_message=timeout_msg):
+ cost = mydb.db_engine_spec.estimate_query_cost(
+ mydb, schema, sql, utils.sources.get("sql_lab")
+ )
+ except SupersetTimeoutException as e:
+ logging.exception(e)
+ return json_error_response(timeout_msg)
+ except Exception as e:
+ return json_error_response(str(e))
+
+ return json_success(json.dumps(cost))
+
@expose("/theme/")
def theme(self):
return self.render_template("superset/theme.html")
diff --git a/superset/views/database/api.py b/superset/views/database/api.py
index 2bb8ea5f35ec..352086eb6c17 100644
--- a/superset/views/database/api.py
+++ b/superset/views/database/api.py
@@ -48,6 +48,7 @@ class DatabaseRestApi(DatabaseMixin, ModelRestApi):
"allow_multi_schema_metadata_fetch",
"allow_csv_upload",
"allows_subquery",
+ "allows_cost_estimate",
"backend",
]
# Removes the local limit for the page size