diff --git a/docs/sqllab.rst b/docs/sqllab.rst index c69e5848167c..9f9bac07623c 100644 --- a/docs/sqllab.rst +++ b/docs/sqllab.rst @@ -103,3 +103,22 @@ it's possible for administrators to expose more more macros in their environment using the configuration variable ``JINJA_CONTEXT_ADDONS``. All objects referenced in this dictionary will become available for users to integrate in their queries in **SQL Lab**. + +Query cost estimation +''''''''''''''''''''' + +Some databases support ``EXPLAIN`` queries that allow users to estimate the cost +of queries before executing them. Currently, Presto is supported in SQL Lab. To +enable query cost estimation, add the following keys to the "Extra" field in the +database configuration: + +.. code-block:: json + + { + "version": "0.319", + "cost_estimate_enabled": true, + ... + } + +Here, "version" should be the version of your Presto cluster. Support for this +functionality was introduced in Presto 0.319. diff --git a/superset/assets/src/SqlLab/actions/sqlLab.js b/superset/assets/src/SqlLab/actions/sqlLab.js index ca364284fddc..e5b11c642be8 100644 --- a/superset/assets/src/SqlLab/actions/sqlLab.js +++ b/superset/assets/src/SqlLab/actions/sqlLab.js @@ -70,6 +70,9 @@ export const CHANGE_DATA_PREVIEW_ID = 'CHANGE_DATA_PREVIEW_ID'; export const START_QUERY_VALIDATION = 'START_QUERY_VALIDATION'; export const QUERY_VALIDATION_RETURNED = 'QUERY_VALIDATION_RETURNED'; export const QUERY_VALIDATION_FAILED = 'QUERY_VALIDATION_FAILED'; +export const COST_ESTIMATE_STARTED = 'COST_ESTIMATE_STARTED'; +export const COST_ESTIMATE_RETURNED = 'COST_ESTIMATE_RETURNED'; +export const COST_ESTIMATE_FAILED = 'COST_ESTIMATE_FAILED'; export const CREATE_DATASOURCE_STARTED = 'CREATE_DATASOURCE_STARTED'; export const CREATE_DATASOURCE_SUCCESS = 'CREATE_DATASOURCE_SUCCESS'; @@ -120,6 +123,27 @@ export function scheduleQuery(query) { .catch(() => dispatch(addDangerToast(t('Your query could not be scheduled')))); } +export function estimateQueryCost(query) { 
+ const { dbId, schema, sql, templateParams } = query; + const endpoint = schema === null + ? `/superset/estimate_query_cost/${dbId}/` + : `/superset/estimate_query_cost/${dbId}/${schema}/`; + return dispatch => Promise.all([ + dispatch({ type: COST_ESTIMATE_STARTED, query }), + SupersetClient.post({ + endpoint, + postPayload: { sql, templateParams: JSON.parse(templateParams) }, + }) + .then(({ json }) => dispatch({ type: COST_ESTIMATE_RETURNED, query, json })) + .catch(response => + getClientErrorObject(response).then((error) => { + const message = error.error || error.statusText || t('Failed at retrieving results'); + return dispatch({ type: COST_ESTIMATE_FAILED, query, error: message }); + }), + ), + ]); +} + export function startQuery(query) { Object.assign(query, { id: query.id ? query.id : shortid.generate(), diff --git a/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx b/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx new file mode 100644 index 000000000000..a8e794fffa10 --- /dev/null +++ b/superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import React from 'react'; +import PropTypes from 'prop-types'; +import { Table } from 'reactable-arc'; +import { Alert } from 'react-bootstrap'; +import { t } from '@superset-ui/translation'; + +import Button from '../../components/Button'; +import Loading from '../../components/Loading'; +import ModalTrigger from '../../components/ModalTrigger'; + +const propTypes = { + dbId: PropTypes.number.isRequired, + schema: PropTypes.string.isRequired, + sql: PropTypes.string.isRequired, + getEstimate: PropTypes.func.isRequired, + queryCostEstimate: PropTypes.Object, + selectedText: PropTypes.string, + tooltip: PropTypes.string, + disabled: PropTypes.bool, +}; +const defaultProps = { + queryCostEstimate: [], + tooltip: '', + disabled: false, +}; + +class EstimateQueryCostButton extends React.PureComponent { + constructor(props) { + super(props); + this.queryCostModal = React.createRef(); + this.onClick = this.onClick.bind(this); + this.renderModalBody = this.renderModalBody.bind(this); + } + + onClick() { + this.props.getEstimate(); + } + + renderModalBody() { + if (this.props.queryCostEstimate.error !== null) { + return ( + + {this.props.queryCostEstimate.error} + + ); + } else if (this.props.queryCostEstimate.completed) { + return ( + + ); + } + return ; + } + + render() { + const { disabled, selectedText, tooltip } = this.props; + const btnText = selectedText + ? 
t('Estimate Selected Query Cost') + : t('Estimate Query Cost'); + return ( + + + {btnText} + + } + bsSize="medium" + /> + + ); + } +} + +EstimateQueryCostButton.propTypes = propTypes; +EstimateQueryCostButton.defaultProps = defaultProps; + +export default EstimateQueryCostButton; diff --git a/superset/assets/src/SqlLab/components/SqlEditor.jsx b/superset/assets/src/SqlLab/components/SqlEditor.jsx index cb0c6179fa25..9ab0ce9f7300 100644 --- a/superset/assets/src/SqlLab/components/SqlEditor.jsx +++ b/superset/assets/src/SqlLab/components/SqlEditor.jsx @@ -39,6 +39,7 @@ import TemplateParamsEditor from './TemplateParamsEditor'; import SouthPane from './SouthPane'; import SaveQuery from './SaveQuery'; import ScheduleQueryButton from './ScheduleQueryButton'; +import EstimateQueryCostButton from './EstimateQueryCostButton'; import ShareSqlLabQuery from './ShareSqlLabQuery'; import Timer from '../../components/Timer'; import Hotkeys from '../../components/Hotkeys'; @@ -109,6 +110,7 @@ class SqlEditor extends React.PureComponent { this.requestValidation.bind(this), VALIDATION_DEBOUNCE_MS, ); + this.getQueryCostEstimate = this.getQueryCostEstimate.bind(this); this.handleWindowResize = throttle( this.handleWindowResize.bind(this), WINDOW_RESIZE_THROTTLE_MS, @@ -210,6 +212,19 @@ class SqlEditor extends React.PureComponent { setQueryLimit(queryLimit) { this.props.actions.queryEditorSetQueryLimit(this.props.queryEditor, queryLimit); } + getQueryCostEstimate() { + if (this.props.database) { + const qe = this.props.queryEditor; + const query = { + dbId: qe.dbId, + sql: qe.selectedText ? 
qe.selectedText : this.state.sql, + sqlEditorId: qe.id, + schema: qe.schema, + templateParams: qe.templateParams, + }; + this.props.actions.estimateQueryCost(query); + } + } handleWindowResize() { this.setState({ height: this.getSqlEditorHeight() }); } @@ -383,6 +398,23 @@ class SqlEditor extends React.PureComponent { sql={this.state.sql} /> + { + isFeatureEnabled(FeatureFlag.ESTIMATE_QUERY_COST) && + this.props.database && + this.props.database.allows_cost_estimate && + + + + } {isFeatureEnabled(FeatureFlag.SCHEDULED_QUERIES) && bool: + return False + @classmethod def get_timestamp_expr( cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str] @@ -649,6 +654,55 @@ def select_star( sql = sqlparse.format(sql, reindent=True) return sql + @classmethod + def estimate_statement_cost( + cls, statement: str, database, cursor, user_name: str + ) -> Dict[str, str]: + """ + Generate a SQL query that estimates the cost of a given statement. + + :param statement: A single SQL statement + :param database: Database instance + :param cursor: Cursor instance + :param user_name: Effective username + """ + raise Exception("Database does not support cost estimation") + + @classmethod + def estimate_query_cost( + cls, database, schema: str, sql: str, source: str = None + ) -> List[Dict[str, str]]: + """ + Estimate the cost of a multiple statement SQL query. 
+ + :param database: Database instance + :param schema: Database schema + :param sql: SQL query with possibly multiple statements + :param source: Source of the query (eg, "sql_lab") + """ + database_version = database.get_extra().get("version") + if not cls.get_allow_cost_estimate(database_version): + raise Exception("Database does not support cost estimation") + + user_name = g.user.username if g.user else None + parsed_query = sql_parse.ParsedQuery(sql) + statements = parsed_query.get_statements() + + engine = database.get_sqla_engine( + schema=schema, nullpool=True, user_name=user_name, source=source + ) + + costs = [] + with closing(engine.raw_connection()) as conn: + with closing(conn.cursor()) as cursor: + for statement in statements: + costs.append( + cls.estimate_statement_cost( + statement, database, cursor, user_name + ) + ) + return costs + @classmethod def modify_url_for_impersonation(cls, url, impersonate_user: bool, username: str): """ diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 186557c978ce..13e574d7cf09 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -25,19 +25,22 @@ from typing import Any, Dict, List, Optional, Set, Tuple from urllib import parse +import simplejson as json from sqlalchemy import Column, literal_column from sqlalchemy.engine.base import Engine from sqlalchemy.engine.reflection import Inspector from sqlalchemy.engine.result import RowProxy from sqlalchemy.sql.expression import ColumnClause, Select -from superset import is_feature_enabled +from superset import app, is_feature_enabled, security_manager from superset.db_engine_specs.base import BaseEngineSpec from superset.exceptions import SupersetTemplateException from superset.models.sql_types.presto_sql_types import type_map as presto_type_map +from superset.sql_parse import ParsedQuery from superset.utils import core as utils QueryStatus = utils.QueryStatus +config = app.config # map between 
Presto types and Pandas pandas_dtype_map = { @@ -74,6 +77,10 @@ class PrestoEngineSpec(BaseEngineSpec): "date_add('day', 1, CAST({col} AS TIMESTAMP))))", } + @classmethod + def get_allow_cost_estimate(cls, version: str = None) -> bool: + return version is not None and StrictVersion(version) >= StrictVersion("0.319") + @classmethod def get_view_names(cls, inspector: Inspector, schema: Optional[str]) -> List[str]: """Returns an empty list @@ -388,6 +395,73 @@ def select_star( presto_cols, ) + @classmethod + def estimate_statement_cost( + cls, statement: str, database, cursor, user_name: str + ) -> Dict[str, str]: + """ + Run a SQL query that estimates the cost of a given statement. + + :param statement: A single SQL statement + :param database: Database instance + :param cursor: Cursor instance + :param user_name: Effective username + """ + parsed_query = ParsedQuery(statement) + sql = parsed_query.stripped() + + SQL_QUERY_MUTATOR = config.get("SQL_QUERY_MUTATOR") + if SQL_QUERY_MUTATOR: + sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) + + sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}" + cursor.execute(sql) + + # the output from Presto is a single column and a single row containing + # JSON: + # + # { + # ... 
+ # "estimate" : { + # "outputRowCount" : 8.73265878E8, + # "outputSizeInBytes" : 3.41425774958E11, + # "cpuCost" : 3.41425774958E11, + # "maxMemory" : 0.0, + # "networkCost" : 3.41425774958E11 + # } + # } + result = json.loads(cursor.fetchone()[0]) + estimate = result["estimate"] + + def humanize(value: Any, suffix: str) -> str: + try: + value = int(value) + except ValueError: + return str(value) + + prefixes = ["K", "M", "G", "T", "P", "E", "Z", "Y"] + prefix = "" + to_next_prefix = 1000 + while value > to_next_prefix and prefixes: + prefix = prefixes.pop(0) + value //= to_next_prefix + + return f"{value} {prefix}{suffix}" + + cost = {} + columns = [ + ("outputRowCount", "Output count", " rows"), + ("outputSizeInBytes", "Output size", "B"), + ("cpuCost", "CPU cost", ""), + ("maxMemory", "Max memory", "B"), + ("networkCost", "Network cost", ""), + ] + for key, label, suffix in columns: + if key in estimate: + cost[label] = humanize(estimate[key], suffix) + + return cost + @classmethod def adjust_database_uri(cls, uri, selected_schema=None): database = uri.database diff --git a/superset/models/core.py b/superset/models/core.py index e3dcafa0a701..b31fb6710c5d 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -773,6 +773,16 @@ def name(self): def allows_subquery(self): return self.db_engine_spec.allows_subqueries + @property + def allows_cost_estimate(self) -> bool: + extra = self.get_extra() + database_version = extra.get("version") + cost_estimate_enabled = extra.get("cost_estimate_enabled") + return ( + self.db_engine_spec.get_allow_cost_estimate(database_version) + and cost_estimate_enabled + ) + @property def data(self): return { @@ -781,6 +791,7 @@ def data(self): "backend": self.backend, "allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch, "allows_subquery": self.allows_subquery, + "allows_cost_estimate": self.allows_cost_estimate, } @property diff --git a/superset/views/core.py b/superset/views/core.py index 
b81656d8c2a1..826f04c068cb 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -71,6 +71,7 @@ DatabaseNotFound, SupersetException, SupersetSecurityException, + SupersetTimeoutException, ) from superset.jinja_context import get_template_processor from superset.legacy import update_time_range @@ -107,8 +108,12 @@ get_viz, ) + config = app.config CACHE_DEFAULT_TIMEOUT = config.get("CACHE_DEFAULT_TIMEOUT", 0) +SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config.get( + "SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT", 10 +) stats_logger = config.get("STATS_LOGGER") DAR = models.DatasourceAccessRequest QueryStatus = utils.QueryStatus @@ -2396,6 +2401,34 @@ def select_star(self, database_id, table_name, schema=None): mydb.select_star(table_name, schema, latest_partition=True, show_cols=True) ) + @has_access_api + @expose("/estimate_query_cost//", methods=["POST"]) + @expose("/estimate_query_cost///", methods=["POST"]) + @event_logger.log_this + def estimate_query_cost(self, database_id: int, schema: str = None) -> Response: + mydb = db.session.query(models.Database).filter_by(id=database_id).one_or_none() + + sql = json.loads(request.form.get("sql", '""')) + template_params = json.loads(request.form.get("templateParams") or "{}") + if template_params: + template_processor = get_template_processor(mydb) + sql = template_processor.process_template(sql, **template_params) + + timeout = SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT + timeout_msg = f"The estimation exceeded the {timeout} seconds timeout." 
+ try: + with utils.timeout(seconds=timeout, error_message=timeout_msg): + cost = mydb.db_engine_spec.estimate_query_cost( + mydb, schema, sql, utils.sources.get("sql_lab") + ) + except SupersetTimeoutException as e: + logging.exception(e) + return json_error_response(timeout_msg) + except Exception as e: + return json_error_response(str(e)) + + return json_success(json.dumps(cost)) + @expose("/theme/") def theme(self): return self.render_template("superset/theme.html") diff --git a/superset/views/database/api.py b/superset/views/database/api.py index 2bb8ea5f35ec..352086eb6c17 100644 --- a/superset/views/database/api.py +++ b/superset/views/database/api.py @@ -48,6 +48,7 @@ class DatabaseRestApi(DatabaseMixin, ModelRestApi): "allow_multi_schema_metadata_fetch", "allow_csv_upload", "allows_subquery", + "allows_cost_estimate", "backend", ] # Removes the local limit for the page size