Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/sqllab.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,36 @@ database configuration:
Here, "version" should be the version of your Presto cluster. Support for this
functionality was introduced in Presto 0.319.

You also need to enable the feature flag in your ``superset_config.py``, and you
can optionally specify a custom formatter. For example:

.. code-block:: python

def presto_query_cost_formatter(cost_estimate: List[Dict[str, float]]) -> List[Dict[str, str]]:
    """
    Collapse a Presto cost estimate into a single dollar figure.

    :param cost_estimate: JSON estimate from Presto (a list of estimate dicts)
    :return: Human readable cost estimate, as a one-entry list with a "Cost" key
    """
    # Dollar price per unit of each Presto cost metric. These prices are only
    # placeholders: calibrate them against your own infrastructure.
    unit_prices = (
        ("cpuCost", 2e-12),
        ("networkCost", 1e-12),
    )

    total = 0
    for estimate in cost_estimate:
        # A metric missing from an estimate contributes nothing to the total.
        for metric, price in unit_prices:
            total += estimate.get(metric, 0) * price

    return [{"Cost": f"US$ {total:.2f}"}]


DEFAULT_FEATURE_FLAGS = {
# Feature flag for query cost estimation.
"ESTIMATE_QUERY_COST": True,
# Optional: custom formatters keyed by engine name. An engine without an
# entry here falls back to its engine spec's own `query_cost_formatter`.
"QUERY_COST_FORMATTERS_BY_ENGINE": {"presto": presto_query_cost_formatter},
}

.. _ref_ctas_engine_config:

Create Table As (CTAS)
Expand Down
15 changes: 14 additions & 1 deletion superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,14 +674,27 @@ def select_star( # pylint: disable=too-many-arguments,too-many-locals
@classmethod
def estimate_statement_cost(
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
) -> Dict[str, Any]:
"""
Generate a SQL query that estimates the cost of a given statement.

This base implementation performs no estimation and always raises.

:param statement: A single SQL statement
:param database: Database instance
:param cursor: Cursor instance
:param user_name: Effective username
:return: Dictionary with different costs
:raises Exception: Always, in this base implementation
"""
raise Exception("Database does not support cost estimation")

@classmethod
def query_cost_formatter(
    cls, raw_cost: List[Dict[str, Any]]
) -> List[Dict[str, str]]:
    """
    Turn a raw cost estimate into human readable values.

    This base implementation does no formatting and always raises.

    :param raw_cost: Raw estimate from `estimate_query_cost`
    :return: Human readable cost estimate
    :raises Exception: Always, in this base implementation
    """
    message = "Database does not support cost estimation"
    raise Exception(message)

Expand Down
29 changes: 22 additions & 7 deletions superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,14 +443,15 @@ def select_star( # pylint: disable=too-many-arguments
@classmethod
def estimate_statement_cost( # pylint: disable=too-many-locals
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
) -> Dict[str, float]:
"""
Generate a SQL query that estimates the cost of a given statement.
Run a SQL query that estimates the cost of a given statement.

:param statement: A single SQL statement
:param database: Database instance
:param cursor: Cursor instance
:param user_name: Effective username
:return: JSON estimate from Presto
"""
parsed_query = ParsedQuery(statement)
sql = parsed_query.stripped()
Expand All @@ -476,7 +477,18 @@ def estimate_statement_cost( # pylint: disable=too-many-locals
# }
# }
result = json.loads(cursor.fetchone()[0])
estimate = result["estimate"]
return result["estimate"]

@classmethod
def query_cost_formatter(
cls, raw_cost: List[Dict[str, float]]
) -> List[Dict[str, str]]:
"""
Format cost estimate.

:param raw_cost: JSON estimate from Presto
:return: Human readable cost estimate
"""

def humanize(value: Any, suffix: str) -> str:
try:
Expand All @@ -493,17 +505,20 @@ def humanize(value: Any, suffix: str) -> str:

return f"{value} {prefix}{suffix}"

cost = {}
cost = []
columns = [
("outputRowCount", "Output count", " rows"),
("outputSizeInBytes", "Output size", "B"),
("cpuCost", "CPU cost", ""),
("maxMemory", "Max memory", "B"),
("networkCost", "Network cost", ""),
]
for key, label, suffix in columns:
if key in estimate:
cost[label] = humanize(estimate[key], suffix)
for row in raw_cost:
statement_cost = {}
for key, label, suffix in columns:
if key in row:
statement_cost[label] = humanize(row[key], suffix).strip()
cost.append(statement_cost)

return cost

Expand Down
9 changes: 9 additions & 0 deletions superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2410,6 +2410,15 @@ def estimate_query_cost(self, database_id: int, schema: str = None) -> Response:
except Exception as e:
return json_error_response(str(e))

spec = mydb.db_engine_spec
query_cost_formatters = get_feature_flags().get(
"QUERY_COST_FORMATTERS_BY_ENGINE", {}
)
query_cost_formatter = query_cost_formatters.get(
spec.engine, spec.query_cost_formatter
)
cost = query_cost_formatter(cost)

return json_success(json.dumps(cost))

@expose("/theme/")
Expand Down