Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,42 @@ Optimizer Properties
.. warning:: The number of possible join orders scales factorially with the number of relations,
so increasing this value can cause serious performance issues.

``optimizer.optimize-case-expression-predicate``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``false``

When set to ``true``, a CASE expression predicate is simplified into a series of AND/OR clauses.
For example::

SELECT * FROM orders
WHERE (CASE
WHEN status=0 THEN 'Pending'
WHEN status=1 THEN 'Complete'
WHEN status=2 THEN 'Returned'
ELSE 'Unknown'
END) = 'Pending';


will get simplified into::

SELECT * FROM orders
WHERE status IS NOT NULL AND status=0;

If the filter condition instead matches the ELSE result 'Unknown', the predicate is translated into::

SELECT * FROM orders
WHERE (status IS NULL OR (status!=0 AND status!=1 AND status!=2));

The simplification avoids branching and string operations, which makes the predicate cheaper to evaluate,
and it allows predicate pushdown, which can avoid a full table scan. This optimization mainly targets queries
generated by business intelligence tools such as Looker, which support human-readable labels through
`case <https://cloud.google.com/looker/docs/reference/param-field-case>`_ statements.

The optimization currently applies only to simple CASE expressions in which every WHEN condition is
unambiguous and deterministic, refers to the same column, and uses the equals comparison operator.

Planner Properties
--------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ public PlanOptimizers(
ruleStats,
statsCalculator,
estimatedExchangesCostCalculator,
new RewriteCaseExpressionPredicate(metadata.getFunctionAndTypeManager()).rules());
new RewriteCaseExpressionPredicate(metadata).rules());

PlanOptimizer predicatePushDown = new StatsRecordingPlanOptimizer(optimizerStats, new PredicatePushDown(metadata, sqlParser));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
import com.facebook.presto.expressions.LogicalRowExpressions;
import com.facebook.presto.expressions.RowExpressionRewriter;
import com.facebook.presto.expressions.RowExpressionTreeRewriter;
import com.facebook.presto.metadata.FunctionAndTypeManager;
import com.facebook.presto.metadata.Metadata;
import com.facebook.presto.spi.relation.CallExpression;
import com.facebook.presto.spi.relation.ConstantExpression;
import com.facebook.presto.spi.relation.DeterminismEvaluator;
import com.facebook.presto.spi.relation.InputReferenceExpression;
import com.facebook.presto.spi.relation.RowExpression;
import com.facebook.presto.spi.relation.SpecialFormExpression;
Expand All @@ -45,6 +46,7 @@
import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.IS_NULL;
import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.SWITCH;
import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.WHEN;
import static com.facebook.presto.sql.planner.RowExpressionInterpreter.evaluateConstantRowExpression;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

Expand All @@ -61,18 +63,18 @@
* <p>
* can be converted into a series of AND/OR clauses as below
* <p>
* (result1 = value AND expression=constant1) OR
* (result2 = value AND expression=constant2 AND !(expression=constant1)) OR
* (result3 = value AND expression=constant3 AND !(expression=constant1) AND !(expression=constant2)) OR
* (elseResult = value AND !(expression=constant1) AND !(expression=constant2) AND !(expression=constant3))
* (result1 = value AND expression IS NOT NULL AND expression=constant1) OR
* (result2 = value AND expression IS NOT NULL AND expression=constant2 AND !(expression=constant1)) OR
* (result3 = value AND expression IS NOT NULL AND expression=constant3 AND !(expression=constant1) AND !(expression=constant2)) OR
* (elseResult = value AND ((expression IS NULL) OR (!(expression=constant1) AND !(expression=constant2) AND !(expression=constant3))))
* <p>
* The above conversion evaluates the conditions in WHEN clauses multiple times. But if we ensure these conditions are
* disjoint, we can skip the negations of the previous WHEN conditions and simplify the expression to:
* <p>
* (result1 = value AND expression=constant1) OR
* (result2 = value AND expression=constant2) OR
* (result3 = value AND expression=constant3) OR
* (elseResult = value AND !(expression=constant1) AND !(expression=constant2) AND !(expression=constant3))
* (result1 = value AND expression IS NOT NULL AND expression=constant1) OR
* (result2 = value AND expression IS NOT NULL AND expression=constant2) OR
* (result3 = value AND expression IS NOT NULL AND expression=constant3) OR
* (elseResult = value AND ((expression IS NULL) OR (!(expression=constant1) AND !(expression=constant2) AND !(expression=constant3))))
* <p>
* To ensure the WHEN conditions are disjoint, the following criteria need to be met:
* 1. Value is either a constant or column reference or input reference and not any function
Expand All @@ -89,47 +91,56 @@
public class RewriteCaseExpressionPredicate
extends RowExpressionRewriteRuleSet
{
public RewriteCaseExpressionPredicate(FunctionAndTypeManager functionAndTypeManager)
public RewriteCaseExpressionPredicate(Metadata metadata)
{
super(new Rewriter(functionAndTypeManager));
super(new Rewriter(metadata));
}

private static class Rewriter
implements PlanRowExpressionRewriter
{
private final CaseExpressionPredicateRewriter caseExpressionPredicateRewriter;
private final Metadata metadata;

public Rewriter(FunctionAndTypeManager functionAndTypeManager)
public Rewriter(Metadata metadata)
{
requireNonNull(functionAndTypeManager, "functionAndTypeManager is null");
this.caseExpressionPredicateRewriter = new CaseExpressionPredicateRewriter(functionAndTypeManager);
this.metadata = requireNonNull(metadata, "metadata is null");
}

@Override
public RowExpression rewrite(RowExpression expression, Rule.Context context)
{
return RowExpressionTreeRewriter.rewriteWith(caseExpressionPredicateRewriter, expression);
return RowExpressionTreeRewriter.rewriteWith(new CaseExpressionPredicateRewriter(this.metadata, context.getSession()), expression);
}
}

private static class CaseExpressionPredicateRewriter
extends RowExpressionRewriter<Void>
{
private final Metadata metadata;
private final Session session;
private final FunctionResolution functionResolution;
private final LogicalRowExpressions logicalRowExpressions;
private final DeterminismEvaluator determinismEvaluator;

private CaseExpressionPredicateRewriter(FunctionAndTypeManager functionAndTypeManager)
private CaseExpressionPredicateRewriter(Metadata metadata, Session session)
{
this.functionResolution = new FunctionResolution(functionAndTypeManager);
this.metadata = requireNonNull(metadata, "metadata is null");
this.session = requireNonNull(session, "session is null");
this.functionResolution = new FunctionResolution(metadata.getFunctionAndTypeManager());
this.logicalRowExpressions = new LogicalRowExpressions(
new RowExpressionDeterminismEvaluator(functionAndTypeManager),
new RowExpressionDeterminismEvaluator(metadata.getFunctionAndTypeManager()),
functionResolution,
functionAndTypeManager);
metadata.getFunctionAndTypeManager());
this.determinismEvaluator = new RowExpressionDeterminismEvaluator(metadata.getFunctionAndTypeManager());
}

@Override
public RowExpression rewriteCall(CallExpression node, Void context, RowExpressionTreeRewriter<Void> treeRewriter)
{
RowExpression rewritten = node;
if (!determinismEvaluator.isDeterministic(node)) {
return treeRewriter.defaultRewrite(rewritten, context);
}
if (functionResolution.isComparisonFunction(node.getFunctionHandle()) && node.getArguments().size() == 2) {
RowExpression left = node.getArguments().get(0);
RowExpression right = node.getArguments().get(1);
Expand All @@ -140,7 +151,7 @@ else if (isCaseExpression(right) && isSimpleExpression(left)) {
return processCaseExpression(right, expression -> replaceArguments(node, left, expression), left);
}
}
return null;
return treeRewriter.defaultRewrite(rewritten, context);
}

private boolean isCaseExpression(RowExpression expression)
Expand Down Expand Up @@ -221,6 +232,17 @@ private RowExpression processCaseExpression(SpecialFormExpression caseExpression
ImmutableList.Builder<RowExpression> andExpressions = new ImmutableList.Builder<>();
ImmutableList.Builder<RowExpression> invertedOperands = new ImmutableList.Builder<>();

RowExpression nullCheckExpression;
if (caseOperand.isPresent()) {
nullCheckExpression = new SpecialFormExpression(IS_NULL, BOOLEAN, caseOperand.get());
}
else {
RowExpression whenOperand = whenClauses.stream().findFirst()
.map(whenClause -> ((SpecialFormExpression) whenClause).getArguments().get(0))
.orElseThrow(() -> new IllegalArgumentException("When clause is empty"));
nullCheckExpression = new SpecialFormExpression(IS_NULL, BOOLEAN, ((CallExpression) whenOperand).getArguments().get(0));
}

for (RowExpression whenClause : whenClauses) {
RowExpression whenOperand = ((SpecialFormExpression) whenClause).getArguments().get(0);
if (caseOperand.isPresent()) {
Expand All @@ -232,12 +254,15 @@ private RowExpression processCaseExpression(SpecialFormExpression caseExpression
}

RowExpression comparisonExpression = comparisonExpressionGenerator.apply(whenResult);
andExpressions.add(and(comparisonExpression, whenOperand));
andExpressions.add(and(
comparisonExpression,
logicalRowExpressions.notCallExpression(nullCheckExpression),
whenOperand));
invertedOperands.add(logicalRowExpressions.notCallExpression(whenOperand));
}
RowExpression elseCondition = and(
getElseExpression(castExpression, value, elseResult, comparisonExpressionGenerator),
and(invertedOperands.build()));
or(nullCheckExpression, and(invertedOperands.build())));
andExpressions.add(elseCondition);

return or(andExpressions.build());
Expand Down Expand Up @@ -298,12 +323,16 @@ private boolean allAreEqualsExpression(List<RowExpression> whenClauses)

private boolean allExpressionsAreConstantAndUnique(List<RowExpression> expressions)
{
Set<RowExpression> expressionSet = new HashSet<>();
Set<Object> literals = new HashSet<>();
for (RowExpression expression : expressions) {
if (!isConstantExpression(expression) || expressionSet.contains(expression)) {
if (!isConstantExpression(expression)) {
return false;
}
Object constantExpression = evaluateConstantRowExpression(expression, metadata, session.toConnectorSession());
if (constantExpression == null || literals.contains(constantExpression)) {
return false;
}
expressionSet.add(expression);
literals.add(constantExpression);
}
return true;
}
Expand All @@ -327,6 +356,7 @@ public boolean isRewriterEnabled(Session session)
/**
 * Returns the rules contributed by this rule set: the ones produced by
 * {@code projectRowExpressionRewriteRule()}, {@code filterRowExpressionRewriteRule()}
 * and {@code joinRowExpressionRewriteRule()}, in that order.
 *
 * @return an immutable set holding those three rules
 */
public Set<Rule<?>> rules()
{
    ImmutableSet.Builder<Rule<?>> ruleSet = ImmutableSet.builder();
    ruleSet.add(projectRowExpressionRewriteRule());
    ruleSet.add(filterRowExpressionRewriteRule());
    ruleSet.add(joinRowExpressionRewriteRule());
    return ruleSet.build();
}
Expand Down
Loading