Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import static io.trino.cost.SymbolStatsEstimate.buildFrom;
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
import static io.trino.util.MoreMath.averageExcludingNaNs;
import static io.trino.util.MoreMath.max;
import static io.trino.util.MoreMath.maxExcludeNaN;
import static io.trino.util.MoreMath.min;
import static io.trino.util.MoreMath.minExcludeNaN;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.NaN;
import static java.lang.Double.POSITIVE_INFINITY;
Expand All @@ -31,6 +33,11 @@

public final class ComparisonStatsCalculator
{
/**
 * Fraction of value pairs from the overlapping part of two ranges that is assumed to satisfy
 * an inequality predicate.
 * <p>
 * Values are assumed to be uniformly distributed within each range, and within the overlapping
 * range every pair of distinct values from both ranges is assumed to exist. Under those
 * assumptions, on average half of the pairs match the inequality.
 */
public static final double OVERLAPPING_RANGE_INEQUALITY_FILTER_COEFFICIENT = 0.5;
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated

// Not instantiable: the constructor is private and does nothing.
private ComparisonStatsCalculator() {}

public static PlanNodeStatsEstimate estimateExpressionToLiteralComparison(
Expand Down Expand Up @@ -164,6 +171,13 @@ public static PlanNodeStatsEstimate estimateExpressionToExpressionComparison(
case LESS_THAN_OR_EQUAL:
case GREATER_THAN:
case GREATER_THAN_OR_EQUAL:
return estimateExpressionToExpressionInequality(
operator,
inputStatistics,
leftExpressionStatistics,
leftExpressionSymbol,
rightExpressionStatistics,
rightExpressionSymbol);
case IS_DISTINCT_FROM:
return PlanNodeStatsEstimate.unknown();
}
Expand Down Expand Up @@ -239,4 +253,128 @@ private static PlanNodeStatsEstimate estimateExpressionNotEqualToExpression(
rightExpressionSymbol.ifPresent(symbol -> result.addSymbolStatistics(symbol, rightNullsFiltered));
return result.build();
}

private static PlanNodeStatsEstimate estimateExpressionToExpressionInequality(
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
ComparisonExpression.Operator operator,
PlanNodeStatsEstimate inputStatistics,
SymbolStatsEstimate leftExpressionStatistics,
Optional<Symbol> leftExpressionSymbol,
SymbolStatsEstimate rightExpressionStatistics,
Optional<Symbol> rightExpressionSymbol)
{
if (leftExpressionStatistics.isUnknown() || rightExpressionStatistics.isUnknown()) {
return PlanNodeStatsEstimate.unknown();
}
if (isNaN(leftExpressionStatistics.getNullsFraction()) && isNaN(rightExpressionStatistics.getNullsFraction())) {
Comment thread
sopel39 marked this conversation as resolved.
Outdated
return PlanNodeStatsEstimate.unknown();
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
}
if (leftExpressionStatistics.statisticRange().isEmpty() || rightExpressionStatistics.statisticRange().isEmpty()) {
return inputStatistics.mapOutputRowCount(rowCount -> 0.0);
}

// We don't know the correlation between NULLs, so we take the max nullsFraction from the expression statistics
// to make a conservative estimate (nulls are fully correlated) for the NULLs filter factor
double nullsFilterFactor = 1 - maxExcludeNaN(leftExpressionStatistics.getNullsFraction(), rightExpressionStatistics.getNullsFraction());
switch (operator) {
case LESS_THAN:
case LESS_THAN_OR_EQUAL:
return estimateExpressionLessThanOrEqualToExpression(
inputStatistics,
leftExpressionStatistics,
leftExpressionSymbol,
rightExpressionStatistics,
rightExpressionSymbol,
nullsFilterFactor);
case GREATER_THAN:
case GREATER_THAN_OR_EQUAL:
Comment thread
sopel39 marked this conversation as resolved.
Outdated
return estimateExpressionLessThanOrEqualToExpression(
inputStatistics,
rightExpressionStatistics,
rightExpressionSymbol,
leftExpressionStatistics,
leftExpressionSymbol,
nullsFilterFactor);
default:
throw new IllegalArgumentException("Unsupported inequality operator " + operator);
}
}

private static PlanNodeStatsEstimate estimateExpressionLessThanOrEqualToExpression(
PlanNodeStatsEstimate inputStatistics,
SymbolStatsEstimate leftExpressionStatistics,
Optional<Symbol> leftExpressionSymbol,
SymbolStatsEstimate rightExpressionStatistics,
Optional<Symbol> rightExpressionSymbol,
double nullsFilterFactor)
{
StatisticRange leftRange = StatisticRange.from(leftExpressionStatistics);
StatisticRange rightRange = StatisticRange.from(rightExpressionStatistics);
// left is always greater than right, no overlap
if (leftRange.getLow() > rightRange.getHigh()) {
return inputStatistics.mapOutputRowCount(rowCount -> 0.0);
}
// left is always lesser than right
if (leftRange.getHigh() < rightRange.getLow()) {
PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics);
leftExpressionSymbol.ifPresent(symbol -> estimate.addSymbolStatistics(
symbol,
leftExpressionStatistics.mapNullsFraction(nullsFraction -> 0.0)));
rightExpressionSymbol.ifPresent(symbol -> estimate.addSymbolStatistics(
symbol,
rightExpressionStatistics.mapNullsFraction(nullsFraction -> 0.0)));
return estimate.setOutputRowCount(inputStatistics.getOutputRowCount() * nullsFilterFactor)
.build();
}

PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics);
double leftOverlappingRangeFraction = leftRange.overlapPercentWith(rightRange);
double leftAlwaysLessRangeFraction;
if (leftRange.getLow() < rightRange.getLow()) {
leftAlwaysLessRangeFraction = min(
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
leftRange.overlapPercentWith(new StatisticRange(leftRange.getLow(), rightRange.getLow(), NaN)),
// Prevents expanding NDVs in case range fractions addition goes beyond 1 for infinite ranges
1 - leftOverlappingRangeFraction);
}
else {
leftAlwaysLessRangeFraction = 0;
}
leftExpressionSymbol.ifPresent(symbol -> estimate.addSymbolStatistics(
symbol,
SymbolStatsEstimate.builder()
.setLowValue(leftRange.getLow())
.setHighValue(minExcludeNaN(leftRange.getHigh(), rightRange.getHigh()))
.setAverageRowSize(leftExpressionStatistics.getAverageRowSize())
.setDistinctValuesCount(leftExpressionStatistics.getDistinctValuesCount() * (leftAlwaysLessRangeFraction + leftOverlappingRangeFraction))
.setNullsFraction(0)
.build()));

double rightOverlappingRangeFraction = rightRange.overlapPercentWith(leftRange);
double rightAlwaysGreaterRangeFraction;
if (leftRange.getHigh() < rightRange.getHigh()) {
rightAlwaysGreaterRangeFraction = min(
rightRange.overlapPercentWith(new StatisticRange(leftRange.getHigh(), rightRange.getHigh(), NaN)),
// Prevents expanding NDVs in case range fractions addition goes beyond 1 for infinite ranges
1 - rightOverlappingRangeFraction);
}
else {
rightAlwaysGreaterRangeFraction = 0;
}
rightExpressionSymbol.ifPresent(symbol -> estimate.addSymbolStatistics(
symbol,
SymbolStatsEstimate.builder()
.setLowValue(maxExcludeNaN(leftRange.getLow(), rightRange.getLow()))
.setHighValue(rightRange.getHigh())
.setAverageRowSize(rightExpressionStatistics.getAverageRowSize())
.setDistinctValuesCount(rightExpressionStatistics.getDistinctValuesCount() * (rightOverlappingRangeFraction + rightAlwaysGreaterRangeFraction))
.setNullsFraction(0)
.build()));
double filterFactor =
// all left range values which are below right range are selected
leftAlwaysLessRangeFraction +
// for pairs in overlapping range, only half of pairs are selected
leftOverlappingRangeFraction * rightOverlappingRangeFraction * OVERLAPPING_RANGE_INEQUALITY_FILTER_COEFFICIENT +
// all pairs where left value is in overlapping range and right value is above left range are selected
leftOverlappingRangeFraction * rightAlwaysGreaterRangeFraction;
return estimate.setOutputRowCount(inputStatistics.getOutputRowCount() * nullsFilterFactor * filterFactor).build();
Comment thread
sopel39 marked this conversation as resolved.
Outdated
}
}
Loading