Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ private static PlanNodeStatsEstimate estimateExpressionEqualToLiteral(
filterRange = new StatisticRange(literalValue.getAsDouble(), literalValue.getAsDouble(), 1);
}
else {
// When the literal cannot be represented as a double and the column has no NDV
// and no range, StatisticRange.overlapPercentWith falls back to the
// infinite-to-infinite 0.5 heuristic, which is meant for range overlap, not point
// equality. Treat the selectivity as unknown instead.
if (isNaN(expressionStatistics.getDistinctValuesCount())
&& !isFinite(expressionStatistics.getLowValue())
&& !isFinite(expressionStatistics.getHighValue())) {
return PlanNodeStatsEstimate.unknown();
Comment thread
raunaqmorarka marked this conversation as resolved.
}
filterRange = new StatisticRange(NEGATIVE_INFINITY, POSITIVE_INFINITY, 1);
}
return estimateFilterRange(inputStatistics, expressionStatistics, expressionSymbol, filterRange);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,37 @@ public void testSparseColumnInPredicateOverlap()
.nullsFraction(0.0));
}

@Test
public void testNotInOnColumnWithUnknownNdvAndRange()
{
    // Regression guard for `c NOT IN ('a', 'b')` over a varchar column whose NDV is
    // unknown and whose range is unbounded. Previously each per-value equality was given
    // a 0.5 heuristic selectivity, the IN sum saturated at the full non-null row count,
    // and $not(IN) then subtracted down to 0 rows.
    VarcharType varcharType = createVarcharType(16);

    // Column statistics with NaN row size and NDV, infinite bounds, and a zero null fraction.
    SymbolStatsEstimate unknownColumnStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(NaN)
            .setDistinctValuesCount(NaN)
            .setLowValue(NEGATIVE_INFINITY)
            .setHighValue(POSITIVE_INFINITY)
            .setNullsFraction(0)
            .build();
    PlanNodeStatsEstimate inputStats = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol(varcharType, "c"), unknownColumnStats)
            .setOutputRowCount(1000)
            .build();

    // c IN ('a', 'b')
    In membership = new In(
            new Reference(varcharType, "c"),
            ImmutableList.of(
                    new Constant(varcharType, Slices.utf8Slice("a")),
                    new Constant(varcharType, Slices.utf8Slice("b"))));

    // With nothing known about the column, the negated IN must report an unknown row
    // count instead of inventing one.
    assertExpression(not(membership), inputStats).outputRowsCountUnknown();
}

private PlanNodeStatsAssertion assertExpression(Expression expression)
{
return assertExpression(expression, session);
Expand Down