Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -890,10 +890,10 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
}
common::Filter* oldFilter = nullptr;
try {
common::Subfield subfield;
if (auto filter = exec::ExprToSubfieldFilterParser::getInstance()
->leafCallToSubfieldFilter(
*call, subfield, evaluator, negated)) {
if (auto subfieldAndFilter =
exec::ExprToSubfieldFilterParser::getInstance()
->leafCallToSubfieldFilter(*call, evaluator, negated)) {
auto& [subfield, filter] = subfieldAndFilter.value();
if (auto it = filters.find(subfield); it != filters.end()) {
oldFilter = it->second.get();
filter = filter->mergeWith(oldFilter);
Expand Down
9 changes: 5 additions & 4 deletions velox/dwio/common/MetadataFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,14 @@ std::unique_ptr<MetadataFilter::Node> MetadataFilter::Node::fromExpression(
return fromExpression(*call->inputs()[0], evaluator, !negated);
}
try {
Subfield subfield;
auto filter =
auto subfieldAndFilter =
exec::ExprToSubfieldFilterParser::getInstance()
->leafCallToSubfieldFilter(*call, subfield, evaluator, negated);
if (!filter) {
->leafCallToSubfieldFilter(*call, evaluator, negated);
if (!subfieldAndFilter.has_value()) {
return nullptr;
}

auto& [subfield, filter] = subfieldAndFilter.value();
VELOX_CHECK(
subfield.valid(),
"Invalid subfield from expression: {}",
Expand Down
56 changes: 53 additions & 3 deletions velox/exec/tests/utils/PlanBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,58 @@ PlanBuilder& PlanBuilder::tpcdsTableScan(
.endTableScan();
}

namespace {

// Analyzes 'expr' to determine if it can be expressed as a subfield filter.
// Returns a pair of subfield and filter if so. Otherwise, throws.
//
// Supports all expressions supported by
// exec::ExprToSubfieldFilterParser::leafCallToSubfieldFilter + negations and
// disjunctions over same subfield.
//
// Examples:
//  a = 1
//  a = 1 OR a > 10
//  not (a = 1)
//
// @param expr Typed expression to convert.
// @param evaluator Forwarded as-is to leafCallToSubfieldFilter.
// @return Pair of the subfield and the filter to apply to it.
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
    const core::TypedExprPtr& expr,
    core::ExpressionEvaluator* evaluator) {
  // The kind check has to gate the cast. Placed in the init-statement slot of
  // an 'if (init; cond)' its result is silently discarded and only the pointer
  // returned by asUnchecked() would be tested.
  auto* call = expr->isCallKind()
      ? expr->asUnchecked<core::CallTypedExpr>()
      : nullptr;

  if (call != nullptr) {
    if (call->name() == "or") {
      // Disjunction: both sides must convert and refer to the same subfield.
      VELOX_CHECK_EQ(call->inputs().size(), 2);
      auto left = toSubfieldFilter(call->inputs()[0], evaluator);
      auto right = toSubfieldFilter(call->inputs()[1], evaluator);
      VELOX_CHECK(left.first == right.first);
      return {
          std::move(left.first),
          exec::ExprToSubfieldFilterParser::makeOrFilter(
              std::move(left.second), std::move(right.second))};
    }

    if (call->name() == "not") {
      // Validate arity before indexing, mirroring the check in the 'or' branch.
      VELOX_CHECK_EQ(call->inputs().size(), 1);
      const auto& input = call->inputs()[0];
      auto* inner = input->isCallKind()
          ? input->asUnchecked<core::CallTypedExpr>()
          : nullptr;
      if (inner != nullptr) {
        // Negation is delegated to the leaf parser via negated = true.
        if (auto result =
                exec::ExprToSubfieldFilterParser::getInstance()
                    ->leafCallToSubfieldFilter(*inner, evaluator, true)) {
          return std::move(result.value());
        }
      }
    } else {
      if (auto result =
              exec::ExprToSubfieldFilterParser::getInstance()
                  ->leafCallToSubfieldFilter(*call, evaluator, false)) {
        return std::move(result.value());
      }
    }
  }

  // Reached when 'expr' is not a call or the leaf parser returned no filter.
  VELOX_UNSUPPORTED(
      "Unsupported expression for range filter: {}", expr->toString());
}
} // namespace

PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters(
std::vector<std::string> subfieldFilters) {
VELOX_CHECK(subfieldFiltersMap_.empty());
Expand All @@ -210,9 +262,7 @@ PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters(
// Parse directly to subfieldFiltersMap_
auto filterExpr = core::Expressions::inferTypes(
untypedExpr, parseType, planBuilder_.pool_);
auto [subfield, subfieldFilter] =
exec::ExprToSubfieldFilterParser::getInstance()->toSubfieldFilter(
filterExpr, &evaluator);
auto [subfield, subfieldFilter] = toSubfieldFilter(filterExpr, &evaluator);

auto it = columnAliases_.find(subfield.toString());
if (it != columnAliases_.end()) {
Expand Down
97 changes: 43 additions & 54 deletions velox/expression/ExprToSubfieldFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ std::unique_ptr<common::Filter> ExprToSubfieldFilterParser::makeInFilter(
case TypeKind::VARCHAR: {
auto stringElements = elements->as<SimpleVector<StringView>>();
std::vector<std::string> values;
values.reserve(size);
for (auto i = 0; i < size; i++) {
values.push_back(std::string(stringElements->valueAt(offset + i)));
}
Expand Down Expand Up @@ -471,102 +472,90 @@ std::unique_ptr<common::Filter> ExprToSubfieldFilterParser::makeOrFilter(
return orFilter(std::move(a), std::move(b));
}

std::unique_ptr<common::Filter>
namespace {
// Packs 'subfield' and 'filter' into an optional pair. Yields std::nullopt
// when 'filter' is null; otherwise both arguments are moved into the result.
std::optional<std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
combine(common::Subfield& subfield, std::unique_ptr<common::Filter>& filter) {
  if (filter == nullptr) {
    return std::nullopt;
  }
  return std::make_pair(std::move(subfield), std::move(filter));
}
} // namespace

std::optional<std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
PrestoExprToSubfieldFilterParser::leafCallToSubfieldFilter(
const core::CallTypedExpr& call,
common::Subfield& subfield,
core::ExpressionEvaluator* evaluator,
bool negated) {
if (call.inputs().empty()) {
return nullptr;
return std::nullopt;
}

const auto* leftSide = call.inputs()[0].get();

common::Subfield subfield;
if (call.name() == "eq") {
if (toSubfield(leftSide, subfield)) {
return negated ? makeNotEqualFilter(call.inputs()[1], evaluator)
: makeEqualFilter(call.inputs()[1], evaluator);
auto filter = negated ? makeNotEqualFilter(call.inputs()[1], evaluator)
: makeEqualFilter(call.inputs()[1], evaluator);

return combine(subfield, filter);
}
} else if (call.name() == "neq") {
if (toSubfield(leftSide, subfield)) {
return negated ? makeEqualFilter(call.inputs()[1], evaluator)
: makeNotEqualFilter(call.inputs()[1], evaluator);
auto filter = negated ? makeEqualFilter(call.inputs()[1], evaluator)
: makeNotEqualFilter(call.inputs()[1], evaluator);
return combine(subfield, filter);
}
} else if (call.name() == "lte") {
if (toSubfield(leftSide, subfield)) {
return negated ? makeGreaterThanFilter(call.inputs()[1], evaluator)
: makeLessThanOrEqualFilter(call.inputs()[1], evaluator);
auto filter = negated
? makeGreaterThanFilter(call.inputs()[1], evaluator)
: makeLessThanOrEqualFilter(call.inputs()[1], evaluator);
return combine(subfield, filter);
}
} else if (call.name() == "lt") {
if (toSubfield(leftSide, subfield)) {
return negated ? makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator)
: makeLessThanFilter(call.inputs()[1], evaluator);
auto filter = negated
? makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator)
: makeLessThanFilter(call.inputs()[1], evaluator);
return combine(subfield, filter);
}
} else if (call.name() == "gte") {
if (toSubfield(leftSide, subfield)) {
return negated
auto filter = negated
? makeLessThanFilter(call.inputs()[1], evaluator)
: makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator);
return combine(subfield, filter);
}
} else if (call.name() == "gt") {
if (toSubfield(leftSide, subfield)) {
return negated ? makeLessThanOrEqualFilter(call.inputs()[1], evaluator)
: makeGreaterThanFilter(call.inputs()[1], evaluator);
auto filter = negated
? makeLessThanOrEqualFilter(call.inputs()[1], evaluator)
: makeGreaterThanFilter(call.inputs()[1], evaluator);
return combine(subfield, filter);
}
} else if (call.name() == "between") {
if (toSubfield(leftSide, subfield)) {
return makeBetweenFilter(
auto filter = makeBetweenFilter(
call.inputs()[1], call.inputs()[2], evaluator, negated);
return combine(subfield, filter);
}
} else if (call.name() == "in") {
if (toSubfield(leftSide, subfield)) {
return makeInFilter(call.inputs()[1], evaluator, negated);
auto filter = makeInFilter(call.inputs()[1], evaluator, negated);
return combine(subfield, filter);
}
} else if (call.name() == "is_null") {
if (toSubfield(leftSide, subfield)) {
if (negated) {
return isNotNull();
return std::make_pair(std::move(subfield), isNotNull());
}
return isNull();
}
}
return nullptr;
}

std::pair<common::Subfield, std::unique_ptr<common::Filter>>
PrestoExprToSubfieldFilterParser::toSubfieldFilter(
const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator) {
if (expr->isCallKind();
auto* call = expr->asUnchecked<core::CallTypedExpr>()) {
if (call->name() == "or") {
VELOX_CHECK_EQ(call->inputs().size(), 2);
auto left = toSubfieldFilter(call->inputs()[0], evaluator);
auto right = toSubfieldFilter(call->inputs()[1], evaluator);
VELOX_CHECK(left.first == right.first);
return {
std::move(left.first),
makeOrFilter(std::move(left.second), std::move(right.second))};
}

common::Subfield subfield;
std::unique_ptr<common::Filter> filter;
if (call->name() == "not") {
const auto& input = call->inputs()[0];
if (input->isCallKind();
auto* inner = input->asUnchecked<core::CallTypedExpr>()) {
filter = leafCallToSubfieldFilter(*inner, subfield, evaluator, true);
}
} else {
filter = leafCallToSubfieldFilter(*call, subfield, evaluator, false);
}
if (filter) {
return std::make_pair(std::move(subfield), std::move(filter));
return std::make_pair(std::move(subfield), isNull());
}
}
VELOX_UNSUPPORTED(
"Unsupported expression for range filter: {}", expr->toString());
return std::nullopt;
}

} // namespace facebook::velox::exec
54 changes: 15 additions & 39 deletions velox/expression/ExprToSubfieldFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,41 +424,25 @@ class ExprToSubfieldFilterParser {
parser_ = std::move(parser);
}

/// Test-only API. Do not use in production code.
///
/// Analyzes 'expr' to determine if it can be expressed as a subfield filter.
/// Returns a pair of subfield and filter if so. Otherwise, throws.
///
/// Supports all expressions supported by leafCallToSubfieldFilter + negations
/// and disjunctions over same subfield.
/// Examples:
/// a = 1
/// a = 1 OR a > 10
/// not (a = 1)
///
/// TODO Improve the API by returning std::optional instead of throwing.
virtual std::pair<common::Subfield, std::unique_ptr<common::Filter>>
toSubfieldFilter(
const core::TypedExprPtr& expr,
core::ExpressionEvaluator*) = 0;

/// Analyzes 'call' expression to determine if it can be expressed as a
/// subfield filter. Returns the filter and sets 'subfield' output argument if
/// so. Otherwise, returns nullptr. If 'negated' is true, considers the
/// negation of 'call' expressions (not(call)). It is possible that 'call'
/// expression can be represented as subfield filter, but its negation cannot.
///
/// TODO Make this and toSubfieldFilter APIs consistent. Both should not throw
/// and return std::optional pair of filter and subfield.
virtual std::unique_ptr<common::Filter> leafCallToSubfieldFilter(
/// subfield filter. Returns the subfield and filter if so. Otherwise, returns
/// std::nullopt. If 'negated' is true, considers the negation of 'call'
/// expressions (not(call)). It is possible that 'call' expression can be
/// represented as subfield filter, but its negation cannot.
virtual std::optional<
std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
leafCallToSubfieldFilter(
const core::CallTypedExpr& call,
common::Subfield& subfield,
core::ExpressionEvaluator* evaluator,
bool negated = false) = 0;

static std::unique_ptr<common::Filter> makeOrFilter(
std::unique_ptr<common::Filter> a,
std::unique_ptr<common::Filter> b);

protected:
// Converts an expression into a subfield. Returns false if the expression is
// not a valid field expression.
// Converts an expression into a subfield. Returns false if the expression
// is not a valid field expression.
static bool toSubfield(
const core::ITypedExpr* field,
common::Subfield& subfield);
Expand Down Expand Up @@ -507,10 +491,6 @@ class ExprToSubfieldFilterParser {
core::ExpressionEvaluator* evaluator,
bool negated);

static std::unique_ptr<common::Filter> makeOrFilter(
std::unique_ptr<common::Filter> a,
std::unique_ptr<common::Filter> b);

private:
// Singleton parser instance.
static std::shared_ptr<ExprToSubfieldFilterParser> parser_;
Expand All @@ -519,13 +499,9 @@ class ExprToSubfieldFilterParser {
// Parser for Presto expressions.
class PrestoExprToSubfieldFilterParser : public ExprToSubfieldFilterParser {
public:
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator) override;

std::unique_ptr<common::Filter> leafCallToSubfieldFilter(
std::optional<std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
leafCallToSubfieldFilter(
const core::CallTypedExpr& call,
common::Subfield& subfield,
core::ExpressionEvaluator* evaluator,
bool negated = false) override;
};
Expand Down
Loading
Loading