diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index f0dc05c96e8..ba7e9c25940 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -890,10 +890,10 @@ core::TypedExprPtr extractFiltersFromRemainingFilter( } common::Filter* oldFilter = nullptr; try { - common::Subfield subfield; - if (auto filter = exec::ExprToSubfieldFilterParser::getInstance() - ->leafCallToSubfieldFilter( - *call, subfield, evaluator, negated)) { + if (auto subfieldAndFilter = + exec::ExprToSubfieldFilterParser::getInstance() + ->leafCallToSubfieldFilter(*call, evaluator, negated)) { + auto& [subfield, filter] = subfieldAndFilter.value(); if (auto it = filters.find(subfield); it != filters.end()) { oldFilter = it->second.get(); filter = filter->mergeWith(oldFilter); diff --git a/velox/dwio/common/MetadataFilter.cpp b/velox/dwio/common/MetadataFilter.cpp index 59181c557fe..88e3a5481e2 100644 --- a/velox/dwio/common/MetadataFilter.cpp +++ b/velox/dwio/common/MetadataFilter.cpp @@ -208,13 +208,14 @@ std::unique_ptr MetadataFilter::Node::fromExpression( return fromExpression(*call->inputs()[0], evaluator, !negated); } try { - Subfield subfield; - auto filter = + auto subfieldAndFilter = exec::ExprToSubfieldFilterParser::getInstance() - ->leafCallToSubfieldFilter(*call, subfield, evaluator, negated); - if (!filter) { + ->leafCallToSubfieldFilter(*call, evaluator, negated); + if (!subfieldAndFilter.has_value()) { return nullptr; } + + auto& [subfield, filter] = subfieldAndFilter.value(); VELOX_CHECK( subfield.valid(), "Invalid subfield from expression: {}", diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index 8b63ab8caa2..4e9f261e687 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -191,6 +191,58 @@ PlanBuilder& PlanBuilder::tpcdsTableScan( .endTableScan(); } +namespace { + +// Analyzes 'expr' to determine if it can be expressed as a subfield filter. +// Returns a pair of subfield and filter if so. Otherwise, throws. +// +// Supports all expressions supported by +// exec::ExprToSubfieldFilterParser::leafCallToSubfieldFilter + negations and +// disjunctions over same subfield. +// +// Examples: +// a = 1 +// a = 1 OR a > 10 +// not (a = 1) +std::pair> toSubfieldFilter( + const core::TypedExprPtr& expr, + core::ExpressionEvaluator* evaluator) { + if (expr->isCallKind(); + auto* call = expr->asUnchecked()) { + if (call->name() == "or") { + VELOX_CHECK_EQ(call->inputs().size(), 2); + auto left = toSubfieldFilter(call->inputs()[0], evaluator); + auto right = toSubfieldFilter(call->inputs()[1], evaluator); + VELOX_CHECK(left.first == right.first); + return { + std::move(left.first), + exec::ExprToSubfieldFilterParser::makeOrFilter( + std::move(left.second), std::move(right.second))}; + } + + if (call->name() == "not") { + const auto& input = call->inputs()[0]; + if (input->isCallKind(); + auto* inner = input->asUnchecked()) { + if (auto result = + exec::ExprToSubfieldFilterParser::getInstance() + ->leafCallToSubfieldFilter(*inner, evaluator, true)) { + return std::move(result.value()); + } + } + } else { + if (auto result = + exec::ExprToSubfieldFilterParser::getInstance() + ->leafCallToSubfieldFilter(*call, evaluator, false)) { + return std::move(result.value()); + } + } + } + VELOX_UNSUPPORTED( + "Unsupported expression for range filter: {}", expr->toString()); +} +} // namespace + PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters( std::vector subfieldFilters) { VELOX_CHECK(subfieldFiltersMap_.empty()); @@ -210,9 +262,7 @@ PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters( // Parse directly to subfieldFiltersMap_ auto filterExpr = core::Expressions::inferTypes( untypedExpr, parseType, planBuilder_.pool_); - auto [subfield, subfieldFilter] = - exec::ExprToSubfieldFilterParser::getInstance()->toSubfieldFilter( - filterExpr, &evaluator); + auto [subfield, subfieldFilter] = toSubfieldFilter(filterExpr, &evaluator); auto it = columnAliases_.find(subfield.toString()); if (it != columnAliases_.end()) { diff --git a/velox/expression/ExprToSubfieldFilter.cpp b/velox/expression/ExprToSubfieldFilter.cpp index 9d3d51eac93..8e0927241fe 100644 --- a/velox/expression/ExprToSubfieldFilter.cpp +++ b/velox/expression/ExprToSubfieldFilter.cpp @@ -369,6 +369,7 @@ std::unique_ptr ExprToSubfieldFilterParser::makeInFilter( case TypeKind::VARCHAR: { auto stringElements = elements->as>(); std::vector values; + values.reserve(size); for (auto i = 0; i < size; i++) { values.push_back(std::string(stringElements->valueAt(offset + i))); } @@ -471,102 +472,90 @@ std::unique_ptr ExprToSubfieldFilterParser::makeOrFilter( return orFilter(std::move(a), std::move(b)); } -std::unique_ptr +namespace { +std::optional>> +combine(common::Subfield& subfield, std::unique_ptr& filter) { + if (filter != nullptr) { + return std::make_pair(std::move(subfield), std::move(filter)); + } + + return std::nullopt; +} +} // namespace + +std::optional>> PrestoExprToSubfieldFilterParser::leafCallToSubfieldFilter( const core::CallTypedExpr& call, - common::Subfield& subfield, core::ExpressionEvaluator* evaluator, bool negated) { if (call.inputs().empty()) { - return nullptr; + return std::nullopt; } const auto* leftSide = call.inputs()[0].get(); + common::Subfield subfield; if (call.name() == "eq") { if (toSubfield(leftSide, subfield)) { - return negated ? makeNotEqualFilter(call.inputs()[1], evaluator) - : makeEqualFilter(call.inputs()[1], evaluator); + auto filter = negated ? makeNotEqualFilter(call.inputs()[1], evaluator) + : makeEqualFilter(call.inputs()[1], evaluator); + + return combine(subfield, filter); } } else if (call.name() == "neq") { if (toSubfield(leftSide, subfield)) { - return negated ? makeEqualFilter(call.inputs()[1], evaluator) - : makeNotEqualFilter(call.inputs()[1], evaluator); + auto filter = negated ? makeEqualFilter(call.inputs()[1], evaluator) + : makeNotEqualFilter(call.inputs()[1], evaluator); + return combine(subfield, filter); } } else if (call.name() == "lte") { if (toSubfield(leftSide, subfield)) { - return negated ? makeGreaterThanFilter(call.inputs()[1], evaluator) - : makeLessThanOrEqualFilter(call.inputs()[1], evaluator); + auto filter = negated + ? makeGreaterThanFilter(call.inputs()[1], evaluator) + : makeLessThanOrEqualFilter(call.inputs()[1], evaluator); + return combine(subfield, filter); } } else if (call.name() == "lt") { if (toSubfield(leftSide, subfield)) { - return negated ? makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator) - : makeLessThanFilter(call.inputs()[1], evaluator); + auto filter = negated + ? makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator) + : makeLessThanFilter(call.inputs()[1], evaluator); + return combine(subfield, filter); } } else if (call.name() == "gte") { if (toSubfield(leftSide, subfield)) { - return negated + auto filter = negated ? makeLessThanFilter(call.inputs()[1], evaluator) : makeGreaterThanOrEqualFilter(call.inputs()[1], evaluator); + return combine(subfield, filter); } } else if (call.name() == "gt") { if (toSubfield(leftSide, subfield)) { - return negated ? makeLessThanOrEqualFilter(call.inputs()[1], evaluator) - : makeGreaterThanFilter(call.inputs()[1], evaluator); + auto filter = negated + ? makeLessThanOrEqualFilter(call.inputs()[1], evaluator) + : makeGreaterThanFilter(call.inputs()[1], evaluator); + return combine(subfield, filter); } } else if (call.name() == "between") { if (toSubfield(leftSide, subfield)) { - return makeBetweenFilter( + auto filter = makeBetweenFilter( call.inputs()[1], call.inputs()[2], evaluator, negated); + return combine(subfield, filter); } } else if (call.name() == "in") { if (toSubfield(leftSide, subfield)) { - return makeInFilter(call.inputs()[1], evaluator, negated); + auto filter = makeInFilter(call.inputs()[1], evaluator, negated); + return combine(subfield, filter); } } else if (call.name() == "is_null") { if (toSubfield(leftSide, subfield)) { if (negated) { - return isNotNull(); + return std::make_pair(std::move(subfield), isNotNull()); } - return isNull(); - } - } - return nullptr; -} - -std::pair> -PrestoExprToSubfieldFilterParser::toSubfieldFilter( - const core::TypedExprPtr& expr, - core::ExpressionEvaluator* evaluator) { - if (expr->isCallKind(); - auto* call = expr->asUnchecked()) { - if (call->name() == "or") { - VELOX_CHECK_EQ(call->inputs().size(), 2); - auto left = toSubfieldFilter(call->inputs()[0], evaluator); - auto right = toSubfieldFilter(call->inputs()[1], evaluator); - VELOX_CHECK(left.first == right.first); - return { - std::move(left.first), - makeOrFilter(std::move(left.second), std::move(right.second))}; - } - - common::Subfield subfield; - std::unique_ptr filter; - if (call->name() == "not") { - const auto& input = call->inputs()[0]; - if (input->isCallKind(); - auto* inner = input->asUnchecked()) { - filter = leafCallToSubfieldFilter(*inner, subfield, evaluator, true); - } - } else { - filter = leafCallToSubfieldFilter(*call, subfield, evaluator, false); - } - if (filter) { - return std::make_pair(std::move(subfield), std::move(filter)); + return std::make_pair(std::move(subfield), isNull()); } } - VELOX_UNSUPPORTED( - "Unsupported expression for range filter: {}", expr->toString()); + return std::nullopt; } } // namespace facebook::velox::exec diff --git a/velox/expression/ExprToSubfieldFilter.h b/velox/expression/ExprToSubfieldFilter.h index 9257a29aa5f..dfd4955c23d 100644 --- a/velox/expression/ExprToSubfieldFilter.h +++ b/velox/expression/ExprToSubfieldFilter.h @@ -424,41 +424,25 @@ class ExprToSubfieldFilterParser { parser_ = std::move(parser); } - /// Test-only API. Do not use in production code. - /// - /// Analyzes 'expr' to determine if it can be expressed as a subfield filter. - /// Returns a pair of subfield and filter if so. Otherwise, throws. - /// - /// Supports all expressions supported by leafCallToSubfieldFilter + negations - /// and disjunctions over same subfield. - /// Examples: - /// a = 1 - /// a = 1 OR a > 10 - /// not (a = 1) - /// - /// TODO Improve the API by returning std::optional instead of throwing. - virtual std::pair> - toSubfieldFilter( - const core::TypedExprPtr& expr, - core::ExpressionEvaluator*) = 0; - /// Analyzes 'call' expression to determine if it can be expressed as a - /// subfield filter. Returns the filter and sets 'subfield' output argument if - /// so. Otherwise, returns nullptr. If 'negated' is true, considers the - /// negation of 'call' expressions (not(call)). It is possible that 'call' - /// expression can be represented as subfield filter, but its negation cannot. - /// - /// TODO Make this and toSubfieldFilter APIs consistent. Both should not throw - /// and return std::optional pair of filter and subfield. - virtual std::unique_ptr leafCallToSubfieldFilter( + /// subfield filter. Returns the subfield and filter if so. Otherwise, returns + /// std::nullopt. If 'negated' is true, considers the negation of 'call' + /// expressions (not(call)). It is possible that 'call' expression can be + /// represented as subfield filter, but its negation cannot. + virtual std::optional< + std::pair>> + leafCallToSubfieldFilter( const core::CallTypedExpr& call, - common::Subfield& subfield, core::ExpressionEvaluator* evaluator, bool negated = false) = 0; + static std::unique_ptr makeOrFilter( + std::unique_ptr a, + std::unique_ptr b); + protected: - // Converts an expression into a subfield. Returns false if the expression is - // not a valid field expression. + // Converts an expression into a subfield. Returns false if the expression + // is not a valid field expression. static bool toSubfield( const core::ITypedExpr* field, common::Subfield& subfield); @@ -507,10 +491,6 @@ class ExprToSubfieldFilterParser { core::ExpressionEvaluator* evaluator, bool negated); - static std::unique_ptr makeOrFilter( - std::unique_ptr a, - std::unique_ptr b); - private: // Singleton parser instance. static std::shared_ptr parser_; @@ -519,13 +499,9 @@ class ExprToSubfieldFilterParser { // Parser for Presto expressions. class PrestoExprToSubfieldFilterParser : public ExprToSubfieldFilterParser { public: - std::pair> toSubfieldFilter( - const core::TypedExprPtr& expr, - core::ExpressionEvaluator* evaluator) override; - - std::unique_ptr leafCallToSubfieldFilter( + std::optional>> + leafCallToSubfieldFilter( const core::CallTypedExpr& call, - common::Subfield& subfield, core::ExpressionEvaluator* evaluator, bool negated = false) override; }; diff --git a/velox/expression/tests/ExprToSubfieldFilterTest.cpp b/velox/expression/tests/ExprToSubfieldFilterTest.cpp index 91c1ee74fe8..c65fe7d4a1d 100644 --- a/velox/expression/tests/ExprToSubfieldFilterTest.cpp +++ b/velox/expression/tests/ExprToSubfieldFilterTest.cpp @@ -68,17 +68,13 @@ class ExprToSubfieldFilterTest : public testing::Test { std::pair> leafCallToSubfieldFilter(const core::CallTypedExprPtr& call) { - Subfield subfield; - auto filter = - ExprToSubfieldFilterParser::getInstance()->leafCallToSubfieldFilter( - *call, subfield, evaluator()); - return {std::move(subfield), std::move(filter)}; - } + if (auto result = + ExprToSubfieldFilterParser::getInstance()->leafCallToSubfieldFilter( + *call, evaluator())) { + return std::move(result.value()); + } - std::pair> toSubfieldFilter( - const core::TypedExprPtr& expr) { - return ExprToSubfieldFilterParser::getInstance()->toSubfieldFilter( - expr, evaluator()); + return std::make_pair(common::Subfield(), nullptr); } private: @@ -215,55 +211,6 @@ TEST_F(ExprToSubfieldFilterTest, isNull) { ASSERT_TRUE(filter->testNull()); } -TEST_F(ExprToSubfieldFilterTest, isNotNull) { - auto call = parseCallExpr("a is not null", ROW({{"a", BIGINT()}})); - auto [subfield, filter] = toSubfieldFilter(call); - - ASSERT_TRUE(filter); - validateSubfield(subfield, {"a"}); - ASSERT_TRUE(filter->testInt64(0)); - ASSERT_TRUE(filter->testInt64(42)); - ASSERT_FALSE(filter->testNull()); -} - -TEST_F(ExprToSubfieldFilterTest, or) { - auto call = parseExpr("a = 42 OR a = 43", ROW({{"a", BIGINT()}})); - auto [subfield, filter] = toSubfieldFilter(call); - - ASSERT_TRUE(filter); - validateSubfield(subfield, {"a"}); - ASSERT_TRUE(filter->testInt64(42)); - ASSERT_TRUE(filter->testInt64(43)); - ASSERT_FALSE(filter->testInt64(41)); - ASSERT_FALSE(filter->testNull()); -} - -TEST_F(ExprToSubfieldFilterTest, unsupported) { - const auto type = ROW({"a", "b", "c"}, BIGINT()); - - VELOX_ASSERT_THROW( - toSubfieldFilter(parseExpr("123", type)), - "Unsupported expression for range filter"); - - VELOX_ASSERT_THROW( - toSubfieldFilter(parseCallExpr("a + b", type)), - "Unsupported expression for range filter"); - - VELOX_ASSERT_THROW( - toSubfieldFilter(parseCallExpr("(a + b > 0) OR (c = 1)", type)), - "Unsupported expression for range filter"); - - // TODO Improve error message for this specific use case. Then use - // VELOX_ASSERT_THROW. - EXPECT_THROW( - toSubfieldFilter(parseCallExpr("a = 1 OR c = 2", type)), - VeloxRuntimeError); - - VELOX_ASSERT_THROW( - toSubfieldFilter(parseCallExpr("a = 1 AND b = 2", type)), - "Unsupported expression for range filter"); -} - TEST_F(ExprToSubfieldFilterTest, like) { auto call = parseCallExpr("a like 'foo%'", ROW({{"a", VARCHAR()}})); auto [subfield, filter] = leafCallToSubfieldFilter(call); @@ -304,111 +251,5 @@ TEST_F(ExprToSubfieldFilterTest, dereferenceWithEmptyField) { ASSERT_FALSE(filter); } -class CustomExprToSubfieldFilterParser : public ExprToSubfieldFilterParser { - public: - std::pair> toSubfieldFilter( - const core::TypedExprPtr& expr, - core::ExpressionEvaluator* evaluator) override { - if (expr->isCallKind(); - auto* call = expr->asUnchecked()) { - if (call->name() == "or") { - auto left = toSubfieldFilter(call->inputs()[0], evaluator); - auto right = toSubfieldFilter(call->inputs()[1], evaluator); - VELOX_CHECK(left.first == right.first); - return { - std::move(left.first), - makeOrFilter(std::move(left.second), std::move(right.second))}; - } - common::Subfield subfield; - std::unique_ptr filter; - if (call->name() == "not") { - if (auto* inner = - call->inputs()[0]->asUnchecked()) { - filter = leafCallToSubfieldFilter(*inner, subfield, evaluator, true); - } - } else { - filter = leafCallToSubfieldFilter(*call, subfield, evaluator, false); - } - if (filter) { - return std::make_pair(std::move(subfield), std::move(filter)); - } - } - VELOX_UNSUPPORTED( - "Unsupported expression for range filter: {}", expr->toString()); - } - - std::unique_ptr leafCallToSubfieldFilter( - const core::CallTypedExpr& call, - common::Subfield& subfield, - core::ExpressionEvaluator* evaluator, - bool negated) override { - if (call.inputs().empty()) { - return nullptr; - } - - const auto* leftSide = call.inputs()[0].get(); - - if (call.name() == "custom_eq") { - if (toSubfield(leftSide, subfield)) { - return negated ? makeNotEqualFilter(call.inputs()[1], evaluator) - : makeEqualFilter(call.inputs()[1], evaluator); - } - } else if (call.name() == "is_null") { - if (toSubfield(call.inputs()[0].get(), subfield)) { - if (negated) { - return isNotNull(); - } - return isNull(); - } - } - return nullptr; - } -}; - -class CustomExprToSubfieldFilterTest : public ExprToSubfieldFilterTest { - public: - static void SetUpTestSuite() { - functions::prestosql::registerAllScalarFunctions("custom_"); - functions::registerIsNullFunction("is_null"); - parse::registerTypeResolver(); - memory::MemoryManager::testingSetInstance(memory::MemoryManager::Options{}); - ExprToSubfieldFilterParser::registerParser( - std::make_unique()); - } - - static void TearDownTestSuite() { - ExprToSubfieldFilterParser::registerParser( - std::make_unique()); - } -}; - -TEST_F(CustomExprToSubfieldFilterTest, isNull) { - auto call = parseCallExpr("a is null", ROW({{"a", BIGINT()}})); - auto [subfield, filter] = toSubfieldFilter(call); - ASSERT_TRUE(filter); - validateSubfield(subfield, {"a"}); - ASSERT_FALSE(filter->testInt64(0)); - ASSERT_FALSE(filter->testInt64(42)); - ASSERT_TRUE(filter->testNull()); -} - -TEST_F(CustomExprToSubfieldFilterTest, eq) { - auto call = parseCallExpr("custom_eq(a, 42)", ROW({{"a", BIGINT()}})); - auto [subfield, filter] = toSubfieldFilter(call); - ASSERT_TRUE(filter); - validateSubfield(subfield, {"a"}); - auto bigintRange = dynamic_cast(filter.get()); - ASSERT_TRUE(bigintRange); - ASSERT_EQ(bigintRange->lower(), 42); - ASSERT_EQ(bigintRange->upper(), 42); - ASSERT_FALSE(bigintRange->testNull()); -} - -TEST_F(CustomExprToSubfieldFilterTest, unsupported) { - auto call = parseCallExpr("custom_neq(a, 42)", ROW({{"a", BIGINT()}})); - VELOX_ASSERT_USER_THROW( - toSubfieldFilter(call), "Unsupported expression for range filter"); -} - } // namespace } // namespace facebook::velox::exec