Skip to content

Commit 7eab76d

Browse files
mbasmanovameta-codesync[bot]
authored andcommitted
refactor: Move toSubfieldFilter API to PlanBuilder (facebookincubator#15556)
Summary: Pull Request resolved: facebookincubator#15556 Reviewed By: Yuhta Differential Revision: D87386102 fbshipit-source-id: 739b19ef834ec76e7457e56974be1872cc18c4ff
1 parent ecddafb commit 7eab76d

File tree

4 files changed

+58
-228
lines changed

4 files changed

+58
-228
lines changed

velox/exec/tests/utils/PlanBuilder.cpp

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,58 @@ PlanBuilder& PlanBuilder::tpcdsTableScan(
191191
.endTableScan();
192192
}
193193

194+
namespace {
195+
196+
// Analyzes 'expr' to determine if it can be expressed as a subfield filter.
197+
// Returns a pair of subfield and filter if so. Otherwise, throws.
198+
//
199+
// Supports all expressions supported by
200+
// exec::ExprToSubfieldFilterParser::leafCallToSubfieldFilter + negations and
201+
// disjunctions over same subfield.
202+
//
203+
// Examples:
204+
// a = 1
205+
// a = 1 OR a > 10
206+
// not (a = 1)
207+
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
208+
const core::TypedExprPtr& expr,
209+
core::ExpressionEvaluator* evaluator) {
210+
if (expr->isCallKind();
211+
auto* call = expr->asUnchecked<core::CallTypedExpr>()) {
212+
if (call->name() == "or") {
213+
VELOX_CHECK_EQ(call->inputs().size(), 2);
214+
auto left = toSubfieldFilter(call->inputs()[0], evaluator);
215+
auto right = toSubfieldFilter(call->inputs()[1], evaluator);
216+
VELOX_CHECK(left.first == right.first);
217+
return {
218+
std::move(left.first),
219+
exec::ExprToSubfieldFilterParser::makeOrFilter(
220+
std::move(left.second), std::move(right.second))};
221+
}
222+
223+
if (call->name() == "not") {
224+
const auto& input = call->inputs()[0];
225+
if (input->isCallKind();
226+
auto* inner = input->asUnchecked<core::CallTypedExpr>()) {
227+
if (auto result =
228+
exec::ExprToSubfieldFilterParser::getInstance()
229+
->leafCallToSubfieldFilter(*inner, evaluator, true)) {
230+
return std::move(result.value());
231+
}
232+
}
233+
} else {
234+
if (auto result =
235+
exec::ExprToSubfieldFilterParser::getInstance()
236+
->leafCallToSubfieldFilter(*call, evaluator, false)) {
237+
return std::move(result.value());
238+
}
239+
}
240+
}
241+
VELOX_UNSUPPORTED(
242+
"Unsupported expression for range filter: {}", expr->toString());
243+
}
244+
} // namespace
245+
194246
PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters(
195247
std::vector<std::string> subfieldFilters) {
196248
VELOX_CHECK(subfieldFiltersMap_.empty());
@@ -210,9 +262,7 @@ PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters(
210262
// Parse directly to subfieldFiltersMap_
211263
auto filterExpr = core::Expressions::inferTypes(
212264
untypedExpr, parseType, planBuilder_.pool_);
213-
auto [subfield, subfieldFilter] =
214-
exec::ExprToSubfieldFilterParser::getInstance()->toSubfieldFilter(
215-
filterExpr, &evaluator);
265+
auto [subfield, subfieldFilter] = toSubfieldFilter(filterExpr, &evaluator);
216266

217267
auto it = columnAliases_.find(subfield.toString());
218268
if (it != columnAliases_.end()) {

velox/expression/ExprToSubfieldFilter.cpp

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ std::unique_ptr<common::Filter> ExprToSubfieldFilterParser::makeInFilter(
369369
case TypeKind::VARCHAR: {
370370
auto stringElements = elements->as<SimpleVector<StringView>>();
371371
std::vector<std::string> values;
372+
values.reserve(size);
372373
for (auto i = 0; i < size; i++) {
373374
values.push_back(std::string(stringElements->valueAt(offset + i)));
374375
}
@@ -557,38 +558,4 @@ PrestoExprToSubfieldFilterParser::leafCallToSubfieldFilter(
557558
return std::nullopt;
558559
}
559560

560-
std::pair<common::Subfield, std::unique_ptr<common::Filter>>
561-
PrestoExprToSubfieldFilterParser::toSubfieldFilter(
562-
const core::TypedExprPtr& expr,
563-
core::ExpressionEvaluator* evaluator) {
564-
if (expr->isCallKind();
565-
auto* call = expr->asUnchecked<core::CallTypedExpr>()) {
566-
if (call->name() == "or") {
567-
VELOX_CHECK_EQ(call->inputs().size(), 2);
568-
auto left = toSubfieldFilter(call->inputs()[0], evaluator);
569-
auto right = toSubfieldFilter(call->inputs()[1], evaluator);
570-
VELOX_CHECK(left.first == right.first);
571-
return {
572-
std::move(left.first),
573-
makeOrFilter(std::move(left.second), std::move(right.second))};
574-
}
575-
576-
if (call->name() == "not") {
577-
const auto& input = call->inputs()[0];
578-
if (input->isCallKind();
579-
auto* inner = input->asUnchecked<core::CallTypedExpr>()) {
580-
if (auto result = leafCallToSubfieldFilter(*inner, evaluator, true)) {
581-
return std::move(result.value());
582-
}
583-
}
584-
} else {
585-
if (auto result = leafCallToSubfieldFilter(*call, evaluator, false)) {
586-
return std::move(result.value());
587-
}
588-
}
589-
}
590-
VELOX_UNSUPPORTED(
591-
"Unsupported expression for range filter: {}", expr->toString());
592-
}
593-
594561
} // namespace facebook::velox::exec

velox/expression/ExprToSubfieldFilter.h

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -424,24 +424,6 @@ class ExprToSubfieldFilterParser {
424424
parser_ = std::move(parser);
425425
}
426426

427-
/// Test-only API. Do not use in production code.
428-
///
429-
/// Analyzes 'expr' to determine if it can be expressed as a subfield filter.
430-
/// Returns a pair of subfield and filter if so. Otherwise, throws.
431-
///
432-
/// Supports all expressions supported by leafCallToSubfieldFilter + negations
433-
/// and disjunctions over same subfield.
434-
/// Examples:
435-
/// a = 1
436-
/// a = 1 OR a > 10
437-
/// not (a = 1)
438-
///
439-
/// TODO Improve the API by returning std::optional instead of throwing.
440-
virtual std::pair<common::Subfield, std::unique_ptr<common::Filter>>
441-
toSubfieldFilter(
442-
const core::TypedExprPtr& expr,
443-
core::ExpressionEvaluator*) = 0;
444-
445427
/// Analyzes 'call' expression to determine if it can be expressed as a
446428
/// subfield filter. Returns the subfield and filter if so. Otherwise, returns
447429
/// std::nullopt. If 'negated' is true, considers the negation of 'call'
@@ -454,6 +436,10 @@ class ExprToSubfieldFilterParser {
454436
core::ExpressionEvaluator* evaluator,
455437
bool negated = false) = 0;
456438

439+
static std::unique_ptr<common::Filter> makeOrFilter(
440+
std::unique_ptr<common::Filter> a,
441+
std::unique_ptr<common::Filter> b);
442+
457443
protected:
458444
// Converts an expression into a subfield. Returns false if the expression
459445
// is not a valid field expression.
@@ -505,10 +491,6 @@ class ExprToSubfieldFilterParser {
505491
core::ExpressionEvaluator* evaluator,
506492
bool negated);
507493

508-
static std::unique_ptr<common::Filter> makeOrFilter(
509-
std::unique_ptr<common::Filter> a,
510-
std::unique_ptr<common::Filter> b);
511-
512494
private:
513495
// Singleton parser instance.
514496
static std::shared_ptr<ExprToSubfieldFilterParser> parser_;
@@ -517,10 +499,6 @@ class ExprToSubfieldFilterParser {
517499
// Parser for Presto expressions.
518500
class PrestoExprToSubfieldFilterParser : public ExprToSubfieldFilterParser {
519501
public:
520-
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
521-
const core::TypedExprPtr& expr,
522-
core::ExpressionEvaluator* evaluator) override;
523-
524502
std::optional<std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
525503
leafCallToSubfieldFilter(
526504
const core::CallTypedExpr& call,

velox/expression/tests/ExprToSubfieldFilterTest.cpp

Lines changed: 0 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,6 @@ class ExprToSubfieldFilterTest : public testing::Test {
7777
return std::make_pair(common::Subfield(), nullptr);
7878
}
7979

80-
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
81-
const core::TypedExprPtr& expr) {
82-
return ExprToSubfieldFilterParser::getInstance()->toSubfieldFilter(
83-
expr, evaluator());
84-
}
85-
8680
private:
8781
std::shared_ptr<memory::MemoryPool> pool_ =
8882
memory::memoryManager()->addLeafPool();
@@ -217,55 +211,6 @@ TEST_F(ExprToSubfieldFilterTest, isNull) {
217211
ASSERT_TRUE(filter->testNull());
218212
}
219213

220-
TEST_F(ExprToSubfieldFilterTest, isNotNull) {
221-
auto call = parseCallExpr("a is not null", ROW({{"a", BIGINT()}}));
222-
auto [subfield, filter] = toSubfieldFilter(call);
223-
224-
ASSERT_TRUE(filter);
225-
validateSubfield(subfield, {"a"});
226-
ASSERT_TRUE(filter->testInt64(0));
227-
ASSERT_TRUE(filter->testInt64(42));
228-
ASSERT_FALSE(filter->testNull());
229-
}
230-
231-
TEST_F(ExprToSubfieldFilterTest, or) {
232-
auto call = parseExpr("a = 42 OR a = 43", ROW({{"a", BIGINT()}}));
233-
auto [subfield, filter] = toSubfieldFilter(call);
234-
235-
ASSERT_TRUE(filter);
236-
validateSubfield(subfield, {"a"});
237-
ASSERT_TRUE(filter->testInt64(42));
238-
ASSERT_TRUE(filter->testInt64(43));
239-
ASSERT_FALSE(filter->testInt64(41));
240-
ASSERT_FALSE(filter->testNull());
241-
}
242-
243-
TEST_F(ExprToSubfieldFilterTest, unsupported) {
244-
const auto type = ROW({"a", "b", "c"}, BIGINT());
245-
246-
VELOX_ASSERT_THROW(
247-
toSubfieldFilter(parseExpr("123", type)),
248-
"Unsupported expression for range filter");
249-
250-
VELOX_ASSERT_THROW(
251-
toSubfieldFilter(parseCallExpr("a + b", type)),
252-
"Unsupported expression for range filter");
253-
254-
VELOX_ASSERT_THROW(
255-
toSubfieldFilter(parseCallExpr("(a + b > 0) OR (c = 1)", type)),
256-
"Unsupported expression for range filter");
257-
258-
// TODO Improve error message for this specific use case. Then use
259-
// VELOX_ASSERT_THROW.
260-
EXPECT_THROW(
261-
toSubfieldFilter(parseCallExpr("a = 1 OR c = 2", type)),
262-
VeloxRuntimeError);
263-
264-
VELOX_ASSERT_THROW(
265-
toSubfieldFilter(parseCallExpr("a = 1 AND b = 2", type)),
266-
"Unsupported expression for range filter");
267-
}
268-
269214
TEST_F(ExprToSubfieldFilterTest, like) {
270215
auto call = parseCallExpr("a like 'foo%'", ROW({{"a", VARCHAR()}}));
271216
auto [subfield, filter] = leafCallToSubfieldFilter(call);
@@ -306,115 +251,5 @@ TEST_F(ExprToSubfieldFilterTest, dereferenceWithEmptyField) {
306251
ASSERT_FALSE(filter);
307252
}
308253

309-
class CustomExprToSubfieldFilterParser : public ExprToSubfieldFilterParser {
310-
public:
311-
std::pair<common::Subfield, std::unique_ptr<common::Filter>> toSubfieldFilter(
312-
const core::TypedExprPtr& expr,
313-
core::ExpressionEvaluator* evaluator) override {
314-
if (expr->isCallKind();
315-
auto* call = expr->asUnchecked<core::CallTypedExpr>()) {
316-
if (call->name() == "or") {
317-
auto left = toSubfieldFilter(call->inputs()[0], evaluator);
318-
auto right = toSubfieldFilter(call->inputs()[1], evaluator);
319-
VELOX_CHECK(left.first == right.first);
320-
return {
321-
std::move(left.first),
322-
makeOrFilter(std::move(left.second), std::move(right.second))};
323-
}
324-
if (call->name() == "not") {
325-
if (auto* inner =
326-
call->inputs()[0]->asUnchecked<core::CallTypedExpr>()) {
327-
if (auto result = leafCallToSubfieldFilter(*inner, evaluator, true)) {
328-
return std::move(result.value());
329-
}
330-
}
331-
} else {
332-
if (auto result = leafCallToSubfieldFilter(*call, evaluator, false)) {
333-
return std::move(result.value());
334-
}
335-
}
336-
}
337-
VELOX_UNSUPPORTED(
338-
"Unsupported expression for range filter: {}", expr->toString());
339-
}
340-
341-
std::optional<std::pair<common::Subfield, std::unique_ptr<common::Filter>>>
342-
leafCallToSubfieldFilter(
343-
const core::CallTypedExpr& call,
344-
core::ExpressionEvaluator* evaluator,
345-
bool negated) override {
346-
if (call.inputs().empty()) {
347-
return std::nullopt;
348-
}
349-
350-
const auto* leftSide = call.inputs()[0].get();
351-
352-
common::Subfield subfield;
353-
if (call.name() == "custom_eq") {
354-
if (toSubfield(leftSide, subfield)) {
355-
auto filter = negated ? makeNotEqualFilter(call.inputs()[1], evaluator)
356-
: makeEqualFilter(call.inputs()[1], evaluator);
357-
if (filter != nullptr) {
358-
return std::make_pair(std::move(subfield), std::move(filter));
359-
}
360-
return std::nullopt;
361-
}
362-
} else if (call.name() == "is_null") {
363-
if (toSubfield(call.inputs()[0].get(), subfield)) {
364-
if (negated) {
365-
return std::make_pair(std::move(subfield), isNotNull());
366-
}
367-
return std::make_pair(std::move(subfield), isNull());
368-
}
369-
}
370-
return std::nullopt;
371-
}
372-
};
373-
374-
class CustomExprToSubfieldFilterTest : public ExprToSubfieldFilterTest {
375-
public:
376-
static void SetUpTestSuite() {
377-
functions::prestosql::registerAllScalarFunctions("custom_");
378-
functions::registerIsNullFunction("is_null");
379-
parse::registerTypeResolver();
380-
memory::MemoryManager::testingSetInstance(memory::MemoryManager::Options{});
381-
ExprToSubfieldFilterParser::registerParser(
382-
std::make_unique<CustomExprToSubfieldFilterParser>());
383-
}
384-
385-
static void TearDownTestSuite() {
386-
ExprToSubfieldFilterParser::registerParser(
387-
std::make_unique<PrestoExprToSubfieldFilterParser>());
388-
}
389-
};
390-
391-
TEST_F(CustomExprToSubfieldFilterTest, isNull) {
392-
auto call = parseCallExpr("a is null", ROW({{"a", BIGINT()}}));
393-
auto [subfield, filter] = toSubfieldFilter(call);
394-
ASSERT_TRUE(filter);
395-
validateSubfield(subfield, {"a"});
396-
ASSERT_FALSE(filter->testInt64(0));
397-
ASSERT_FALSE(filter->testInt64(42));
398-
ASSERT_TRUE(filter->testNull());
399-
}
400-
401-
TEST_F(CustomExprToSubfieldFilterTest, eq) {
402-
auto call = parseCallExpr("custom_eq(a, 42)", ROW({{"a", BIGINT()}}));
403-
auto [subfield, filter] = toSubfieldFilter(call);
404-
ASSERT_TRUE(filter);
405-
validateSubfield(subfield, {"a"});
406-
auto bigintRange = dynamic_cast<common::BigintRange*>(filter.get());
407-
ASSERT_TRUE(bigintRange);
408-
ASSERT_EQ(bigintRange->lower(), 42);
409-
ASSERT_EQ(bigintRange->upper(), 42);
410-
ASSERT_FALSE(bigintRange->testNull());
411-
}
412-
413-
TEST_F(CustomExprToSubfieldFilterTest, unsupported) {
414-
auto call = parseCallExpr("custom_neq(a, 42)", ROW({{"a", BIGINT()}}));
415-
VELOX_ASSERT_USER_THROW(
416-
toSubfieldFilter(call), "Unsupported expression for range filter");
417-
}
418-
419254
} // namespace
420255
} // namespace facebook::velox::exec

0 commit comments

Comments
 (0)