Skip to content

Commit f7acc46

Browse files
committed
Revert "fix: Fix the semi merge join with duplicate match vectors (facebookincubator#13096)"
This reverts commit e11e394.
1 parent 8913fd6 commit f7acc46

File tree

2 files changed

+28
-80
lines changed

2 files changed

+28
-80
lines changed

velox/exec/MergeJoin.cpp

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -648,29 +648,29 @@ bool MergeJoin::addToOutputForLeftJoin() {
648648
: rightMatch_->startRowIndex;
649649

650650
const auto numRightBatches = rightMatch_->inputs.size();
651-
// TODO: Since semi joins only require determining if there is at least
652-
// one match on the other side, we could explore specialized algorithms
653-
// or data structures that short-circuit the join process once a match
654-
// is found.
655-
for (size_t r = isLeftSemiFilterJoin(joinType_) ? numRightBatches - 1
656-
: firstRightBatch;
657-
r < numRightBatches;
658-
++r) {
651+
for (size_t r = firstRightBatch; r < numRightBatches; ++r) {
659652
const auto rightBatch = rightMatch_->inputs[r];
660-
auto rightStartRow = r == firstRightBatch ? rightStartRowIndex : 0;
661-
const auto rightEndRow = r == numRightBatches - 1
662-
? rightMatch_->endRowIndex
663-
: rightBatch->size();
664-
if (isLeftSemiFilterJoin(joinType_)) {
665-
rightStartRow = rightEndRow - 1;
666-
}
653+
const auto rightStartRow =
654+
r == firstRightBatch ? rightStartRowIndex : 0;
655+
auto rightEndRow = r == numRightBatches - 1 ? rightMatch_->endRowIndex
656+
: rightBatch->size();
657+
667658
if (prepareOutput(leftBatch, rightBatch)) {
668659
output_->resize(outputSize_);
669660
leftMatch_->setCursor(l, i);
670661
rightMatch_->setCursor(r, rightStartRow);
671662
return true;
672663
}
673664

665+
// TODO: Since semi joins only require determining if there is at least
666+
// one match on the other side, we could explore specialized algorithms
667+
// or data structures that short-circuit the join process once a match
668+
// is found.
669+
if (isLeftSemiFilterJoin(joinType_)) {
670+
// LeftSemiFilter produce each row from the left at most once.
671+
rightEndRow = rightStartRow + 1;
672+
}
673+
674674
for (auto j = rightStartRow; j < rightEndRow; ++j) {
675675
if (!tryAddOutputRow(leftBatch, i, rightBatch, j)) {
676676
// If we run out of space in the current output_, we will need to
@@ -728,23 +728,11 @@ bool MergeJoin::addToOutputForRightJoin() {
728728
: leftMatch_->startRowIndex;
729729

730730
const auto numLeftBatches = leftMatch_->inputs.size();
731-
// TODO: Since semi joins only require determining if there is at least
732-
// one match on the other side, we could explore specialized algorithms
733-
// or data structures that short-circuit the join process once a match
734-
// is found.
735-
for (size_t l = isRightSemiFilterJoin(joinType_) ? numLeftBatches - 1
736-
: firstLeftBatch;
737-
l < numLeftBatches;
738-
++l) {
731+
for (size_t l = firstLeftBatch; l < numLeftBatches; ++l) {
739732
const auto leftBatch = leftMatch_->inputs[l];
740-
auto leftStartRow = l == firstLeftBatch ? leftStartRowIndex : 0;
741-
const auto leftEndRow = l == numLeftBatches - 1
742-
? leftMatch_->endRowIndex
743-
: leftBatch->size();
744-
if (isRightSemiFilterJoin(joinType_)) {
745-
// RightSemiFilter produce each row from the right at most once.
746-
leftStartRow = leftEndRow - 1;
747-
}
733+
const auto leftStartRow = l == firstLeftBatch ? leftStartRowIndex : 0;
734+
auto leftEndRow = l == numLeftBatches - 1 ? leftMatch_->endRowIndex
735+
: leftBatch->size();
748736

749737
if (prepareOutput(leftBatch, rightBatch)) {
750738
// Differently from left joins, for right joins we need to load lazies
@@ -758,6 +746,15 @@ bool MergeJoin::addToOutputForRightJoin() {
758746
return true;
759747
}
760748

749+
// TODO: Since semi joins only require determining if there is at least
750+
// one match on the other side, we could explore specialized algorithms
751+
// or data structures that short-circuit the join process once a match
752+
// is found.
753+
if (isRightSemiFilterJoin(joinType_)) {
754+
// RightSemiFilter produce each row from the right at most once.
755+
leftEndRow = leftStartRow + 1;
756+
}
757+
761758
for (auto j = leftStartRow; j < leftEndRow; ++j) {
762759
auto isRightJoinForFullOuter = false;
763760
if (isFullJoin(joinType_)) {

velox/exec/tests/MergeJoinTest.cpp

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -963,55 +963,6 @@ TEST_F(MergeJoinTest, semiJoin) {
963963
core::JoinType::kRightSemiFilter);
964964
}
965965

966-
TEST_F(MergeJoinTest, semiJoinWithMultipleMatchVectors) {
967-
std::vector<RowVectorPtr> leftVectors;
968-
for (int i = 0; i < 10; ++i) {
969-
leftVectors.push_back(makeRowVector(
970-
{"t0"}, {makeFlatVector<int64_t>({i / 2, i / 2, i / 2})}));
971-
}
972-
std::vector<RowVectorPtr> rightVectors;
973-
for (int i = 0; i < 10; ++i) {
974-
rightVectors.push_back(makeRowVector(
975-
{"u0"}, {makeFlatVector<int64_t>({i / 2, i / 2, i / 2})}));
976-
}
977-
978-
createDuckDbTable("t", leftVectors);
979-
createDuckDbTable("u", rightVectors);
980-
981-
auto testSemiJoin = [&](const std::string& filter,
982-
const std::string& sql,
983-
const std::vector<std::string>& outputLayout,
984-
core::JoinType joinType) {
985-
auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
986-
auto plan = PlanBuilder(planNodeIdGenerator)
987-
.values(leftVectors)
988-
.mergeJoin(
989-
{"t0"},
990-
{"u0"},
991-
PlanBuilder(planNodeIdGenerator)
992-
.values(rightVectors)
993-
.planNode(),
994-
filter,
995-
outputLayout,
996-
joinType)
997-
.planNode();
998-
AssertQueryBuilder(plan, duckDbQueryRunner_)
999-
.config(core::QueryConfig::kMaxOutputBatchRows, "1")
1000-
.assertResults(sql);
1001-
};
1002-
1003-
testSemiJoin(
1004-
"u0 > 1",
1005-
"SELECT u0 FROM u where u0 IN (SELECT t0 from t) and u0 > 1",
1006-
{"u0"},
1007-
core::JoinType::kRightSemiFilter);
1008-
testSemiJoin(
1009-
"t0 >1",
1010-
"SELECT t0 FROM t where t0 IN (SELECT u0 from u) and t0 > 1",
1011-
{"t0"},
1012-
core::JoinType::kLeftSemiFilter);
1013-
}
1014-
1015966
TEST_F(MergeJoinTest, rightJoin) {
1016967
auto left = makeRowVector(
1017968
{"t0"},

0 commit comments

Comments
 (0)