@@ -648,29 +648,29 @@ bool MergeJoin::addToOutputForLeftJoin() {
648648 : rightMatch_->startRowIndex ;
649649
650650 const auto numRightBatches = rightMatch_->inputs .size ();
651- // TODO: Since semi joins only require determining if there is at least
652- // one match on the other side, we could explore specialized algorithms
653- // or data structures that short-circuit the join process once a match
654- // is found.
655- for (size_t r = isLeftSemiFilterJoin (joinType_) ? numRightBatches - 1
656- : firstRightBatch;
657- r < numRightBatches;
658- ++r) {
651+ for (size_t r = firstRightBatch; r < numRightBatches; ++r) {
659652 const auto rightBatch = rightMatch_->inputs [r];
660- auto rightStartRow = r == firstRightBatch ? rightStartRowIndex : 0 ;
661- const auto rightEndRow = r == numRightBatches - 1
662- ? rightMatch_->endRowIndex
663- : rightBatch->size ();
664- if (isLeftSemiFilterJoin (joinType_)) {
665- rightStartRow = rightEndRow - 1 ;
666- }
653+ const auto rightStartRow =
654+ r == firstRightBatch ? rightStartRowIndex : 0 ;
655+ auto rightEndRow = r == numRightBatches - 1 ? rightMatch_->endRowIndex
656+ : rightBatch->size ();
657+
667658 if (prepareOutput (leftBatch, rightBatch)) {
668659 output_->resize (outputSize_);
669660 leftMatch_->setCursor (l, i);
670661 rightMatch_->setCursor (r, rightStartRow);
671662 return true ;
672663 }
673664
665+ // TODO: Since semi joins only require determining if there is at least
666+ // one match on the other side, we could explore specialized algorithms
667+ // or data structures that short-circuit the join process once a match
668+ // is found.
669+ if (isLeftSemiFilterJoin (joinType_)) {
670+ // LeftSemiFilter produces each row from the left at most once.
671+ rightEndRow = rightStartRow + 1 ;
672+ }
673+
674674 for (auto j = rightStartRow; j < rightEndRow; ++j) {
675675 if (!tryAddOutputRow (leftBatch, i, rightBatch, j)) {
676676 // If we run out of space in the current output_, we will need to
@@ -728,23 +728,11 @@ bool MergeJoin::addToOutputForRightJoin() {
728728 : leftMatch_->startRowIndex ;
729729
730730 const auto numLeftBatches = leftMatch_->inputs .size ();
731- // TODO: Since semi joins only require determining if there is at least
732- // one match on the other side, we could explore specialized algorithms
733- // or data structures that short-circuit the join process once a match
734- // is found.
735- for (size_t l = isRightSemiFilterJoin (joinType_) ? numLeftBatches - 1
736- : firstLeftBatch;
737- l < numLeftBatches;
738- ++l) {
731+ for (size_t l = firstLeftBatch; l < numLeftBatches; ++l) {
739732 const auto leftBatch = leftMatch_->inputs [l];
740- auto leftStartRow = l == firstLeftBatch ? leftStartRowIndex : 0 ;
741- const auto leftEndRow = l == numLeftBatches - 1
742- ? leftMatch_->endRowIndex
743- : leftBatch->size ();
744- if (isRightSemiFilterJoin (joinType_)) {
745- // RightSemiFilter produce each row from the right at most once.
746- leftStartRow = leftEndRow - 1 ;
747- }
733+ const auto leftStartRow = l == firstLeftBatch ? leftStartRowIndex : 0 ;
734+ auto leftEndRow = l == numLeftBatches - 1 ? leftMatch_->endRowIndex
735+ : leftBatch->size ();
748736
749737 if (prepareOutput (leftBatch, rightBatch)) {
750738 // Differently from left joins, for right joins we need to load lazies
@@ -758,6 +746,15 @@ bool MergeJoin::addToOutputForRightJoin() {
758746 return true ;
759747 }
760748
749+ // TODO: Since semi joins only require determining if there is at least
750+ // one match on the other side, we could explore specialized algorithms
751+ // or data structures that short-circuit the join process once a match
752+ // is found.
753+ if (isRightSemiFilterJoin (joinType_)) {
754+ // RightSemiFilter produces each row from the right at most once.
755+ leftEndRow = leftStartRow + 1 ;
756+ }
757+
761758 for (auto j = leftStartRow; j < leftEndRow; ++j) {
762759 auto isRightJoinForFullOuter = false ;
763760 if (isFullJoin (joinType_)) {
0 commit comments