Skip to content

Commit 4ccca2a

Browse files
committed
Implement the local intersect operator
1 parent d1acc2a commit 4ccca2a

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.execution.local
19+
20+
import scala.collection.mutable
21+
22+
import org.apache.spark.sql.catalyst.InternalRow
23+
import org.apache.spark.sql.catalyst.expressions.Attribute
24+
25+
/**
 * An operator that returns the rows that appear in the output of both children,
 * equivalent to SQL's INTERSECT: each distinct row present on both sides is emitted
 * at most once, regardless of how many times it occurs on either side.
 *
 * The left child is fully materialized into a hash set in [[open]]; the right child
 * is then streamed through [[next]]/[[fetch]].
 */
case class IntersectNode(left: LocalNode, right: LocalNode) extends BinaryLocalNode {

  override def output: Seq[Attribute] = left.output

  // Distinct rows produced by the left child, built eagerly in open().
  // Matched rows are removed in next() so duplicates on the right are not re-emitted.
  private[this] var leftRows: mutable.HashSet[InternalRow] = _

  // The row to be returned by the next fetch(), or null when no row is pending.
  private[this] var currentRow: InternalRow = _

  override def open(): Unit = {
    left.open()
    leftRows = mutable.HashSet[InternalRow]()
    while (left.next()) {
      // Copy defensively: fetch() may return a mutable row instance that the child
      // reuses across calls, which would corrupt the set if stored directly.
      leftRows += left.fetch().copy()
    }
    // The left side is fully consumed, so release it early.
    left.close()
    right.open()
  }

  override def next(): Boolean = {
    currentRow = null
    while (currentRow == null && right.next()) {
      currentRow = right.fetch()
      // remove() returns true iff the row was present. Removing the matched row
      // enforces INTERSECT's distinct semantics: a row duplicated on the right
      // is emitted only once.
      if (!leftRows.remove(currentRow)) {
        currentRow = null
      }
    }
    currentRow != null
  }

  override def fetch(): InternalRow = currentRow

  override def close(): Unit = {
    // NOTE(review): left was already closed in open(); this assumes LocalNode.close()
    // is idempotent — confirm against the LocalNode contract.
    left.close()
    right.close()
  }
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.execution.local
19+
20+
/**
 * Test suite for [[IntersectNode]].
 */
class IntersectNodeSuite extends LocalNodeTest {

  test("basic") {
    // Left side: all of 1..10; right side: only the even values, so the expected
    // intersection is exactly the even rows.
    val leftInput = (1 to 10).map(i => (i, i.toString)).toDF("key", "value")
    val rightInput = (2 to 10 by 2).map(i => (i, i.toString)).toDF("key", "value")

    checkAnswer2(
      leftInput,
      rightInput,
      (node1, node2) => IntersectNode(node1, node2),
      leftInput.intersect(rightInput).collect())
  }
}

0 commit comments

Comments
 (0)