Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ class Dataset[T] private[sql](
* @param condition Join expression.
* @param joinType Type of join to perform. Default `inner`. Must be one of:
* `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
* `right`, `right_outer`, `left_semi`, `left_anti`.
* `right`, `right_outer`.
*
* @group typedrel
* @since 1.6.0
Expand All @@ -1007,6 +1007,10 @@ class Dataset[T] private[sql](
JoinType(joinType),
Some(condition.expr))).analyzed.asInstanceOf[Join]

if (joined.joinType == LeftSemi || joined.joinType == LeftAnti) {
throw new AnalysisException("Invalid join type in joinWith: " + joined.joinType.sql)
}

// For each join side, combine all outputs into a single column and alias it with "_1" or "_2",
// to match the schema for the encoder of the join result.
// Note that we do this before joining them, to enable the join operator to return null for one
Expand Down
16 changes: 16 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import java.io.{Externalizable, ObjectInput, ObjectOutput}
import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder}
import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
import org.apache.spark.sql.catalyst.util.sideBySide
import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SortExec}
import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchange}
Expand Down Expand Up @@ -321,6 +322,21 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
((("b", 2), ("b", 2)), ("b", 2)))
}

test("joinWith join types") {
  // Two small datasets, aliased so the join condition can refer to them as "a" and "b".
  val left = Seq(1, 2, 3).toDS().as("a")
  val right = Seq(1, 2).toDS().as("b")

  // Each join-type name is paired with the JoinType whose SQL text is expected to show up
  // in the error message. Both cases are expected to fail with an AnalysisException.
  Seq("left_semi" -> LeftSemi, "left_anti" -> LeftAnti).foreach { case (name, joinType) =>
    val message = intercept[AnalysisException] {
      left.joinWith(right, $"a.value" === $"b.value", name)
    }.getMessage
    assert(message.contains("Invalid join type in joinWith: " + joinType.sql))
  }
}

test("groupBy function, keys") {
val ds = Seq(("a", 1), ("b", 1)).toDS()
val grouped = ds.groupByKey(v => (1, v._2))
Expand Down