diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7be4aa1ca9562..06ff601d67a1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -990,7 +990,7 @@ class Dataset[T] private[sql]( * @param condition Join expression. * @param joinType Type of join to perform. Default `inner`. Must be one of: * `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`, - * `right`, `right_outer`, `left_semi`, `left_anti`. + * `right`, `right_outer`. * * @group typedrel * @since 1.6.0 @@ -1007,6 +1007,10 @@ class Dataset[T] private[sql]( JoinType(joinType), Some(condition.expr))).analyzed.asInstanceOf[Join] + if (joined.joinType == LeftSemi || joined.joinType == LeftAnti) { + throw new AnalysisException("Invalid join type in joinWith: " + joined.joinType.sql) + } + // For both join side, combine all outputs into a single column and alias it with "_1" or "_2", // to match the schema for the encoder of the join result. // Note that we do this before joining them, to enable the join operator to return null for one diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 87b7b090de3bf..4a1e3e1599599 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -21,6 +21,7 @@ import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder} +import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SortExec} import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchange} @@ -321,6 +322,21 @@ class DatasetSuite extends QueryTest with SharedSQLContext { ((("b", 2), ("b", 2)), ("b", 2))) } + test("joinWith join types") { + val ds1 = Seq(1, 2, 3).toDS().as("a") + val ds2 = Seq(1, 2).toDS().as("b") + + val e1 = intercept[AnalysisException] { + ds1.joinWith(ds2, $"a.value" === $"b.value", "left_semi") + }.getMessage + assert(e1.contains("Invalid join type in joinWith: " + LeftSemi.sql)) + + val e2 = intercept[AnalysisException] { + ds1.joinWith(ds2, $"a.value" === $"b.value", "left_anti") + }.getMessage + assert(e2.contains("Invalid join type in joinWith: " + LeftAnti.sql)) + } + test("groupBy function, keys") { val ds = Seq(("a", 1), ("b", 1)).toDS() val grouped = ds.groupByKey(v => (1, v._2))