Skip to content

Commit 6ec509b

Browse files
committed
Call cross join path in PySpark join rather than throwing NPE
1 parent 5298171 commit 6ec509b

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

python/pyspark/sql/dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,8 @@ def join(self, other, on=None, how=None):
833 833           else:
834 834               if how is None:
835 835                   how = "inner"
    836 +             if on is None:
    837 +                 on = self._jseq([])
836 838               assert isinstance(how, basestring), "how should be basestring"
837 839               jdf = self._jdf.join(other._jdf, on, how)
838 840           return DataFrame(jdf, self.sql_ctx)

python/pyspark/sql/tests.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2021,6 +2021,11 @@ def test_toDF_with_schema_string(self):
2021 2021           self.assertEqual(df.schema.simpleString(), "struct<value:int>")
2022 2022           self.assertEqual(df.collect(), [Row(key=i) for i in range(100)])
2023 2023
     2024 +     def test_join_without_on(self):
     2025 +         self.assertRaises(
     2026 +             AnalysisException,
     2027 +             lambda: self.spark.range(1).join(self.spark.range(1), how="inner").collect())
     2028 +
2024 2029       # Regression test for invalid join methods when on is None, Spark-14761
2025 2030       def test_invalid_join_method(self):
2026 2031           df1 = self.spark.createDataFrame([("Alice", 5), ("Bob", 8)], ["name", "age"])

0 commit comments

Comments
 (0)