From c6712bfe5b44e0ad796f1e5b078838620232d162 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Mon, 21 Nov 2022 11:42:31 +0800
Subject: [PATCH 1/2] init

---
Reviewer note (not part of the commit message): adds DataFrame.isEmpty()
to the Spark Connect Python DataFrame. The result is computed as
len(self.take(1)) == 0 and memoised under the "is_empty" key of
self._cache. A test covers one non-empty and one empty (LIMIT 0) query.

 python/pyspark/sql/connect/dataframe.py     | 14 ++++++++++++++
 .../sql/tests/connect/test_connect_basic.py |  5 +++++
 2 files changed, 19 insertions(+)

diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 15aa028b11b1..6457416ce2de 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -122,6 +122,20 @@ def withPlan(cls, plan: plan.LogicalPlan, session: "RemoteSparkSession") -> "Dat
         new_frame._plan = plan
         return new_frame
 
+    def isEmpty(self) -> bool:
+        """Returns ``True`` if this :class:`DataFrame` is empty.
+
+        .. versionadded:: 3.4.0
+
+        Returns
+        -------
+        bool
+            Whether it's empty DataFrame or not.
+        """
+        if "is_empty" not in self._cache:
+            self._cache["is_empty"] = len(self.take(1)) == 0
+        return bool(self._cache["is_empty"])
+
     def select(self, *cols: "ExpressionOrString") -> "DataFrame":
         return DataFrame.withPlan(plan.Project(self._plan, *cols), session=self._session)
 
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 9e7a5f2f4a54..3493bef1b558 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -319,6 +319,11 @@ def test_empty_dataset(self):
         self.assertEqual(1, len(pdf.columns))  # one column
         self.assertEqual("X", pdf.columns[0])
 
+    def test_is_empty(self):
+        # SPARK-41212: Test is empty
+        self.assertFalse(self.connect.sql("SELECT 1 AS X").isEmpty())
+        self.assertTrue(self.connect.sql("SELECT 1 AS X LIMIT 0").isEmpty())
+
     def test_session(self):
         self.assertEqual(self.connect, self.connect.sql("SELECT 1").sparkSession())
 

From 34accba1f5917eb4f7c1fe6a866841ea65e8e20d Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 22 Nov 2022 13:14:31 +0800
Subject: [PATCH 2/2] remove cache

---
Reviewer note (not part of the commit message): drops the self._cache
memoisation added in PATCH 1/2. isEmpty() now evaluates
len(self.take(1)) == 0 on every call and no longer reads or writes
self._cache.

 python/pyspark/sql/connect/dataframe.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 6457416ce2de..9118644662d1 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -132,9 +132,7 @@ def isEmpty(self) -> bool:
         bool
             Whether it's empty DataFrame or not.
         """
-        if "is_empty" not in self._cache:
-            self._cache["is_empty"] = len(self.take(1)) == 0
-        return bool(self._cache["is_empty"])
+        return len(self.take(1)) == 0
 
     def select(self, *cols: "ExpressionOrString") -> "DataFrame":
         return DataFrame.withPlan(plan.Project(self._plan, *cols), session=self._session)