Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _build_metrics(self, metrics: "pb2.Response.Metrics") -> typing.List[PlanMet
def sql(self, sql_string: str) -> "DataFrame":
return DataFrame.withPlan(SQL(sql_string), self)

def collect(self, plan: pb2.Plan) -> pandas.DataFrame:
def _to_pandas(self, plan: pb2.Plan) -> pandas.DataFrame:
req = pb2.Request()
req.user_context.user_id = self._user_id
req.plan.CopyFrom(plan)
Expand Down
10 changes: 6 additions & 4 deletions python/pyspark/sql/connect/data_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
TYPE_CHECKING,
)

import pandas

import pyspark.sql.connect.plan as plan
from pyspark.sql.connect.column import (
ColumnOrString,
Expand Down Expand Up @@ -225,11 +227,11 @@ def _print_plan(self) -> str:
return ""

def collect(self):
query = self._plan.collect(self._session)
return self._session.collect(query)
raise NotImplementedError("Please use toPandas().")

def toPandas(self):
return self.collect()
def toPandas(self) -> pandas.DataFrame:
query = self._plan.collect(self._session)
return self._session._to_pandas(query)

def explain(self) -> str:
query = self._plan.collect(self._session)
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/sql/tests/connect/test_spark_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class SparkConnectTests(SparkConnectSQLTestCase):
def test_simple_read(self) -> None:
"""Tests that we can access the Spark Connect GRPC service locally."""
df = self.connect.read.table(self.tbl_name)
data = df.limit(10).collect()
data = df.limit(10).toPandas()
# Check that the limit is applied
assert len(data.index) == 10

Expand All @@ -67,7 +67,7 @@ def conv_udf(x) -> str:

u = udf(conv_udf)
df = self.connect.read.table(self.tbl_name)
result = df.select(u(df.id)).collect()
result = df.select(u(df.id)).toPandas()
assert result is not None

def test_simple_explain_string(self) -> None:
Expand Down