Skip to content

Commit 98de7eb

Browse files
techaddict authored and HyukjinKwon committed
[SPARK-42011][SPARK-42012][CONNECT][PYTHON][TESTS][FOLLOW-UP] Enable csv, orc tests in connect/test_parity_datasources.py
### What changes were proposed in this pull request?

Enable csv, orc tests in connect/test_parity_datasources.py

### Why are the changes needed?

For test coverage.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Enabled UT.

Closes #39581 from techaddict/SPARK-42011-followup.

Authored-by: Sandeep Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent eb667f9 commit 98de7eb

File tree

4 files changed

+1
-24
lines changed

4 files changed

+1
-24
lines changed

python/pyspark/sql/connect/column.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -439,9 +439,6 @@ def _test() -> None:
439439
.getOrCreate()
440440
)
441441

442-
# Spark Connect has a different string representation for Column.
443-
del pyspark.sql.connect.column.Column.getItem.__doc__
444-
445442
# TODO(SPARK-41772): Enable pyspark.sql.connect.column.Column.withField doctest
446443
del pyspark.sql.connect.column.Column.withField.__doc__
447444

python/pyspark/sql/connect/readwriter.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -616,12 +616,8 @@ def _test() -> None:
616616
globs = pyspark.sql.connect.readwriter.__dict__.copy()
617617

618618
# TODO(SPARK-41817): Support reading with schema
619-
del pyspark.sql.connect.readwriter.DataFrameReader.load.__doc__
620619
del pyspark.sql.connect.readwriter.DataFrameReader.option.__doc__
621-
del pyspark.sql.connect.readwriter.DataFrameReader.text.__doc__
622-
del pyspark.sql.connect.readwriter.DataFrameWriter.csv.__doc__
623620
del pyspark.sql.connect.readwriter.DataFrameWriter.option.__doc__
624-
del pyspark.sql.connect.readwriter.DataFrameWriter.text.__doc__
625621
del pyspark.sql.connect.readwriter.DataFrameWriter.bucketBy.__doc__
626622
del pyspark.sql.connect.readwriter.DataFrameWriter.sortBy.__doc__
627623

python/pyspark/sql/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,7 @@ def write(self) -> DataFrameWriter:
506506
--------
507507
>>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"])
508508
>>> type(df.write)
509-
<class 'pyspark.sql.readwriter.DataFrameWriter'>
509+
<class '...readwriter.DataFrameWriter'>
510510
511511
Write the DataFrame as a table.
512512

python/pyspark/sql/tests/connect/test_parity_datasources.py

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,12 +22,6 @@
2222

2323

2424
class DataSourcesParityTests(DataSourcesTestsMixin, ReusedConnectTestCase):
25-
26-
# TODO(SPARK-42011): Implement DataFrameReader.csv
27-
@unittest.skip("Fails in Spark Connect, should enable.")
28-
def test_checking_csv_header(self):
29-
super().test_checking_csv_header()
30-
3125
@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
3226
def test_csv_sampling_ratio(self):
3327
super().test_csv_sampling_ratio()
@@ -36,16 +30,6 @@ def test_csv_sampling_ratio(self):
3630
def test_json_sampling_ratio(self):
3731
super().test_json_sampling_ratio()
3832

39-
# TODO(SPARK-42011): Implement DataFrameReader.csv
40-
@unittest.skip("Fails in Spark Connect, should enable.")
41-
def test_multiline_csv(self):
42-
super().test_multiline_csv()
43-
44-
# TODO(SPARK-42012): Implement DataFrameReader.orc
45-
@unittest.skip("Fails in Spark Connect, should enable.")
46-
def test_read_multiple_orc_file(self):
47-
super().test_read_multiple_orc_file()
48-
4933

5034
if __name__ == "__main__":
5135
import unittest

0 commit comments

Comments (0)