Skip to content

Commit 98de7eb

Browse files
techaddict authored and HyukjinKwon committed
[SPARK-42011][SPARK-42012][CONNECT][PYTHON][TESTS][FOLLOW-UP] Enable csv, orc tests in connect/test_parity_datasources.py
### What changes were proposed in this pull request?

Enable csv, orc tests in connect/test_parity_datasources.py

### Why are the changes needed?

For test coverage.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Enabled UT.

Closes #39581 from techaddict/SPARK-42011-followup.

Authored-by: Sandeep Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent eb667f9 commit 98de7eb

File tree

4 files changed

+1
-24
lines changed

4 files changed

+1
-24
lines changed

python/pyspark/sql/connect/column.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -439,9 +439,6 @@ def _test() -> None:
439439
.getOrCreate()
440440
)
441441

442-
# Spark Connect has a different string representation for Column.
443-
del pyspark.sql.connect.column.Column.getItem.__doc__
444-
445442
# TODO(SPARK-41772): Enable pyspark.sql.connect.column.Column.withField doctest
446443
del pyspark.sql.connect.column.Column.withField.__doc__
447444

python/pyspark/sql/connect/readwriter.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -616,12 +616,8 @@ def _test() -> None:
616616
globs = pyspark.sql.connect.readwriter.__dict__.copy()
617617

618618
# TODO(SPARK-41817): Support reading with schema
619-
del pyspark.sql.connect.readwriter.DataFrameReader.load.__doc__
620619
del pyspark.sql.connect.readwriter.DataFrameReader.option.__doc__
621-
del pyspark.sql.connect.readwriter.DataFrameReader.text.__doc__
622-
del pyspark.sql.connect.readwriter.DataFrameWriter.csv.__doc__
623620
del pyspark.sql.connect.readwriter.DataFrameWriter.option.__doc__
624-
del pyspark.sql.connect.readwriter.DataFrameWriter.text.__doc__
625621
del pyspark.sql.connect.readwriter.DataFrameWriter.bucketBy.__doc__
626622
del pyspark.sql.connect.readwriter.DataFrameWriter.sortBy.__doc__
627623

python/pyspark/sql/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,7 @@ def write(self) -> DataFrameWriter:
506506
--------
507507
>>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"])
508508
>>> type(df.write)
509-
<class 'pyspark.sql.readwriter.DataFrameWriter'>
509+
<class '...readwriter.DataFrameWriter'>
510510
511511
Write the DataFrame as a table.
512512

python/pyspark/sql/tests/connect/test_parity_datasources.py

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,12 +22,6 @@
2222

2323

2424
class DataSourcesParityTests(DataSourcesTestsMixin, ReusedConnectTestCase):
25-
26-
# TODO(SPARK-42011): Implement DataFrameReader.csv
27-
@unittest.skip("Fails in Spark Connect, should enable.")
28-
def test_checking_csv_header(self):
29-
super().test_checking_csv_header()
30-
3125
@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
3226
def test_csv_sampling_ratio(self):
3327
super().test_csv_sampling_ratio()
@@ -36,16 +30,6 @@ def test_csv_sampling_ratio(self):
3630
def test_json_sampling_ratio(self):
3731
super().test_json_sampling_ratio()
3832

39-
# TODO(SPARK-42011): Implement DataFrameReader.csv
40-
@unittest.skip("Fails in Spark Connect, should enable.")
41-
def test_multiline_csv(self):
42-
super().test_multiline_csv()
43-
44-
# TODO(SPARK-42012): Implement DataFrameReader.orc
45-
@unittest.skip("Fails in Spark Connect, should enable.")
46-
def test_read_multiple_orc_file(self):
47-
super().test_read_multiple_orc_file()
48-
4933

5034
if __name__ == "__main__":
5135
import unittest

0 commit comments

Comments (0)