diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py index a2a9e28a5ab56..205b937fb51db 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_as_type import AsTypeTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class AsTypeParityTests( - AsTypeTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + AsTypeTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py index c277f5ce0664e..1623db58af380 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py @@ -20,7 +20,10 @@ from pyspark.testing.connectutils import ReusedConnectTestCase -class BaseParityTests(BaseTestsMixin, ReusedConnectTestCase): +class BaseParityTests( + BaseTestsMixin, + ReusedConnectTestCase, +): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py index 29b13868e03f6..42ca982766584 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_binary_ops import BinaryOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BinaryOpsParityTests( - BinaryOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + BinaryOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py index 9ad2aa0ad17ae..e14dcefde8517 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_boolean_ops import BooleanOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BooleanOpsParityTests( - BooleanOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + BooleanOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py index 1b4dabdb04533..d5c1fdc09a890 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_categorical_ops import CategoricalOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class CategoricalOpsParityTests( - CategoricalOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + CategoricalOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py index ef587578f4ae6..e22bc8988a9bc 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_complex_ops import ComplexOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ComplexOpsParityTests( - ComplexOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ComplexOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py index baa3180baaa7b..d8dfd488b325b 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_date_ops import DateOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DateOpsParityTests( - DateOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + DateOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py index 2641e3a32dcdf..d963db367ac2f 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_datetime_ops import DatetimeOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DatetimeOpsParityTests( - DatetimeOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + DatetimeOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py index 5df4c791c98bb..9a2d1ef685ac3 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_null_ops import NullOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NullOpsParityTests( - NullOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + NullOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py index 6f5c294e4ad5d..1cce33e170767 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_arithmetic import ArithmeticTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ArithmeticParityTests( - ArithmeticTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ArithmeticTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py index 56eba708c9456..729443e8b9d8a 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_ops import NumOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NumOpsParityTests( - NumOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + NumOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py index 4d322d8b9b067..0e90dd97887d3 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_reverse import ReverseTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ReverseParityTests( - ReverseTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ReverseTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py index f507756a7a487..bb31ded811024 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_string_ops import StringOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class StringOpsParityTests( - StringOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + StringOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py index edd29fa1ed28d..819f2e0791033 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py @@ -16,19 +16,19 @@ # import unittest -import pyspark.pandas as ps from pyspark.pandas.tests.data_type_ops.test_timedelta_ops import TimedeltaOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class TimedeltaOpsParityTests( - TimedeltaOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + TimedeltaOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py index 70a79e4cd3f95..d4ce9a4d0499d 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_udt_ops import UDTOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class UDTOpsParityTests( - UDTOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + UDTOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py deleted file mode 100644 index f1e36aecd194c..0000000000000 --- a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +++ /dev/null @@ -1,211 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import datetime -import decimal - -import numpy as np -import pandas as pd - -import pyspark.pandas as ps -from pyspark.pandas.typedef.typehints import ( - extension_dtypes_available, - extension_float_dtypes_available, - extension_object_dtypes_available, -) - -if extension_dtypes_available: - from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype - -if extension_float_dtypes_available: - from pandas import Float32Dtype, Float64Dtype - -if extension_object_dtypes_available: - from pandas import BooleanDtype, StringDtype - - -class OpsTestBase: - """The test base for arithmetic operations of different data types.""" - - @property - def numeric_pdf(self): - dtypes = [np.int32, int, np.float32, float] - sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) - sers.append(pd.Series([1, 2, np.nan], dtype=float)) - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(np.nan)])) - pdf = pd.concat(sers, axis=1) - pdf.columns = [dtype.__name__ for dtype in dtypes] + [ - "decimal", - "float_nan", - "decimal_nan", - ] - return pdf - - @property - def numeric_psdf(self): - return ps.from_pandas(self.numeric_pdf) - - @property - def numeric_df_cols(self): - return self.numeric_pdf.columns - - @property - def integral_pdf(self): - return pd.DataFrame({"this": [1, 2, 3], "that": [2, 2, 1]}) - - @property - def integral_psdf(self): - return ps.from_pandas(self.integral_pdf) - - @property - def non_numeric_pdf(self): - psers = { - "string": pd.Series(["x", "y", "z"]), - "bool": pd.Series([True, True, False]), - "date": pd.Series( - [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] - ), - "datetime": pd.to_datetime(pd.Series([1, 2, 3])), - "timedelta": pd.Series( - [datetime.timedelta(1), datetime.timedelta(hours=2), datetime.timedelta(weeks=3)] - ), - "categorical": pd.Series(["a", "b", "a"], dtype="category"), - } - return pd.concat(psers, axis=1) - - @property - def non_numeric_psdf(self): - return ps.from_pandas(self.non_numeric_pdf) - - @property - def non_numeric_df_cols(self): - return self.non_numeric_pdf.columns - - @property - def pdf(self): - return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) - - @property - def df_cols(self): - return self.pdf.columns - - @property - def numeric_psers(self): - dtypes = [np.float32, float, int, np.int32] - sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) - return sers - - @property - def numeric_pssers(self): - return [ps.from_pandas(pser) for pser in self.numeric_psers] - - @property - def numeric_pser_psser_pairs(self): - return zip(self.numeric_psers, self.numeric_pssers) - - @property - def non_numeric_psers(self): - psers = { - "string": pd.Series(["x", "y", "z"]), - "datetime": pd.to_datetime(pd.Series([1, 2, 3])), - "bool": pd.Series([True, True, False]), - "date": pd.Series( - [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] - ), - "categorical": pd.Series(["a", "b", "a"], dtype="category"), - } - return psers - - @property - def non_numeric_pssers(self): - pssers = {} - - for k, v in self.non_numeric_psers.items(): - pssers[k] = ps.from_pandas(v) - return pssers - - @property - def non_numeric_pser_psser_pairs(self): - return zip(self.non_numeric_psers.values(), self.non_numeric_pssers.values()) - - @property - def pssers(self): - return self.numeric_pssers + list(self.non_numeric_pssers.values()) - - @property - def psers(self): - return self.numeric_psers + list(self.non_numeric_psers.values()) - - @property - def pser_psser_pairs(self): - return zip(self.psers, self.pssers) - - @property - def string_extension_dtype(self): - return ["string", StringDtype()] if extension_object_dtypes_available else [] - - @property - def object_extension_dtypes(self): - return ( - ["boolean", "string", BooleanDtype(), StringDtype()] - if extension_object_dtypes_available - else [] - ) - - @property - def fractional_extension_dtypes(self): - return ( - ["Float32", "Float64", Float32Dtype(), Float64Dtype()] - if extension_float_dtypes_available - else [] - ) - - @property - def integral_extension_dtypes(self): - return ( - [ - "Int8", - "Int16", - "Int32", - "Int64", - Int8Dtype(), - Int16Dtype(), - Int32Dtype(), - Int64Dtype(), - ] - if extension_dtypes_available - else [] - ) - - @property - def extension_dtypes(self): - return ( - self.object_extension_dtypes - + self.fractional_extension_dtypes - + self.integral_extension_dtypes - ) - - def check_extension(self, left, right): - """ - Compare `psser` and `pser` of numeric ExtensionDtypes. - - This utility is to adjust an issue for comparing numeric ExtensionDtypes in specific - pandas versions. Please refer to https://github.com/pandas-dev/pandas/issues/39410. - """ - self.assert_eq(left, right) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py index 9d5c0d03d548f..b27cbceac8fa3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py @@ -22,6 +22,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, @@ -87,7 +88,11 @@ def test_astype_eager_check(self): psser.astype(int) -class AsTypeTests(AsTypeTestsMixin, OpsTestBase): +class AsTypeTests( + AsTypeTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_base.py b/python/pyspark/pandas/tests/data_type_ops/test_base.py index 551bbbadfb862..8114b60af9353 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_base.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_base.py @@ -91,7 +91,10 @@ def test_bool_ext_ops(self): self.assertIsInstance(DataTypeOps(ExtensionDtype(), BooleanType()), BooleanOps) -class BaseTests(BaseTestsMixin, unittest.TestCase): +class BaseTests( + BaseTestsMixin, + unittest.TestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py index 732cc295bfb06..211d7a094f833 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py @@ -19,6 +19,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -207,7 +208,11 @@ def test_ge(self): self.assert_eq(byte_pdf["this"] >= byte_pdf["this"], byte_psdf["this"] >= byte_psdf["this"]) -class BinaryOpsTests(BinaryOpsTestsMixin, OpsTestBase): +class BinaryOpsTests( + BinaryOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py index 6887d535cffef..bb8067530d643 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py @@ -24,6 +24,7 @@ from pyspark import pandas as ps from pyspark.pandas import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_float_dtypes_available, @@ -806,7 +807,11 @@ def test_ge(self): self.check_extension(pser >= pser, psser >= psser) -class BooleanOpsTests(BooleanOpsTestsMixin, OpsTestBase): +class BooleanOpsTests( + BooleanOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index d9a4ee1fcd8a8..cf6a986b7b651 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -21,6 +21,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -539,7 +540,11 @@ def test_ge(self): ) -class CategoricalOpsTests(CategoricalOpsTestsMixin, OpsTestBase): +class CategoricalOpsTests( + CategoricalOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py index f7c66425a9023..535fda1359b82 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py @@ -21,6 +21,7 @@ import pandas as pd from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -351,7 +352,11 @@ def test_ge(self): ) -class ComplexOpsTests(ComplexOpsTestsMixin, OpsTestBase): +class ComplexOpsTests( + ComplexOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py index 33332503943d3..c2b29ee8a1d35 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py @@ -16,12 +16,12 @@ # import datetime -import unittest import pandas as pd from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -235,7 +235,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class DateOpsTests(DateOpsTestsMixin, OpsTestBase): +class DateOpsTests( + DateOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py index c7bda900b7d5a..f98f2011dde00 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py @@ -21,6 +21,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -236,7 +237,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class DatetimeOpsTests(DatetimeOpsTestsMixin, OpsTestBase): +class DatetimeOpsTests( + DatetimeOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py index 19a3e7c07359a..439557c460dd8 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py @@ -19,6 +19,7 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -161,7 +162,11 @@ def test_ge(self): self.assert_eq(pser >= pser, psser >= psser) -class NullOpsTests(NullOpsTestsMixin, OpsTestBase): +class NullOpsTests( + NullOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py b/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py index 3aedd93622e8b..f27211f539171 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py @@ -21,6 +21,7 @@ import numpy as np from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -167,7 +168,11 @@ def test_pow(self): self.assertRaises(TypeError, lambda: psser ** psdf[n_col]) -class ArithmeticTests(ArithmeticTestsMixin, OpsTestBase): +class ArithmeticTests( + ArithmeticTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index b1c80b31651b9..e7b157cabb2f0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -22,6 +22,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, @@ -410,7 +411,11 @@ def test_ge(self): self.check_extension(pser >= pser, (psser >= psser).sort_index()) -class NumOpsTests(NumOpsTestsMixin, OpsTestBase): +class NumOpsTests( + NumOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py b/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py index 4e7ee17aec6f1..e60fa1e781f08 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py @@ -21,6 +21,7 @@ import pandas as pd from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -124,7 +125,11 @@ def test_rmod(self): self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) % psser) -class ReverseTests(ReverseTestsMixin, OpsTestBase): +class ReverseTests( + ReverseTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py index 340153b063353..9648ad9ab2c0f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py @@ -23,6 +23,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import extension_object_dtypes_available @@ -229,7 +230,11 @@ def test_ge(self): self.assert_eq(pser >= pser, psser >= psser) -class StringOpsTests(StringOpsTestsMixin, OpsTestBase): +class StringOpsTests( + StringOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py index f89ec17ec12b3..5ea60742d9e79 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py @@ -21,6 +21,7 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -202,7 +203,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class TimedeltaOpsTests(TimedeltaOpsTestsMixin, OpsTestBase): +class TimedeltaOpsTests( + TimedeltaOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py index 45f8cca56ee94..60b4153198a34 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py @@ -19,6 +19,7 @@ import pyspark.pandas as ps from pyspark.ml.linalg import SparseVector +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -175,7 +176,11 @@ def test_ge(self): ) -class UDTOpsTests(UDTOpsTestsMixin, OpsTestBase): +class UDTOpsTests( + UDTOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 37a708948a805..089b929db1b33 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -27,7 +27,6 @@ extension_float_dtypes_available, extension_object_dtypes_available, ) -from pyspark.testing.pandasutils import ComparisonTestBase if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -39,7 +38,7 @@ from pandas import BooleanDtype, StringDtype -class OpsTestBase(ComparisonTestBase): +class OpsTestBase: """The test base for arithmetic operations of different data types.""" @property @@ -101,6 +100,10 @@ def non_numeric_df_cols(self): def pdf(self): return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) + @property + def psdf(self): + return ps.from_pandas(self.pdf) + @property def df_cols(self): return self.pdf.columns