apache · zhengruifeng · Jan 4, 2024 · Jan 4, 2024
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -795,7 +795,9 @@ def __hash__(self):
         "pyspark.pandas.generic",
         "pyspark.pandas.series",
         # unittests
-        "pyspark.pandas.tests.indexes.test_base",
+        "pyspark.pandas.tests.indexes.test_basic",
+        "pyspark.pandas.tests.indexes.test_getattr",
+        "pyspark.pandas.tests.indexes.test_name",
         "pyspark.pandas.tests.indexes.test_conversion",
         "pyspark.pandas.tests.indexes.test_drop",
         "pyspark.pandas.tests.indexes.test_level",
@@ -1095,7 +1097,9 @@ def __hash__(self):
         "pyspark.pandas.tests.connect.test_parity_sql",
         "pyspark.pandas.tests.connect.test_parity_typedef",
         "pyspark.pandas.tests.connect.test_parity_utils",
-        "pyspark.pandas.tests.connect.indexes.test_parity_base",
+        "pyspark.pandas.tests.connect.indexes.test_parity_basic",
+        "pyspark.pandas.tests.connect.indexes.test_parity_getattr",
+        "pyspark.pandas.tests.connect.indexes.test_parity_name",
         "pyspark.pandas.tests.connect.indexes.test_parity_conversion",
         "pyspark.pandas.tests.connect.indexes.test_parity_drop",
         "pyspark.pandas.tests.connect.indexes.test_parity_level",

diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_basic.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_basic.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.indexes.test_basic import IndexBasicMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class IndexBasicParityTests(
+    IndexBasicMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    """Parity suite: the ``IndexBasicMixin`` tests run on ``ReusedConnectTestCase``."""
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.indexes.test_parity_basic import *  # noqa: F401
+    # Write XML reports via xmlrunner when available; otherwise use unittest's default runner.
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_getattr.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_getattr.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.indexes.test_getattr import IndexGetattrMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class IndexGetattrParityTests(
+    IndexGetattrMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    """Parity suite: the ``IndexGetattrMixin`` tests run on ``ReusedConnectTestCase``."""
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.indexes.test_parity_getattr import *  # noqa: F401
+    # Write XML reports via xmlrunner when available; otherwise use unittest's default runner.
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/...tests/connect/indexes/test_parity_base.py → ...tests/connect/indexes/test_parity_name.py b/...tests/connect/indexes/test_parity_base.py → ...tests/connect/indexes/test_parity_name.py
@@ -16,22 +16,21 @@
 #
 import unittest
 
-from pyspark import pandas as ps
-from pyspark.pandas.tests.indexes.test_base import IndexesTestsMixin
+from pyspark.pandas.tests.indexes.test_name import IndexNameMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class IndexesParityTests(
-    IndexesTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
+class IndexNameParityTests(
+    IndexNameMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    """Parity suite: the ``IndexNameMixin`` tests run on ``ReusedConnectTestCase``."""
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.indexes.test_parity_base import *  # noqa: F401
+    from pyspark.pandas.tests.connect.indexes.test_parity_name import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/...pyspark/pandas/tests/indexes/test_base.py → ...yspark/pandas/tests/indexes/test_basic.py b/...pyspark/pandas/tests/indexes/test_base.py → ...yspark/pandas/tests/indexes/test_basic.py
@@ -22,12 +22,11 @@
 import pandas as pd
 
 import pyspark.pandas as ps
-from pyspark.loose_version import LooseVersion
 from pyspark.pandas.exceptions import PandasNotImplementedError
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils, SPARK_CONF_ARROW_ENABLED
 
 
-class IndexesTestsMixin:
+class IndexBasicMixin:
     @property
     def pdf(self):
         return pd.DataFrame(
@@ -66,109 +65,6 @@ def test_index_basic(self):
         with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
             ps.Index([1.0, 2.0, 3.0], name=[(1, 2, 3)])
 
-    def test_index_getattr(self):
-        psidx = self.psdf.index
-        item = "databricks"
-
-        expected_error_message = "'.*Index' object has no attribute '{}'".format(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            psidx.__getattr__(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            ps.from_pandas(pd.date_range("2011-01-01", freq="D", periods=10)).__getattr__(item)
-
-    def test_multi_index_getattr(self):
-        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
-        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
-        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
-        psdf = ps.from_pandas(pdf)
-        psidx = psdf.index
-        item = "databricks"
-
-        expected_error_message = "'MultiIndex' object has no attribute '{}'".format(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            psidx.__getattr__(item)
-
-    def test_index_names(self):
-        psdf = self.psdf
-        self.assertIsNone(psdf.index.name)
-
-        idx = pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name="x")
-        pdf = pd.DataFrame(np.random.randn(10, 5), index=idx, columns=list("abcde"))
-        psdf = ps.from_pandas(pdf)
-
-        pser = pdf.a
-        psser = psdf.a
-
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-
-        pidx = pdf.index
-        psidx = psdf.index
-        pidx.name = "renamed"
-        psidx.name = "renamed"
-        self.assertEqual(psidx.name, pidx.name)
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-        self.assertEqual(psser.index.names, pser.index.names)
-
-        pidx.name = None
-        psidx.name = None
-        self.assertEqual(psidx.name, pidx.name)
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-        self.assertEqual(psser.index.names, pser.index.names)
-
-        with self.assertRaisesRegex(ValueError, "Names must be a list-like"):
-            psidx.names = "hi"
-
-        expected_error_message = "Length of new names must be {}, got {}".format(
-            psdf._internal.index_level, len(["0", "1"])
-        )
-        with self.assertRaisesRegex(ValueError, expected_error_message):
-            psidx.names = ["0", "1"]
-
-        expected_error_message = "Index.name must be a hashable type"
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            ps.Index([1, 2, 3], name=["0", "1"])
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            psidx.name = ["renamed"]
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            psidx.name = ["0", "1"]
-        # Specifying `names` when creating Index is no longer supported from pandas 2.0.0.
-        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
-            pass
-        else:
-            with self.assertRaisesRegex(TypeError, expected_error_message):
-                ps.Index([(1, 2), (3, 4)], names=["a", ["b"]])
-
-    def test_multi_index_names(self):
-        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
-        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
-        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
-        psdf = ps.from_pandas(pdf)
-
-        self.assertEqual(psdf.index.names, pdf.index.names)
-
-        pidx = pdf.index
-        psidx = psdf.index
-        pidx.names = ["renamed_number", "renamed_color"]
-        psidx.names = ["renamed_number", "renamed_color"]
-        self.assertEqual(psidx.names, pidx.names)
-
-        pidx.names = ["renamed_number", None]
-        psidx.names = ["renamed_number", None]
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-
-        with self.assertRaises(PandasNotImplementedError):
-            psidx.name
-        with self.assertRaises(PandasNotImplementedError):
-            psidx.name = "renamed"
-
     def test_multi_index_copy(self):
         arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
         idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
@@ -177,49 +73,6 @@ def test_multi_index_copy(self):
 
         self.assert_eq(psdf.index.copy(), pdf.index.copy())
 
-    def test_multiindex_set_names(self):
-        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
-        psidx = ps.from_pandas(pidx)
-
-        pidx = pidx.set_names(["set", "new", "names"])
-        psidx = psidx.set_names(["set", "new", "names"])
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names(["set", "new", "names"], inplace=True)
-        psidx.set_names(["set", "new", "names"], inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("first", level=0)
-        psidx = psidx.set_names("first", level=0)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("second", level=1)
-        psidx = psidx.set_names("second", level=1)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("third", level=2)
-        psidx = psidx.set_names("third", level=2)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("first", level=0, inplace=True)
-        psidx.set_names("first", level=0, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("second", level=1, inplace=True)
-        psidx.set_names("second", level=1, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("third", level=2, inplace=True)
-        psidx.set_names("third", level=2, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-    def test_multiindex_tuple_column_name(self):
-        column_labels = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")])
-        pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=column_labels)
-        pdf.set_index(("a", "x"), append=True, inplace=True)
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(pdf, psdf)
-
     def test_holds_integer(self):
         pidx = pd.Index([1, 2, 3, 4])
         psidx = ps.from_pandas(pidx)
@@ -347,16 +200,16 @@ def test_factorize(self):
         self.assertRaises(PandasNotImplementedError, lambda: psmidx.factorize())
 
 
-class IndexesTests(
-    IndexesTestsMixin,
+class IndexBasicTests(
+    IndexBasicMixin,
     PandasOnSparkTestCase,
     TestUtils,
 ):
     pass
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.indexes.test_base import *  # noqa: F401
+    from pyspark.pandas.tests.indexes.test_basic import *  # noqa: F401
 
     try:
         import xmlrunner