diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py index b7ebe77b3115b..ce1ffb347655a 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py @@ -1866,9 +1866,16 @@ def test_corrwith(self): self._test_corrwith((df1 + 1), df2.B) self._test_corrwith((df1 + 1), (df2.B + 2)) + # Regression in pandas 1.5.0 + # See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue, + # and https://github.com/pandas-dev/pandas/pull/46174 for the PR that introduced it. df_bool = ps.DataFrame({"A": [True, True, False, False], "B": [True, False, False, True]}) ser_bool = ps.Series([True, True, False, True]) - self._test_corrwith(df_bool, ser_bool) + if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"): + expected = ps.Series([0.5773502691896257, 0.5773502691896257], index=["B", "A"]) + self.assert_eq(df_bool.corrwith(ser_bool), expected, almost=True) + else: + self._test_corrwith(df_bool, ser_bool) self._test_corrwith(self.psdf1, self.psdf1) self._test_corrwith(self.psdf1, self.psdf2) @@ -1876,7 +1883,15 @@ def test_corrwith(self): self._test_corrwith(self.psdf3, self.psdf4) self._test_corrwith(self.psdf1, self.psdf1.a) - self._test_corrwith(self.psdf1, self.psdf2.b) + # Regression in pandas 1.5.0 + # See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue, + # and https://github.com/pandas-dev/pandas/pull/46174 for the PR that introduced it. + if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"): + expected = ps.Series([-0.08827348295047496, 0.4413674147523748], index=["b", "a"]) + self.assert_eq(self.psdf1.corrwith(self.psdf2.b), expected, almost=True) + else: + self._test_corrwith(self.psdf1, self.psdf2.b) + self._test_corrwith(self.psdf2, self.psdf3.c) self._test_corrwith(self.psdf3, self.psdf4.f)