Skip to content

Commit 185a0a5

Browse files
committed
[SPARK-43611][SQL][PS][CONNCECT] Make ExtractWindowExpressions retain the PLAN_ID_TAG
### What changes were proposed in this pull request? Make rule `ExtractWindowExpressions` retain the `PLAN_ID_TAG ` ### Why are the changes needed? In #39925, we introduced a new mechanism to resolve expression with specified plan. However, sometimes the plan ID might be discarded by some analyzer rules, and then some expressions can not be correctly resolved, this issue is the main blocker of PS on Connect. ### Does this PR introduce _any_ user-facing change? yes, a lot of Pandas APIs enabled ### How was this patch tested? Enable UTs Closes #42086 from zhengruifeng/ps_connect_analyze_window. Authored-by: Ruifeng Zheng <[email protected]> Signed-off-by: Ruifeng Zheng <[email protected]>
1 parent 02e36dd commit 185a0a5

File tree

12 files changed

+21
-457
lines changed

12 files changed

+21
-457
lines changed

python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,36 +29,6 @@ class FrameParityMissingDataTests(
2929
def psdf(self):
3030
return ps.from_pandas(self.pdf)
3131

32-
@unittest.skip(
33-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
34-
)
35-
def test_backfill(self):
36-
super().test_backfill()
37-
38-
@unittest.skip(
39-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
40-
)
41-
def test_bfill(self):
42-
super().test_bfill()
43-
44-
@unittest.skip(
45-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
46-
)
47-
def test_ffill(self):
48-
super().test_ffill()
49-
50-
@unittest.skip(
51-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
52-
)
53-
def test_fillna(self):
54-
return super().test_fillna()
55-
56-
@unittest.skip(
57-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
58-
)
59-
def test_pad(self):
60-
super().test_pad()
61-
6232

6333
if __name__ == "__main__":
6434
from pyspark.pandas.tests.connect.computation.test_parity_missing_data import * # noqa: F401

python/pyspark/pandas/tests/connect/series/test_parity_compute.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,6 @@
2222

2323

2424
class SeriesParityComputeTests(SeriesComputeMixin, PandasOnSparkTestUtils, ReusedConnectTestCase):
25-
@unittest.skip(
26-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
27-
)
28-
def test_diff(self):
29-
super().test_diff()
30-
31-
@unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.")
32-
def test_factorize(self):
33-
super().test_factorize()
34-
35-
@unittest.skip(
36-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
37-
)
38-
def test_shift(self):
39-
super().test_shift()
40-
4125
@unittest.skip(
4226
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
4327
)

python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,8 @@
2424
class SeriesParityCumulativeTests(
2525
SeriesCumulativeMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
2626
):
27-
@unittest.skip(
28-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
29-
)
30-
def test_cummax(self):
31-
super().test_cummax()
32-
33-
@unittest.skip(
34-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
35-
)
36-
def test_cummin(self):
37-
super().test_cummin()
38-
39-
@unittest.skip(
40-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
41-
)
42-
def test_cumprod(self):
43-
super().test_cumprod()
44-
45-
@unittest.skip(
46-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
47-
)
48-
def test_cumsum(self):
49-
super().test_cumsum()
27+
28+
pass
5029

5130

5231
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/series/test_parity_index.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,8 @@
2222

2323

2424
class SeriesParityIndexTests(SeriesIndexMixin, PandasOnSparkTestUtils, ReusedConnectTestCase):
25-
@unittest.skip(
26-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
27-
)
28-
def test_reset_index_with_default_index_types(self):
29-
super().test_reset_index_with_default_index_types()
25+
26+
pass
3027

3128

3229
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -24,41 +24,8 @@
2424
class SeriesParityMissingDataTests(
2525
SeriesMissingDataMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
2626
):
27-
@unittest.skip(
28-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
29-
)
30-
def test_backfill(self):
31-
super().test_backfill()
3227

33-
@unittest.skip(
34-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
35-
)
36-
def test_bfill(self):
37-
super().test_bfill()
38-
39-
@unittest.skip(
40-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
41-
)
42-
def test_ffill(self):
43-
super().test_ffill()
44-
45-
@unittest.skip(
46-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
47-
)
48-
def test_fillna(self):
49-
super().test_fillna()
50-
51-
@unittest.skip(
52-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
53-
)
54-
def test_pad(self):
55-
super().test_pad()
56-
57-
@unittest.skip(
58-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
59-
)
60-
def test_replace(self):
61-
super().test_replace()
28+
pass
6229

6330

6431
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/series/test_parity_stat.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,8 @@
2222

2323

2424
class SeriesParityStatTests(SeriesStatMixin, PandasOnSparkTestUtils, ReusedConnectTestCase):
25-
@unittest.skip(
26-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
27-
)
28-
def test_pct_change(self):
29-
super().test_pct_change()
30-
31-
@unittest.skip("TODO(SPARK-43618): Fix pyspark.sq.column._unary_op to work with Spark Connect.")
32-
def test_rank(self):
33-
super().test_rank()
25+
26+
pass
3427

3528

3629
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/test_parity_ewm.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,7 @@
2222

2323

2424
class EWMParityTests(EWMTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase, TestUtils):
25-
@unittest.skip(
26-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
27-
)
28-
def test_ewm_mean(self):
29-
super().test_ewm_mean()
30-
31-
@unittest.skip(
32-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
33-
)
34-
def test_groupby_ewm_func(self):
35-
super().test_groupby_ewm_func()
25+
pass
3626

3727

3828
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/test_parity_expanding.py

Lines changed: 1 addition & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -24,125 +24,7 @@
2424
class ExpandingParityTests(
2525
ExpandingTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
2626
):
27-
@unittest.skip(
28-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
29-
)
30-
def test_expanding_count(self):
31-
super().test_expanding_count()
32-
33-
@unittest.skip(
34-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
35-
)
36-
def test_expanding_kurt(self):
37-
super().test_expanding_kurt()
38-
39-
@unittest.skip(
40-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
41-
)
42-
def test_expanding_max(self):
43-
super().test_expanding_max()
44-
45-
@unittest.skip(
46-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
47-
)
48-
def test_expanding_mean(self):
49-
super().test_expanding_mean()
50-
51-
@unittest.skip(
52-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
53-
)
54-
def test_expanding_min(self):
55-
super().test_expanding_min()
56-
57-
@unittest.skip(
58-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
59-
)
60-
def test_expanding_quantile(self):
61-
super().test_expanding_quantile()
62-
63-
@unittest.skip(
64-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
65-
)
66-
def test_expanding_skew(self):
67-
super().test_expanding_skew()
68-
69-
@unittest.skip(
70-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
71-
)
72-
def test_expanding_std(self):
73-
super().test_expanding_std()
74-
75-
@unittest.skip(
76-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
77-
)
78-
def test_expanding_sum(self):
79-
super().test_expanding_sum()
80-
81-
@unittest.skip(
82-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
83-
)
84-
def test_expanding_var(self):
85-
super().test_expanding_var()
86-
87-
@unittest.skip(
88-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
89-
)
90-
def test_groupby_expanding_count(self):
91-
super().test_groupby_expanding_count()
92-
93-
@unittest.skip(
94-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
95-
)
96-
def test_groupby_expanding_kurt(self):
97-
super().test_groupby_expanding_kurt()
98-
99-
@unittest.skip(
100-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
101-
)
102-
def test_groupby_expanding_max(self):
103-
super().test_groupby_expanding_max()
104-
105-
@unittest.skip(
106-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
107-
)
108-
def test_groupby_expanding_mean(self):
109-
super().test_groupby_expanding_mean()
110-
111-
@unittest.skip(
112-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
113-
)
114-
def test_groupby_expanding_min(self):
115-
super().test_groupby_expanding_min()
116-
117-
@unittest.skip(
118-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
119-
)
120-
def test_groupby_expanding_quantile(self):
121-
super().test_groupby_expanding_quantile()
122-
123-
@unittest.skip(
124-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
125-
)
126-
def test_groupby_expanding_skew(self):
127-
super().test_groupby_expanding_skew()
128-
129-
@unittest.skip(
130-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
131-
)
132-
def test_groupby_expanding_std(self):
133-
super().test_groupby_expanding_std()
134-
135-
@unittest.skip(
136-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
137-
)
138-
def test_groupby_expanding_sum(self):
139-
super().test_groupby_expanding_sum()
140-
141-
@unittest.skip(
142-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
143-
)
144-
def test_groupby_expanding_var(self):
145-
super().test_groupby_expanding_var()
27+
pass
14628

14729

14830
if __name__ == "__main__":

python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,53 +24,7 @@
2424
class OpsOnDiffFramesGroupByParityTests(
2525
OpsOnDiffFramesGroupByTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
2626
):
27-
@unittest.skip(
28-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
29-
)
30-
def test_cumcount(self):
31-
super().test_cumcount()
32-
33-
@unittest.skip(
34-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
35-
)
36-
def test_cummax(self):
37-
super().test_cummax()
38-
39-
@unittest.skip(
40-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
41-
)
42-
def test_cummin(self):
43-
super().test_cummin()
44-
45-
@unittest.skip(
46-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
47-
)
48-
def test_cumprod(self):
49-
super().test_cumprod()
50-
51-
@unittest.skip(
52-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
53-
)
54-
def test_cumsum(self):
55-
super().test_cumsum()
56-
57-
@unittest.skip(
58-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
59-
)
60-
def test_diff(self):
61-
super().test_diff()
62-
63-
@unittest.skip(
64-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
65-
)
66-
def test_fillna(self):
67-
super().test_fillna()
68-
69-
@unittest.skip(
70-
"TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
71-
)
72-
def test_shift(self):
73-
super().test_shift()
27+
pass
7428

7529

7630
if __name__ == "__main__":

0 commit comments

Comments
 (0)