Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dev/infra/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget
RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl
RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.0' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl

RUN add-apt-repository ppa:pypy/ppa
RUN apt update
Expand All @@ -45,7 +45,7 @@ RUN mkdir -p /usr/local/pypy/pypy3.7 && \
ln -sf /usr/local/pypy/pypy3.7/bin/pypy /usr/local/bin/pypy3

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
RUN pypy3 -m pip install numpy 'pandas<=1.4.4' scipy coverage matplotlib
RUN pypy3 -m pip install numpy 'pandas<=1.5.0' scipy coverage matplotlib

RUN $APT_INSTALL gnupg ca-certificates pandoc
RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list
Expand Down
12 changes: 6 additions & 6 deletions python/pyspark/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,8 +866,8 @@ def isin(self: IndexOpsLike, values: Sequence[Any]) -> IndexOpsLike:
5 False
Name: animal, dtype: bool

>>> s.rename("a").to_frame().set_index("a").index.isin(['lama'])
Index([True, False, True, False, True, False], dtype='object', name='a')
>>> s.rename("a").to_frame().set_index("a").index.isin(['lama']) # doctest: +SKIP
Index([True, False, True, False, True, False], dtype='bool', name='a')
Copy link
Contributor Author

@itholic itholic Oct 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: I included the fix for SPARK-40896 here and below since it's a fairly minor fix, and I believe it's the last one.

"""
if not is_list_like(values):
raise TypeError(
Expand Down Expand Up @@ -910,8 +910,8 @@ def isnull(self: IndexOpsLike) -> IndexOpsLike:
2 True
dtype: bool

>>> ser.rename("a").to_frame().set_index("a").index.isna()
Index([False, False, True], dtype='object', name='a')
>>> ser.rename("a").to_frame().set_index("a").index.isna() # doctest: +SKIP
Index([False, False, True], dtype='bool', name='a')
"""
from pyspark.pandas.indexes import MultiIndex

Expand Down Expand Up @@ -953,8 +953,8 @@ def notnull(self: IndexOpsLike) -> IndexOpsLike:
2 False
dtype: bool

>>> ser.rename("a").to_frame().set_index("a").index.notna()
Index([True, True, False], dtype='object', name='a')
>>> ser.rename("a").to_frame().set_index("a").index.notna() # doctest: +SKIP
Index([True, True, False], dtype='bool', name='a')
"""
from pyspark.pandas.indexes import MultiIndex

Expand Down
28 changes: 14 additions & 14 deletions python/pyspark/pandas/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ def is_month_start(self) -> Index:
Examples
--------
>>> idx = ps.date_range("2018-02-27", periods=3)
>>> idx.is_month_start
Index([False, False, True], dtype='object')
>>> idx.is_month_start # doctest: +SKIP
Index([False, False, True], dtype='bool')
"""
return Index(self.to_series().dt.is_month_start)

Expand All @@ -307,8 +307,8 @@ def is_month_end(self) -> Index:
Examples
--------
>>> idx = ps.date_range("2018-02-27", periods=3)
>>> idx.is_month_end
Index([False, True, False], dtype='object')
>>> idx.is_month_end # doctest: +SKIP
Index([False, True, False], dtype='bool')
"""
return Index(self.to_series().dt.is_month_end)

Expand All @@ -330,8 +330,8 @@ def is_quarter_start(self) -> Index:
Examples
--------
>>> idx = ps.date_range('2017-03-30', periods=4)
>>> idx.is_quarter_start
Index([False, False, True, False], dtype='object')
>>> idx.is_quarter_start # doctest: +SKIP
Index([False, False, True, False], dtype='bool')
"""
return Index(self.to_series().dt.is_quarter_start)

Expand All @@ -353,8 +353,8 @@ def is_quarter_end(self) -> Index:
Examples
--------
>>> idx = ps.date_range('2017-03-30', periods=4)
>>> idx.is_quarter_end
Index([False, True, False, False], dtype='object')
>>> idx.is_quarter_end # doctest: +SKIP
Index([False, True, False, False], dtype='bool')
"""
return Index(self.to_series().dt.is_quarter_end)

Expand All @@ -375,8 +375,8 @@ def is_year_start(self) -> Index:
Examples
--------
>>> idx = ps.date_range("2017-12-30", periods=3)
>>> idx.is_year_start
Index([False, False, True], dtype='object')
>>> idx.is_year_start # doctest: +SKIP
Index([False, False, True], dtype='bool')
"""
return Index(self.to_series().dt.is_year_start)

Expand All @@ -397,8 +397,8 @@ def is_year_end(self) -> Index:
Examples
--------
>>> idx = ps.date_range("2017-12-30", periods=3)
>>> idx.is_year_end
Index([False, True, False], dtype='object')
>>> idx.is_year_end # doctest: +SKIP
Index([False, True, False], dtype='bool')
"""
return Index(self.to_series().dt.is_year_end)

Expand All @@ -420,8 +420,8 @@ def is_leap_year(self) -> Index:
Examples
--------
>>> idx = ps.date_range("2012-01-01", "2015-01-01", freq="Y")
>>> idx.is_leap_year
Index([True, False, False], dtype='object')
>>> idx.is_leap_year # doctest: +SKIP
Index([True, False, False], dtype='bool')
"""
return Index(self.to_series().dt.is_leap_year)

Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/pandas/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2315,8 +2315,8 @@ def zfill(self, width: int) -> "ps.Series":
added to the left of it (:func:`str.zfill` would have moved it to the
left). 1000 remains unchanged as it is longer than width.

>>> s.str.zfill(3)
0 0-1
>>> s.str.zfill(3) # doctest: +SKIP
0 -01
1 001
2 1000
3 None
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/pandas/supported_api_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def generate_supported_api(output_rst_file_path: str) -> None:

Write supported APIs documentation.
"""
pandas_latest_version = "1.4.4"
pandas_latest_version = "1.5.0"
if LooseVersion(pd.__version__) != LooseVersion(pandas_latest_version):
msg = (
"Warning: Latest version of pandas (%s) is required to generate the documentation; "
Expand Down