apache · itholic · Sep 21, 2022 · Sep 27, 2022 · Oct 11, 2022 · Oct 19, 2022
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
@@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget
 RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
 
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl
+RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.0' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl
 
 RUN add-apt-repository ppa:pypy/ppa
 RUN apt update
@@ -45,7 +45,7 @@ RUN mkdir -p /usr/local/pypy/pypy3.7 && \
     ln -sf /usr/local/pypy/pypy3.7/bin/pypy /usr/local/bin/pypy3
 
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'pandas<=1.4.4' scipy coverage matplotlib
+RUN pypy3 -m pip install numpy 'pandas<=1.5.0' scipy coverage matplotlib
 
 RUN $APT_INSTALL gnupg ca-certificates pandoc
 RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list

diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
@@ -866,8 +866,8 @@ def isin(self: IndexOpsLike, values: Sequence[Any]) -> IndexOpsLike:
         5    False
         Name: animal, dtype: bool
 
-        >>> s.rename("a").to_frame().set_index("a").index.isin(['lama'])
-        Index([True, False, True, False, True, False], dtype='object', name='a')
+        >>> s.rename("a").to_frame().set_index("a").index.isin(['lama'])  # doctest: +SKIP
+        Index([True, False, True, False, True, False], dtype='bool', name='a')
         """
         if not is_list_like(values):
             raise TypeError(
@@ -910,8 +910,8 @@ def isnull(self: IndexOpsLike) -> IndexOpsLike:
         2     True
         dtype: bool
 
-        >>> ser.rename("a").to_frame().set_index("a").index.isna()
-        Index([False, False, True], dtype='object', name='a')
+        >>> ser.rename("a").to_frame().set_index("a").index.isna()  # doctest: +SKIP
+        Index([False, False, True], dtype='bool', name='a')
         """
         from pyspark.pandas.indexes import MultiIndex
 
@@ -953,8 +953,8 @@ def notnull(self: IndexOpsLike) -> IndexOpsLike:
         2    False
         dtype: bool
 
-        >>> ser.rename("a").to_frame().set_index("a").index.notna()
-        Index([True, True, False], dtype='object', name='a')
+        >>> ser.rename("a").to_frame().set_index("a").index.notna()  # doctest: +SKIP
+        Index([True, True, False], dtype='bool', name='a')
         """
         from pyspark.pandas.indexes import MultiIndex
 

diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py
@@ -284,8 +284,8 @@ def is_month_start(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range("2018-02-27", periods=3)
-        >>> idx.is_month_start
-        Index([False, False, True], dtype='object')
+        >>> idx.is_month_start  # doctest: +SKIP
+        Index([False, False, True], dtype='bool')
         """
         return Index(self.to_series().dt.is_month_start)
 
@@ -307,8 +307,8 @@ def is_month_end(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range("2018-02-27", periods=3)
-        >>> idx.is_month_end
-        Index([False, True, False], dtype='object')
+        >>> idx.is_month_end  # doctest: +SKIP
+        Index([False, True, False], dtype='bool')
         """
         return Index(self.to_series().dt.is_month_end)
 
@@ -330,8 +330,8 @@ def is_quarter_start(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range('2017-03-30', periods=4)
-        >>> idx.is_quarter_start
-        Index([False, False, True, False], dtype='object')
+        >>> idx.is_quarter_start  # doctest: +SKIP
+        Index([False, False, True, False], dtype='bool')
         """
         return Index(self.to_series().dt.is_quarter_start)
 
@@ -353,8 +353,8 @@ def is_quarter_end(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range('2017-03-30', periods=4)
-        >>> idx.is_quarter_end
-        Index([False, True, False, False], dtype='object')
+        >>> idx.is_quarter_end  # doctest: +SKIP
+        Index([False, True, False, False], dtype='bool')
         """
         return Index(self.to_series().dt.is_quarter_end)
 
@@ -375,8 +375,8 @@ def is_year_start(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range("2017-12-30", periods=3)
-        >>> idx.is_year_start
-        Index([False, False, True], dtype='object')
+        >>> idx.is_year_start  # doctest: +SKIP
+        Index([False, False, True], dtype='bool')
         """
         return Index(self.to_series().dt.is_year_start)
 
@@ -397,8 +397,8 @@ def is_year_end(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range("2017-12-30", periods=3)
-        >>> idx.is_year_end
-        Index([False, True, False], dtype='object')
+        >>> idx.is_year_end  # doctest: +SKIP
+        Index([False, True, False], dtype='bool')
         """
         return Index(self.to_series().dt.is_year_end)
 
@@ -420,8 +420,8 @@ def is_leap_year(self) -> Index:
         Examples
         --------
         >>> idx = ps.date_range("2012-01-01", "2015-01-01", freq="Y")
-        >>> idx.is_leap_year
-        Index([True, False, False], dtype='object')
+        >>> idx.is_leap_year  # doctest: +SKIP
+        Index([True, False, False], dtype='bool')
         """
         return Index(self.to_series().dt.is_leap_year)
 

diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py
@@ -2315,8 +2315,8 @@ def zfill(self, width: int) -> "ps.Series":
         added to the left of it (:func:`str.zfill` would have moved it to the
         left). 1000 remains unchanged as it is longer than width.
 
-        >>> s.str.zfill(3)
-        0     0-1
+        >>> s.str.zfill(3)  # doctest: +SKIP
+        0     -01
         1     001
         2    1000
         3    None

diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
@@ -98,7 +98,7 @@ def generate_supported_api(output_rst_file_path: str) -> None:
 
     Write supported APIs documentation.
     """
-    pandas_latest_version = "1.4.4"
+    pandas_latest_version = "1.5.0"
     if LooseVersion(pd.__version__) != LooseVersion(pandas_latest_version):
         msg = (
             "Warning: Latest version of pandas (%s) is required to generate the documentation; "