diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 2a70bd3f98f4..24bad4db4080 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 -RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl +RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.0' scipy unittest-xml-reporting 'plotly>=4.8' sklearn 'mlflow>=1.0' coverage matplotlib openpyxl RUN add-apt-repository ppa:pypy/ppa RUN apt update @@ -45,7 +45,7 @@ RUN mkdir -p /usr/local/pypy/pypy3.7 && \ ln -sf /usr/local/pypy/pypy3.7/bin/pypy /usr/local/bin/pypy3 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 -RUN pypy3 -m pip install numpy 'pandas<=1.4.4' scipy coverage matplotlib +RUN pypy3 -m pip install numpy 'pandas<=1.5.0' scipy coverage matplotlib RUN $APT_INSTALL gnupg ca-certificates pandoc RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index 755e111bff8f..350674e5e52d 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -866,8 +866,8 @@ def isin(self: IndexOpsLike, values: Sequence[Any]) -> IndexOpsLike: 5 False Name: animal, dtype: bool - >>> s.rename("a").to_frame().set_index("a").index.isin(['lama']) - Index([True, False, True, False, True, False], dtype='object', name='a') + >>> s.rename("a").to_frame().set_index("a").index.isin(['lama']) # doctest: +SKIP + Index([True, False, True, False, True, False], dtype='bool', name='a') """ if not is_list_like(values): raise TypeError( @@ -910,8 +910,8 @@ def isnull(self: IndexOpsLike) -> IndexOpsLike: 2 True dtype: bool - >>> 
ser.rename("a").to_frame().set_index("a").index.isna() - Index([False, False, True], dtype='object', name='a') + >>> ser.rename("a").to_frame().set_index("a").index.isna() # doctest: +SKIP + Index([False, False, True], dtype='bool', name='a') """ from pyspark.pandas.indexes import MultiIndex @@ -953,8 +953,8 @@ def notnull(self: IndexOpsLike) -> IndexOpsLike: 2 False dtype: bool - >>> ser.rename("a").to_frame().set_index("a").index.notna() - Index([True, True, False], dtype='object', name='a') + >>> ser.rename("a").to_frame().set_index("a").index.notna() # doctest: +SKIP + Index([True, True, False], dtype='bool', name='a') """ from pyspark.pandas.indexes import MultiIndex diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py index b4a7c1e8356a..3343014c6f8b 100644 --- a/python/pyspark/pandas/indexes/datetimes.py +++ b/python/pyspark/pandas/indexes/datetimes.py @@ -284,8 +284,8 @@ def is_month_start(self) -> Index: Examples -------- >>> idx = ps.date_range("2018-02-27", periods=3) - >>> idx.is_month_start - Index([False, False, True], dtype='object') + >>> idx.is_month_start # doctest: +SKIP + Index([False, False, True], dtype='bool') """ return Index(self.to_series().dt.is_month_start) @@ -307,8 +307,8 @@ def is_month_end(self) -> Index: Examples -------- >>> idx = ps.date_range("2018-02-27", periods=3) - >>> idx.is_month_end - Index([False, True, False], dtype='object') + >>> idx.is_month_end # doctest: +SKIP + Index([False, True, False], dtype='bool') """ return Index(self.to_series().dt.is_month_end) @@ -330,8 +330,8 @@ def is_quarter_start(self) -> Index: Examples -------- >>> idx = ps.date_range('2017-03-30', periods=4) - >>> idx.is_quarter_start - Index([False, False, True, False], dtype='object') + >>> idx.is_quarter_start # doctest: +SKIP + Index([False, False, True, False], dtype='bool') """ return Index(self.to_series().dt.is_quarter_start) @@ -353,8 +353,8 @@ def is_quarter_end(self) -> Index: Examples 
-------- >>> idx = ps.date_range('2017-03-30', periods=4) - >>> idx.is_quarter_end - Index([False, True, False, False], dtype='object') + >>> idx.is_quarter_end # doctest: +SKIP + Index([False, True, False, False], dtype='bool') """ return Index(self.to_series().dt.is_quarter_end) @@ -375,8 +375,8 @@ def is_year_start(self) -> Index: Examples -------- >>> idx = ps.date_range("2017-12-30", periods=3) - >>> idx.is_year_start - Index([False, False, True], dtype='object') + >>> idx.is_year_start # doctest: +SKIP + Index([False, False, True], dtype='bool') """ return Index(self.to_series().dt.is_year_start) @@ -397,8 +397,8 @@ def is_year_end(self) -> Index: Examples -------- >>> idx = ps.date_range("2017-12-30", periods=3) - >>> idx.is_year_end - Index([False, True, False], dtype='object') + >>> idx.is_year_end # doctest: +SKIP + Index([False, True, False], dtype='bool') """ return Index(self.to_series().dt.is_year_end) @@ -420,8 +420,8 @@ def is_leap_year(self) -> Index: Examples -------- >>> idx = ps.date_range("2012-01-01", "2015-01-01", freq="Y") - >>> idx.is_leap_year - Index([True, False, False], dtype='object') + >>> idx.is_leap_year # doctest: +SKIP + Index([True, False, False], dtype='bool') """ return Index(self.to_series().dt.is_leap_year) diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py index 774fd6c7ca0b..7d541cbcb6ea 100644 --- a/python/pyspark/pandas/strings.py +++ b/python/pyspark/pandas/strings.py @@ -2315,8 +2315,8 @@ def zfill(self, width: int) -> "ps.Series": added to the left of it (:func:`str.zfill` would have moved it to the left). 1000 remains unchanged as it is longer than width. 
- >>> s.str.zfill(3) - 0 0-1 + >>> s.str.zfill(3) # doctest: +SKIP + 0 -01 1 001 2 1000 3 None diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py index de072b549277..2af35923afb2 100644 --- a/python/pyspark/pandas/supported_api_gen.py +++ b/python/pyspark/pandas/supported_api_gen.py @@ -98,7 +98,7 @@ def generate_supported_api(output_rst_file_path: str) -> None: Write supported APIs documentation. """ - pandas_latest_version = "1.4.4" + pandas_latest_version = "1.5.0" if LooseVersion(pd.__version__) != LooseVersion(pandas_latest_version): msg = ( "Warning: Latest version of pandas (%s) is required to generate the documentation; "