diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index 8e36227c0837..2249dd230265 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -17,7 +17,7 @@ # under the License. # -name: "Build / Python-only (master, PyPy 3.8/Python 3.10/Python 3.11/Python 3.12)" +name: "Build / Python-only (master, PyPy 3.9/Python 3.10/Python 3.11/Python 3.12)" on: schedule: diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 22d505cca22e..870fb694045c 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -81,10 +81,10 @@ ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library RUN add-apt-repository ppa:pypy/ppa -RUN mkdir -p /usr/local/pypy/pypy3.8 && \ - curl -sqL https://downloads.python.org/pypy/pypy3.8-v7.3.11-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.8 --strip-components=1 && \ - ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \ - ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3 +RUN mkdir -p /usr/local/pypy/pypy3.9 && \ + curl -sqL https://downloads.python.org/pypy/pypy3.9-v7.3.16-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.9 --strip-components=1 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3.9 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.0.3' scipy coverage matplotlib lxml diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index 94e485c706e3..ae86cfdcaf78 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -129,7 +129,7 @@ If you are using Conda, the development environment can be set as follows. .. 
code-block:: bash - # Python 3.8+ is required + # Python 3.9+ is required conda create --name pyspark-dev-env python=3.9 conda activate pyspark-dev-env pip install --upgrade -r dev/requirements.txt @@ -145,7 +145,7 @@ Now, you can start developing and `running the tests `_. pip ~~~ -With Python 3.8+, pip can be used as below to install and set up the development environment. +With Python 3.9+, pip can be used as below to install and set up the development environment. .. code-block:: bash diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index ee894981387a..f7476b9110aa 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -30,7 +30,7 @@ and building from the source. Python Versions Supported ------------------------- -Python 3.8 and above. +Python 3.9 and above. Using PyPI @@ -124,7 +124,7 @@ the same session as pyspark (you can install in several steps too). .. code-block:: bash - conda install -c conda-forge pyspark # can also add "python=3.8 some_package [etc.]" here + conda install -c conda-forge pyspark # can also add "python=3.9 some_package [etc.]" here Note that `PySpark for conda `_ is maintained separately by the community; while new versions generally get packaged quickly, the diff --git a/python/docs/source/user_guide/pandas_on_spark/typehints.rst b/python/docs/source/user_guide/pandas_on_spark/typehints.rst index 1405baa39c16..23126664d78a 100644 --- a/python/docs/source/user_guide/pandas_on_spark/typehints.rst +++ b/python/docs/source/user_guide/pandas_on_spark/typehints.rst @@ -62,7 +62,7 @@ it as a Spark schema. As an example, you can specify the return type hint as bel Notice that the function ``pandas_div`` actually takes and outputs a pandas DataFrame instead of pandas-on-Spark :class:`DataFrame`. So, technically the correct types should be of pandas. 
-With Python 3.8+, you can specify the type hints by using pandas instances as follows: +With Python 3.9+, you can specify the type hints by using pandas instances as follows: .. code-block:: python diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py index f6d7cf08e5c9..8478a60d633b 100755 --- a/python/packaging/classic/setup.py +++ b/python/packaging/classic/setup.py @@ -359,11 +359,10 @@ def run(self): "numpy>=%s" % _minimum_numpy_version, ], }, - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index 0a7c82d66a07..1ded93fe5693 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -191,11 +191,10 @@ "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, "numpy>=%s" % _minimum_numpy_version, ], - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 1a01982e756d..2e1f3f401df3 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -142,6 +142,9 @@ def toDF(self, schema=None, sampleRatio=None): # # @classmethod + @property is also affected by a bug in Python's docstring which was backported # to Python 3.9.6 (https://github.com/python/cpython/pull/28838) +# +# Python 3.9 with MyPy complains about @classmethod + @property combination. 
We should fix +# it together with MyPy. class classproperty(property): """Same as Python's @property decorator, but for class attributes. diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow.py b/python/pyspark/sql/tests/connect/test_parity_arrow.py index 93d0b6cf0f5f..6e41e106422c 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow.py +++ b/python/pyspark/sql/tests/connect/test_parity_arrow.py @@ -16,7 +16,6 @@ # import unittest -import sys from pyspark.sql.tests.test_arrow import ArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -121,7 +120,6 @@ def test_createDataFrame_nested_timestamp(self): def test_toPandas_nested_timestamp(self): self.check_toPandas_nested_timestamp(True) - @unittest.skipIf(sys.version_info < (3, 9), "zoneinfo is available from Python 3.9+") def test_toPandas_timestmap_tzinfo(self): self.check_toPandas_timestmap_tzinfo(True) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py index bfb874ffe534..7a0fccc22572 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import sys import unittest from inspect import signature from typing import Union, Iterator, Tuple, cast, get_type_hints @@ -114,7 +113,6 @@ def func(iter: Iterator[Tuple[Union[pd.DataFrame, pd.Series], ...]]) -> Iterator infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER ) - @unittest.skipIf(sys.version_info < (3, 9), "Type hinting generics require Python 3.9.") def test_type_annotation_tuple_generics(self): def func(iter: Iterator[tuple[pd.DataFrame, pd.Series]]) -> Iterator[pd.DataFrame]: pass diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py index 9b6751564c40..442e1c61a0ba 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py @@ -16,7 +16,6 @@ # from __future__ import annotations -import sys import unittest from inspect import signature from typing import Union, Iterator, Tuple, cast, get_type_hints @@ -308,10 +307,6 @@ def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: expected = df.selectExpr("id + 1 as id") assert_frame_equal(expected.toPandas(), actual.toPandas()) - @unittest.skipIf( - sys.version_info < (3, 9), - "string annotations with future annotations do not work under Python<3.9", - ) def test_string_type_annotation(self): def func(col: "pd.Series") -> "pd.Series": pass diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 5235e021bae9..1b775ad39cdb 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -23,7 +23,6 @@ import unittest from typing import cast from collections import namedtuple -import sys from pyspark import SparkConf from pyspark.sql import Row, SparkSession @@ -997,7 +996,6 @@ def 
check_createDataFrame_nested_timestamp(self, arrow_enabled): self.assertEqual(df.first(), expected) - @unittest.skipIf(sys.version_info < (3, 9), "zoneinfo is available from Python 3.9+") def test_toPandas_timestmap_tzinfo(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): diff --git a/python/run-tests b/python/run-tests index 401fcae3e350..b5492a080d6a 100755 --- a/python/run-tests +++ b/python/run-tests @@ -21,9 +21,9 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 8, 0))') +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 9, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 3.8 are not supported." + echo "Python versions prior to 3.9 are not supported." exit -1 fi