diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3776a116fd78..e825d0d90595 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -362,7 +362,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn')
       run: |
-        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
+        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0'
         python3.11 -m pip list
     # Run the tests.
     - name: Run tests
@@ -766,7 +766,7 @@ jobs:
         python-version: '3.11'
     - name: Install dependencies for Python CodeGen check
      run: |
-        python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
+        python3.11 -m pip install 'black==23.12.1' 'protobuf==6.33.0' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
         python3.11 -m pip list
     - name: Python CodeGen check for branch-3.5
       if: inputs.branch == 'branch-3.5'
diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index cec37af22dd7..b1ebb45b9cbc 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -72,7 +72,7 @@ jobs:
          python packaging/client/setup.py sdist
          cd dist
          pip install pyspark*client-*.tar.gz
-         pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+         pip install 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0' 'googleapis-common-protos==1.71.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
     - name: List Python packages
       run: python -m pip list
     - name: Run tests (local)
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index 95c9aac33fc6..7bbfc420e02a 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -175,7 +175,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
       run: |
-        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
+        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0'
         python3.11 -m pip list
     # Run the tests using script command.
     # BSD's script command doesn't support -c option, and the usage is different from Linux's one.
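(Reviewer note, not part of the patch: the same grpcio/grpcio-status/protobuf pins recur across every workflow and image below, so a drift in any one file fails subtly. Below is a minimal sanity-check sketch, assuming it runs in the Python environment these jobs provision; the `expected` mapping simply mirrors the pins above.)

```python
# Hypothetical post-install check, not part of this PR: assert that the pinned
# Spark Connect dependencies resolved to exactly the versions the patch expects.
from importlib.metadata import version

expected = {"grpcio": "1.76.0", "grpcio-status": "1.76.0", "protobuf": "6.33.0"}
for pkg, want in expected.items():
    got = version(pkg)
    assert got == want, f"{pkg}: expected {want}, got {got}"
print("all Spark Connect pins resolved as expected")
```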
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index e800b40106ee..2bba3dcaf176 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -63,7 +63,7 @@ jobs:
          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
-           'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+           'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
            'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
     - name: Install Ruby for documentation generation
       uses: ruby/setup-ruby@v1
diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index 77e85222c29d..d55eb1d93799 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -148,7 +148,7 @@ jobs:
          python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
          python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
          python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
-         python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
+         python${{matrix.python}} -m pip install 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0' 'googleapis-common-protos==1.71.0' 'graphviz==0.20.3' && \
          python${{matrix.python}} -m pip cache purge
     - name: List Python packages
       run: python${{matrix.python}} -m pip list
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index 86be7e0a8229..679998b89392 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -94,7 +94,7 @@ ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library
 ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
@@ -111,7 +111,7 @@ RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS
 RUN python3.10 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
     ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
     'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
-    'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+    'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
 RUN python3.10 -m pip list
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 1aa03735ce92..423b6ba820d0 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -97,7 +97,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.3' scipy coverage matp
 ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
@@ -149,7 +149,7 @@ RUN apt-get update && apt-get install -y \
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13
 # TODO(SPARK-49862) Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS to Python 3.13 image when it supports Python 3.13
 RUN python3.13 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this
-RUN python3.13 -m pip install numpy>=2.1 pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy coverage matplotlib openpyxl grpcio==1.67.0 grpcio-status==1.67.0 lxml jinja2 && \
+RUN python3.13 -m pip install numpy>=2.1 pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy coverage matplotlib openpyxl grpcio==1.76.0 grpcio-status==1.76.0 lxml jinja2 && \
     python3.13 -m pip cache purge

 # Remove unused installation packages to free up disk space
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 76652df74481..ddaeb9b3dd9d 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -61,10 +61,10 @@ black==23.12.1
 py

 # Spark Connect (required)
-grpcio>=1.67.0
-grpcio-status>=1.67.0
-googleapis-common-protos>=1.65.0
-protobuf==5.29.5
+grpcio>=1.76.0
+grpcio-status>=1.76.0
+googleapis-common-protos>=1.71.0
+protobuf==6.33.0

 # Spark Connect python proto generation plugin (optional)
 mypy-protobuf==3.3.0
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index 1c17ae122d63..e268ea7a8351 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -91,6 +91,6 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
     ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
     'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
-    'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+    'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
     && python3.11 -m pip cache purge
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
index 07ff9c90b759..6686e3808e03 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -82,8 +82,8 @@ RUN python3.11 -m pip install \
     'flake8==3.9.0' \
     'googleapis-common-protos-stubs==2.2.0' \
     'grpc-stubs==1.24.11' \
-    'grpcio-status==1.67.0' \
-    'grpcio==1.67.0' \
+    'grpcio-status==1.76.0' \
+    'grpcio==1.76.0' \
     'ipython' \
     'ipython_genutils' \
     'jinja2' \
diff --git a/dev/spark-test-image/numpy-213/Dockerfile b/dev/spark-test-image/numpy-213/Dockerfile
index d0409e61a51a..d33fb5f5d30e 100644
--- a/dev/spark-test-image/numpy-213/Dockerfile
+++ b/dev/spark-test-image/numpy-213/Dockerfile
@@ -71,7 +71,7 @@ RUN apt-get update && apt-get install -y \
 # Pin numpy==2.1.3
 ARG BASIC_PIP_PKGS="numpy==2.1.3 pyarrow>=21.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.11 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index ce2ca23d18a6..46cfce36832b 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -66,7 +66,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 00fb7be788fd..920f21bd47ee 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -70,7 +70,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.11 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 79cab824a5b2..db1039f5cb26 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -70,7 +70,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.12 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index 031eb8772b59..a50bf670b3f3 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -69,7 +69,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.13 packages
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index abd5a7e01093..f74c48bf346f 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -70,7 +70,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.13 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13
diff --git a/dev/spark-test-image/python-314/Dockerfile b/dev/spark-test-image/python-314/Dockerfile
index 0ba9b620bd8b..7deb5e855319 100644
--- a/dev/spark-test-image/python-314/Dockerfile
+++ b/dev/spark-test-image/python-314/Dockerfile
@@ -70,7 +70,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.75.1 grpcio-status==1.71.2 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.75.1 grpcio-status==1.71.2 protobuf==6.33.0 googleapis-common-protos==1.71.0 graphviz==0.20.3"

 # Install Python 3.14 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.14
diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile
index 122281ec0ea1..ebafbc69ec4d 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -64,7 +64,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 googleapis-common-protos==1.65.0 graphviz==0.20 protobuf"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 graphviz==0.20 protobuf"

 # Install Python 3.9 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile
index 680697c3f2d7..13a5f2db386c 100644
--- a/dev/spark-test-image/python-ps-minimum/Dockerfile
+++ b/dev/spark-test-image/python-ps-minimum/Dockerfile
@@ -65,7 +65,7 @@ RUN apt-get update && apt-get install -y \
 ARG BASIC_PIP_PKGS="pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 googleapis-common-protos==1.65.0 graphviz==0.20 protobuf"
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 graphviz==0.20 protobuf"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/pom.xml b/pom.xml
index 0a2401914029..e49c8c047208 100644
--- a/pom.xml
+++ b/pom.xml
@@ -305,9 +305,8 @@
     33.4.0-jre
     1.0.2
-    <io.grpc.version>1.67.1</io.grpc.version>
+    <io.grpc.version>1.76.0</io.grpc.version>
     1.1.4
-    <tomcat.annotations.api.version>6.0.53</tomcat.annotations.api.version>
     4.0-10
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 1d8de063133e..253893cc225b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -820,11 +820,13 @@ object SparkConnect {
       ShadeRule.rename("org.checkerframework.**" -> "org.sparkproject.connect.checkerframework.@1").inAll,
       ShadeRule.rename("com.google.gson.**" -> "org.sparkproject.connect.gson.@1").inAll,
       ShadeRule.rename("com.google.api.**" -> "org.sparkproject.connect.google_protos.api.@1").inAll,
+      ShadeRule.rename("com.google.apps.**" -> "org.sparkproject.connect.google_protos.apps.@1").inAll,
       ShadeRule.rename("com.google.cloud.**" -> "org.sparkproject.connect.google_protos.cloud.@1").inAll,
       ShadeRule.rename("com.google.geo.**" -> "org.sparkproject.connect.google_protos.geo.@1").inAll,
       ShadeRule.rename("com.google.logging.**" -> "org.sparkproject.connect.google_protos.logging.@1").inAll,
       ShadeRule.rename("com.google.longrunning.**" -> "org.sparkproject.connect.google_protos.longrunning.@1").inAll,
       ShadeRule.rename("com.google.rpc.**" -> "org.sparkproject.connect.google_protos.rpc.@1").inAll,
+      ShadeRule.rename("com.google.shopping.**" -> "org.sparkproject.connect.google_protos.shopping.@1").inAll,
       ShadeRule.rename("com.google.type.**" -> "org.sparkproject.connect.google_protos.type.@1").inAll
     ),
@@ -911,7 +913,6 @@
       ShadeRule.rename("com.google.**" -> "org.sparkproject.connect.client.com.google.@1").inAll,
       ShadeRule.rename("io.netty.**" -> "org.sparkproject.connect.client.io.netty.@1").inAll,
       ShadeRule.rename("org.checkerframework.**" -> "org.sparkproject.connect.client.org.checkerframework.@1").inAll,
-      ShadeRule.rename("javax.annotation.**" -> "org.sparkproject.connect.client.javax.annotation.@1").inAll,
       ShadeRule.rename("io.perfmark.**" -> "org.sparkproject.connect.client.io.perfmark.@1").inAll,
       ShadeRule.rename("org.codehaus.**" -> "org.sparkproject.connect.client.org.codehaus.@1").inAll,
       ShadeRule.rename("android.annotation.**" -> "org.sparkproject.connect.client.android.annotation.@1").inAll
@@ -991,7 +992,6 @@
       ShadeRule.rename("com.google.**" -> "org.sparkproject.connect.client.com.google.@1").inAll,
       ShadeRule.rename("io.netty.**" -> "org.sparkproject.connect.client.io.netty.@1").inAll,
       ShadeRule.rename("org.checkerframework.**" -> "org.sparkproject.connect.client.org.checkerframework.@1").inAll,
-      ShadeRule.rename("javax.annotation.**" -> "org.sparkproject.connect.client.javax.annotation.@1").inAll,
       ShadeRule.rename("io.perfmark.**" -> "org.sparkproject.connect.client.io.perfmark.@1").inAll,
       ShadeRule.rename("org.codehaus.**" -> "org.sparkproject.connect.client.org.codehaus.@1").inAll,
       ShadeRule.rename("android.annotation.**" -> "org.sparkproject.connect.client.android.annotation.@1").inAll
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 82db489651ff..8b3c969d756d 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -227,9 +227,9 @@ Package                    Supported version Note
 ========================== ================= ==========================
 `pandas`                   >=2.2.0           Required for Spark Connect
 `pyarrow`                  >=15.0.0          Required for Spark Connect
-`grpcio`                   >=1.67.0          Required for Spark Connect
-`grpcio-status`            >=1.67.0          Required for Spark Connect
-`googleapis-common-protos` >=1.65.0          Required for Spark Connect
+`grpcio`                   >=1.76.0          Required for Spark Connect
+`grpcio-status`            >=1.76.0          Required for Spark Connect
+`googleapis-common-protos` >=1.71.0          Required for Spark Connect
 `graphviz`                 >=0.20            Optional for Spark Connect
 ========================== ================= ==========================
@@ -310,9 +310,9 @@ Package                    Supported version Note
 ========================== ================= ===================================================
 `pandas`                   >=2.2.0           Required for Spark Connect and Spark SQL
 `pyarrow`                  >=15.0.0          Required for Spark Connect and Spark SQL
-`grpcio`                   >=1.67.0          Required for Spark Connect
-`grpcio-status`            >=1.67.0          Required for Spark Connect
-`googleapis-common-protos` >=1.65.0          Required for Spark Connect
+`grpcio`                   >=1.76.0          Required for Spark Connect
+`grpcio-status`            >=1.76.0          Required for Spark Connect
+`googleapis-common-protos` >=1.71.0          Required for Spark Connect
 `pyyaml`                   >=3.11            Required for spark-pipelines command line interface
 `graphviz`                 >=0.20            Optional for Spark Connect
 ========================== ================= ===================================================
diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py
index eac97af2e8c8..e6ac729f20d6 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -153,8 +153,8 @@ def _supports_symlinks():
 _minimum_pandas_version = "2.2.0"
 _minimum_numpy_version = "1.21"
 _minimum_pyarrow_version = "15.0.0"
-_minimum_grpc_version = "1.67.0"
-_minimum_googleapis_common_protos_version = "1.65.0"
+_minimum_grpc_version = "1.76.0"
+_minimum_googleapis_common_protos_version = "1.71.0"
 _minimum_pyyaml_version = "3.11"
diff --git a/python/packaging/client/setup.py b/python/packaging/client/setup.py
index 7ec7e45a3160..c378d223cfcc 100755
--- a/python/packaging/client/setup.py
+++ b/python/packaging/client/setup.py
@@ -136,8 +136,8 @@
     _minimum_pandas_version = "2.2.0"
     _minimum_numpy_version = "1.21"
     _minimum_pyarrow_version = "15.0.0"
-    _minimum_grpc_version = "1.67.0"
-    _minimum_googleapis_common_protos_version = "1.65.0"
+    _minimum_grpc_version = "1.76.0"
+    _minimum_googleapis_common_protos_version = "1.71.0"
     _minimum_pyyaml_version = "3.11"

     with open("README.md") as f:
diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py
index f2b53211b3a0..3b88563bcfe7 100755
--- a/python/packaging/connect/setup.py
+++ b/python/packaging/connect/setup.py
@@ -89,8 +89,8 @@
     _minimum_pandas_version = "2.0.0"
     _minimum_numpy_version = "1.21"
     _minimum_pyarrow_version = "11.0.0"
-    _minimum_grpc_version = "1.67.0"
-    _minimum_googleapis_common_protos_version = "1.65.0"
+    _minimum_grpc_version = "1.76.0"
+    _minimum_googleapis_common_protos_version = "1.71.0"
     _minimum_pyyaml_version = "3.11"

     with open("README.md") as f:
diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py
index 0fe992332de7..32bf6802df7b 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.py
+++ b/python/pyspark/sql/connect/proto/base_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/base.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/base.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/base.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/catalog_pb2.py b/python/pyspark/sql/connect/proto/catalog_pb2.py
index 58c129a01daa..054b367bd3b3 100644
--- a/python/pyspark/sql/connect/proto/catalog_pb2.py
+++ b/python/pyspark/sql/connect/proto/catalog_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/catalog.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/catalog.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/catalog.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/commands_pb2.py b/python/pyspark/sql/connect/proto/commands_pb2.py
index 694b4a9a9aa3..4eccf1b71706 100644
--- a/python/pyspark/sql/connect/proto/commands_pb2.py
+++ b/python/pyspark/sql/connect/proto/commands_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/commands.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/commands.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/commands.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/common_pb2.py b/python/pyspark/sql/connect/proto/common_pb2.py
index 07ea9f7ed317..8abd8fa6dc04 100644
--- a/python/pyspark/sql/connect/proto/common_pb2.py
+++ b/python/pyspark/sql/connect/proto/common_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/common.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/common.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/common.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/example_plugins_pb2.py b/python/pyspark/sql/connect/proto/example_plugins_pb2.py
index 71a73a6d592a..423768ee63d6 100644
--- a/python/pyspark/sql/connect/proto/example_plugins_pb2.py
+++ b/python/pyspark/sql/connect/proto/example_plugins_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/example_plugins.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/example_plugins.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/example_plugins.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py
index bd75ade02d8b..0c466aeb67a0 100644
--- a/python/pyspark/sql/connect/proto/expressions_pb2.py
+++ b/python/pyspark/sql/connect/proto/expressions_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/expressions.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/expressions.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/expressions.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/ml_common_pb2.py b/python/pyspark/sql/connect/proto/ml_common_pb2.py
index a49491b8ad1e..de547fc2a102 100644
--- a/python/pyspark/sql/connect/proto/ml_common_pb2.py
+++ b/python/pyspark/sql/connect/proto/ml_common_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/ml_common.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/ml_common.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/ml_common.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/ml_pb2.py b/python/pyspark/sql/connect/proto/ml_pb2.py
index 9574966472a5..3bd141815c8e 100644
--- a/python/pyspark/sql/connect/proto/ml_pb2.py
+++ b/python/pyspark/sql/connect/proto/ml_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/ml.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/ml.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/ml.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/pipelines_pb2.py b/python/pyspark/sql/connect/proto/pipelines_pb2.py
index f3489f55ed87..d7321fa7cf0c 100644
--- a/python/pyspark/sql/connect/proto/pipelines_pb2.py
+++ b/python/pyspark/sql/connect/proto/pipelines_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/pipelines.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/pipelines.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/pipelines.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py
index e7f319554c5e..9e630b6ba5e4 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/relations.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/relations.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/relations.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.py b/python/pyspark/sql/connect/proto/types_pb2.py
index 9a52129103ad..74efca8decf8 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.py
+++ b/python/pyspark/sql/connect/proto/types_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: spark/connect/types.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -27,7 +27,7 @@
 from google.protobuf.internal import builder as _builder

 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC, 5, 29, 5, "", "spark/connect/types.proto"
+    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "spark/connect/types.proto"
 )

 # @@protoc_insertion_point(imports)
diff --git a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py
index b88fc2c5ca40..1305a6213c13 100644
--- a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py
+++ b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py
@@ -18,7 +18,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: org/apache/spark/sql/execution/streaming/StateMessage.proto
-# Protobuf Python Version: 5.29.5
+# Protobuf Python Version: 6.33.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -28,9 +28,9 @@
 _runtime_version.ValidateProtobufRuntimeVersion(
     _runtime_version.Domain.PUBLIC,
-    5,
-    29,
-    5,
+    6,
+    33,
+    0,
     "",
     "org/apache/spark/sql/execution/streaming/StateMessage.proto",
 )
diff --git a/sql/connect/client/jdbc/pom.xml b/sql/connect/client/jdbc/pom.xml
index 02deba401317..f17cdb7f0904 100644
--- a/sql/connect/client/jdbc/pom.xml
+++ b/sql/connect/client/jdbc/pom.xml
@@ -181,10 +181,6 @@
                   <pattern>org.checkerframework</pattern>
                   <shadedPattern>${spark.shade.packageName}.org.checkerframework</shadedPattern>
                 </relocation>
-                <relocation>
-                  <pattern>javax.annotation</pattern>
-                  <shadedPattern>${spark.shade.packageName}.javax.annotation</shadedPattern>
-                </relocation>
                 <relocation>
                   <pattern>io.perfmark</pattern>
                   <shadedPattern>${spark.shade.packageName}.io.perfmark</shadedPattern>
diff --git a/sql/connect/client/jvm/pom.xml b/sql/connect/client/jvm/pom.xml
index dfde32894197..59b44eb97f0b 100644
--- a/sql/connect/client/jvm/pom.xml
+++ b/sql/connect/client/jvm/pom.xml
@@ -225,10 +225,6 @@
                   <pattern>org.checkerframework</pattern>
                   <shadedPattern>${spark.shade.packageName}.org.checkerframework</shadedPattern>
                 </relocation>
-                <relocation>
-                  <pattern>javax.annotation</pattern>
-                  <shadedPattern>${spark.shade.packageName}.javax.annotation</shadedPattern>
-                </relocation>
                 <relocation>
                   <pattern>io.perfmark</pattern>
                   <shadedPattern>${spark.shade.packageName}.io.perfmark</shadedPattern>
diff --git a/sql/connect/common/pom.xml b/sql/connect/common/pom.xml
index 3d0a23158b95..5ed69ebe58a6 100644
--- a/sql/connect/common/pom.xml
+++ b/sql/connect/common/pom.xml
@@ -87,11 +87,6 @@
       <artifactId>netty-transport-native-unix-common</artifactId>
       <version>${netty.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.tomcat</groupId>
-      <artifactId>annotations-api</artifactId>
-      <version>${tomcat.annotations.api.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.tomcat</groupId>
-      <artifactId>annotations-api</artifactId>
-      <version>${tomcat.annotations.api.version}</version>
-      <scope>provided</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
@@ -376,6 +370,10 @@
                   <pattern>com.google.api</pattern>
                   <shadedPattern>${spark.shade.packageName}.connect.google_protos.api</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.apps</pattern>
+                  <shadedPattern>${spark.shade.packageName}.connect.google_protos.apps</shadedPattern>
+                </relocation>
                 <relocation>
                   <pattern>com.google.cloud</pattern>
                   <shadedPattern>${spark.shade.packageName}.connect.google_protos.cloud</shadedPattern>
@@ -396,6 +394,10 @@
                   <pattern>com.google.rpc</pattern>
                   <shadedPattern>${spark.shade.packageName}.connect.google_protos.rpc</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.shopping</pattern>
+                  <shadedPattern>${spark.shade.packageName}.connect.google_protos.shopping</shadedPattern>
+                </relocation>
                 <relocation>
                   <pattern>com.google.type</pattern>
                   <shadedPattern>${spark.shade.packageName}.connect.google_protos.type</shadedPattern>
diff --git a/sql/core/src/main/buf.gen.yaml b/sql/core/src/main/buf.gen.yaml
index 01a34ed30844..5f87a840c6a4 100644
--- a/sql/core/src/main/buf.gen.yaml
+++ b/sql/core/src/main/buf.gen.yaml
@@ -17,7 +17,7 @@ version: v1
 plugins:
   # Building the Python build and building the mypy interfaces.
-  - plugin: buf.build/protocolbuffers/python:v29.5
+  - plugin: buf.build/protocolbuffers/python:v33.0
     out: gen/proto/python
   - name: mypy
     out: gen/proto/python
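(Reviewer note, not part of the patch: the regenerated `*_pb2.py` files and the `protobuf==6.33.0` pin have to move together, because the gencode validates the runtime at import time via the `ValidateProtobufRuntimeVersion` call visible in the hunks above. A minimal sketch of that check follows, reusing the exact call the generated files make; the `.proto` path argument here is a placeholder location string, not a real file in this PR.)

```python
# Sketch of the import-time validation performed by the regenerated *_pb2.py
# files: the installed protobuf runtime must be at least as new as the gencode.
from google.protobuf import runtime_version as _runtime_version

# Raises runtime_version.VersionError when the installed protobuf runtime is
# older than the 6.33.0 version that generated the file.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC, 6, 33, 0, "", "example/placeholder.proto"
)
print("protobuf runtime accepts 6.33.0 gencode")
```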