diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index b0760a955342..8488540b415d 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -258,7 +258,7 @@ jobs:
     - name: Install Python packages (Python 3.8)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       run: |
-        python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
+        python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
         python3.8 -m pip list
     # Run the tests.
     - name: Run tests
@@ -684,7 +684,7 @@ jobs:
         # See also https://issues.apache.org/jira/browse/SPARK-38279.
         python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' 'nest-asyncio==1.5.8' 'rpds-py==0.16.2' 'alabaster==0.7.13'
         python3.9 -m pip install ipython_genutils # See SPARK-38517
-        python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
+        python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas 'plotly>=4.8'
         python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
         apt-get update -y
         apt-get install -y ruby ruby-dev
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 8ecf931bf513..1cd7d5a8f2d7 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar
 opentracing-api/0.33.0//opentracing-api-0.33.0.jar
 opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
 opentracing-util/0.33.0//opentracing-util-0.33.0.jar
-orc-core/1.9.2/shaded-protobuf/orc-core-1.9.2-shaded-protobuf.jar
-orc-mapreduce/1.9.2/shaded-protobuf/orc-mapreduce-1.9.2-shaded-protobuf.jar
-orc-shims/1.9.2//orc-shims-1.9.2.jar
+orc-core/1.9.3/shaded-protobuf/orc-core-1.9.3-shaded-protobuf.jar
+orc-mapreduce/1.9.3/shaded-protobuf/orc-mapreduce-1.9.3-shaded-protobuf.jar
+orc-shims/1.9.3//orc-shims-1.9.3.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index d3bae836cc63..d3fcd7ab3622 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -65,7 +65,7 @@ RUN Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='ht
 ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
 
 RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
+RUN python3.9 -m pip install numpy 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
 
 # Add Python deps for Spark Connect.
 RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4'
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 597417aba1f3..0749af75aa4b 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -3,7 +3,7 @@ py4j
 
 # PySpark dependencies (optional)
 numpy
-pyarrow
+pyarrow<13.0.0
 pandas
 scipy
 plotly
diff --git a/pom.xml b/pom.xml
index fb6208777d3f..18ecdffcc035 100644
--- a/pom.xml
+++ b/pom.xml
@@ -141,7 +141,7 @@
     <!-- After 10.15.1.3, the minimum required version is JDK9 -->
     <derby.version>10.14.2.0</derby.version>
     <parquet.version>1.13.1</parquet.version>
-    <orc.version>1.9.2</orc.version>
+    <orc.version>1.9.3</orc.version>
     <orc.classifier>shaded-protobuf</orc.classifier>
     <jetty.version>9.4.54.v20240208</jetty.version>
     <javaxservlet.version>4.0.3</javaxservlet.version>
@@ -346,6 +346,10 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>staging</id>
+      <url>https://repository.apache.org/content/repositories/orgapacheorc-1078/</url>
+    </repository>
   </repositories>
   <pluginRepositories>
     <pluginRepository>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 79b58deafde5..fb789a691e1c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -310,6 +310,7 @@ object SparkBuild extends PomBuild {
       "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
       DefaultMavenRepository,
       Resolver.mavenLocal,
+      "orc" at "https://repository.apache.org/content/repositories/orgapacheorc-1078/",
       Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
     ),
     externalResolvers := resolvers.value,
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 6822285e9617..e97632a8b384 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -157,7 +157,7 @@ Package                    Supported version         Note
 ========================== ========================= ======================================================================================
 `py4j`                     >=0.10.9.7                Required
 `pandas`                   >=1.0.5                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
-`pyarrow`                  >=4.0.0                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
+`pyarrow`                  >=4.0.0,<13.0.0           Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
 `numpy`                    >=1.15                    Required for pandas API on Spark and MLLib DataFrame-based API; Optional for Spark SQL
 `grpcio`                   >=1.48,<1.57              Required for Spark Connect
 `grpcio-status`            >=1.48,<1.57              Required for Spark Connect
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
index 7cd7db4088ac..ce3e7cde01b7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
@@ -174,7 +174,7 @@ private[ui] class StreamingQueryPagedTable(
 
   override def row(query: StructuredStreamingRow): Seq[Node] = {
     val streamingQuery = query.streamingUIData
-    val statisticsLink = "%s/%s/statistics?id=%s"
+    val statisticsLink = "%s/%s/statistics/?id=%s"
       .format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix,
         streamingQuery.summary.runId)
 