diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index b0760a955342..8488540b415d 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -258,7 +258,7 @@ jobs:
- name: Install Python packages (Python 3.8)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
run: |
- python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
+ python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
python3.8 -m pip list
# Run the tests.
- name: Run tests
@@ -684,7 +684,7 @@ jobs:
# See also https://issues.apache.org/jira/browse/SPARK-38279.
python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' 'nest-asyncio==1.5.8' 'rpds-py==0.16.2' 'alabaster==0.7.13'
python3.9 -m pip install ipython_genutils # See SPARK-38517
- python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
+ python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas 'plotly>=4.8'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
apt-get update -y
apt-get install -y ruby ruby-dev
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 8ecf931bf513..1cd7d5a8f2d7 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar
opentracing-api/0.33.0//opentracing-api-0.33.0.jar
opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
opentracing-util/0.33.0//opentracing-util-0.33.0.jar
-orc-core/1.9.2/shaded-protobuf/orc-core-1.9.2-shaded-protobuf.jar
-orc-mapreduce/1.9.2/shaded-protobuf/orc-mapreduce-1.9.2-shaded-protobuf.jar
-orc-shims/1.9.2//orc-shims-1.9.2.jar
+orc-core/1.9.3/shaded-protobuf/orc-core-1.9.3-shaded-protobuf.jar
+orc-mapreduce/1.9.3/shaded-protobuf/orc-mapreduce-1.9.3-shaded-protobuf.jar
+orc-shims/1.9.3//orc-shims-1.9.3.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index d3bae836cc63..d3fcd7ab3622 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -65,7 +65,7 @@ RUN Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='ht
ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
+RUN python3.9 -m pip install numpy 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
# Add Python deps for Spark Connect.
RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4'
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 597417aba1f3..0749af75aa4b 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -3,7 +3,7 @@ py4j
# PySpark dependencies (optional)
numpy
-pyarrow
+pyarrow<13.0.0
pandas
scipy
plotly
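
Note: the `pyarrow<13.0.0` bound above mirrors the `pyarrow==12.0.1` pins in the CI workflow and Dockerfile, and the `>=4.0.0,<13.0.0` range documented in install.rst below. As a minimal sketch (not part of this patch) of how one might sanity-check a local environment against that window, assuming `pyarrow` and the third-party `packaging` library are installed:

# Sketch: check the installed pyarrow against the >=4.0.0,<13.0.0 window
# that this patch pins across CI, the Dockerfile, requirements, and docs.
from packaging.version import Version

import pyarrow

installed = Version(pyarrow.__version__)
lower, upper = Version("4.0.0"), Version("13.0.0")
if not (lower <= installed < upper):
    raise RuntimeError(
        f"pyarrow {installed} is outside the supported range; "
        "try: pip install 'pyarrow>=4.0.0,<13.0.0'"
    )
print(f"pyarrow {installed} is within the supported range")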
diff --git a/pom.xml b/pom.xml
index fb6208777d3f..18ecdffcc035 100644
--- a/pom.xml
+++ b/pom.xml
@@ -141,7 +141,7 @@
     <derby.version>10.14.2.0</derby.version>
     <parquet.version>1.13.1</parquet.version>
-    <orc.version>1.9.2</orc.version>
+    <orc.version>1.9.3</orc.version>
     <orc.classifier>shaded-protobuf</orc.classifier>
     <jetty.version>9.4.54.v20240208</jetty.version>
     <javaxservlet.version>4.0.3</javaxservlet.version>
@@ -346,6 +346,10 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>staging</id>
+      <url>https://repository.apache.org/content/repositories/orgapacheorc-1078/</url>
+    </repository>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 79b58deafde5..fb789a691e1c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -310,6 +310,7 @@ object SparkBuild extends PomBuild {
"gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
DefaultMavenRepository,
Resolver.mavenLocal,
+ "orc" at "https://repository.apache.org/content/repositories/orgapacheorc-1078/",
Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
),
externalResolvers := resolvers.value,
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 6822285e9617..e97632a8b384 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -157,7 +157,7 @@ Package Supported version Note
========================== ========================= ======================================================================================
 `py4j`                     >=0.10.9.7                Required
 `pandas`                   >=1.0.5                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
-`pyarrow`                  >=4.0.0                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
+`pyarrow`                  >=4.0.0,<13.0.0           Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
 `numpy`                    >=1.15                    Required for pandas API on Spark and MLLib DataFrame-based API; Optional for Spark SQL
 `grpcio`                   >=1.48,<1.57              Required for Spark Connect
 `grpcio-status`            >=1.48,<1.57              Required for Spark Connect
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
index 7cd7db4088ac..ce3e7cde01b7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala
@@ -174,7 +174,7 @@ private[ui] class StreamingQueryPagedTable(
override def row(query: StructuredStreamingRow): Seq[Node] = {
val streamingQuery = query.streamingUIData
- val statisticsLink = "%s/%s/statistics?id=%s"
+ val statisticsLink = "%s/%s/statistics/?id=%s"
.format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix,
streamingQuery.summary.runId)
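
Note: the trailing slash added to the statistics link above only changes the shape of the generated URL. An illustrative Python sketch (the values are made up; only the two format strings come from the diff):

# Before vs. after the Scala format-string change in StreamingQueryPage.
# Putting a trailing slash before the query string is a common workaround for
# proxies that redirect directory-style paths and drop ?id=... in the process
# (the diff itself only shows the format-string change, not the motivation).
base_uri, prefix, run_id = "/base", "StreamingQuery", "example-run-id"

before = "%s/%s/statistics?id=%s" % (base_uri, prefix, run_id)
after = "%s/%s/statistics/?id=%s" % (base_uri, prefix, run_id)

print(before)  # -> /base/StreamingQuery/statistics?id=example-run-id
print(after)   # -> /base/StreamingQuery/statistics/?id=example-run-id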