Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ jobs:
- name: Install Python packages (Python 3.8)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
run: |
python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
python3.8 -m pip list
# Run the tests.
- name: Run tests
Expand Down Expand Up @@ -684,7 +684,7 @@ jobs:
# See also https://issues.apache.org/jira/browse/SPARK-38279.
python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' 'nest-asyncio==1.5.8' 'rpds-py==0.16.2' 'alabaster==0.7.13'
python3.9 -m pip install ipython_genutils # See SPARK-38517
python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas 'plotly>=4.8'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
apt-get update -y
apt-get install -y ruby ruby-dev
Expand Down
6 changes: 3 additions & 3 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar
opentracing-api/0.33.0//opentracing-api-0.33.0.jar
opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
opentracing-util/0.33.0//opentracing-util-0.33.0.jar
orc-core/1.9.2/shaded-protobuf/orc-core-1.9.2-shaded-protobuf.jar
orc-mapreduce/1.9.2/shaded-protobuf/orc-mapreduce-1.9.2-shaded-protobuf.jar
orc-shims/1.9.2//orc-shims-1.9.2.jar
orc-core/1.9.3/shaded-protobuf/orc-core-1.9.3-shaded-protobuf.jar
orc-mapreduce/1.9.3/shaded-protobuf/orc-mapreduce-1.9.3-shaded-protobuf.jar
orc-shims/1.9.3//orc-shims-1.9.3.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
Expand Down
2 changes: 1 addition & 1 deletion dev/infra/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='ht
ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"

RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib
RUN python3.9 -m pip install numpy pyarrow 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
RUN python3.9 -m pip install numpy 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'

# Add Python deps for Spark Connect.
RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4'
Expand Down
2 changes: 1 addition & 1 deletion dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ py4j

# PySpark dependencies (optional)
numpy
pyarrow
pyarrow<13.0.0
pandas
scipy
plotly
Expand Down
6 changes: 5 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
<!-- After 10.15.1.3, the minimum required version is JDK9 -->
<derby.version>10.14.2.0</derby.version>
<parquet.version>1.13.1</parquet.version>
<orc.version>1.9.2</orc.version>
<orc.version>1.9.3</orc.version>
<orc.classifier>shaded-protobuf</orc.classifier>
<jetty.version>9.4.54.v20240208</jetty.version>
<jakartaservlet.version>4.0.3</jakartaservlet.version>
Expand Down Expand Up @@ -346,6 +346,10 @@
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>staging</id>
<url>https://repository.apache.org/content/repositories/orgapacheorc-1078/</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
Expand Down
1 change: 1 addition & 0 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ object SparkBuild extends PomBuild {
"gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
DefaultMavenRepository,
Resolver.mavenLocal,
"orc" at "https://repository.apache.org/content/repositories/orgapacheorc-1078/",
Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
),
externalResolvers := resolvers.value,
Expand Down
2 changes: 1 addition & 1 deletion python/docs/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ Package Supported version Note
========================== ========================= ======================================================================================
`py4j` >=0.10.9.7 Required
`pandas` >=1.0.5 Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
`pyarrow` >=4.0.0 Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
`pyarrow` >=4.0.0,<13.0.0 Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
`numpy` >=1.15 Required for pandas API on Spark and MLLib DataFrame-based API; Optional for Spark SQL
`grpcio` >=1.48,<1.57 Required for Spark Connect
`grpcio-status` >=1.48,<1.57 Required for Spark Connect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ private[ui] class StreamingQueryPagedTable(

override def row(query: StructuredStreamingRow): Seq[Node] = {
val streamingQuery = query.streamingUIData
val statisticsLink = "%s/%s/statistics?id=%s"
val statisticsLink = "%s/%s/statistics/?id=%s"
.format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix,
streamingQuery.summary.runId)

Expand Down