
Commit 1ff70db

Merge branch 'story/state-v2-base' into story/state-v2-changelog
2 parents: 5ad08dd + 3a93b92

1,025 files changed: 23,444 additions and 8,173 deletions


.github/workflows/benchmark.yml

Lines changed: 3 additions & 3 deletions
@@ -105,7 +105,7 @@ jobs:
       run: cd tpcds-kit/tools && make OS=LINUX
     - name: Install Java ${{ github.event.inputs.jdk }}
       if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ github.event.inputs.jdk }}
@@ -157,7 +157,7 @@ jobs:
         restore-keys: |
           benchmark-coursier-${{ github.event.inputs.jdk }}
     - name: Install Java ${{ github.event.inputs.jdk }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ github.event.inputs.jdk }}
@@ -177,7 +177,7 @@ jobs:
         # In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
         bin/spark-submit \
           --driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
-          --jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \
+          --jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`,`find ~/.cache/coursier -name 'curator-test-*.jar'`" \
           "`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \
           "${{ github.event.inputs.class }}"
         # To keep the directory structure and file permissions, tar them
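
The backtick pipeline in the --jars change is worth unpacking: --jars expects a single comma-separated argument, while find emits one path per line. A minimal bash sketch of the same idea, with illustrative paths (not taken from this commit):

# Join every matching jar path with commas, since --jars takes a
# comma-separated list while find prints one path per line.
jars=$(find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -)
# The new revision additionally appends the curator-test jar from the
# Coursier cache so it lands on the benchmark driver's classpath.
jars="$jars,$(find ~/.cache/coursier -name 'curator-test-*.jar')"
echo "$jars"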

.github/workflows/build_and_test.yml

Lines changed: 61 additions & 24 deletions
@@ -98,7 +98,8 @@ jobs:
             \"java-other-versions\": \"true\",
             \"lint\" : \"true\",
             \"k8s-integration-tests\" : \"true\",
-            \"breaking-changes-buf\" : \"true\",
+            \"buf\" : \"true\",
+            \"ui\" : \"true\",
           }"
           echo $precondition # For debugging
           # Remove `\n` to avoid "Invalid format" error
@@ -149,7 +150,7 @@ jobs:
           - >-
             mllib-local, mllib, graphx
           - >-
-            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl,
             yarn, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
             connect, protobuf
         # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
@@ -240,12 +241,12 @@ jobs:
           ./dev/free_disk_space
         fi
     - name: Install Java ${{ matrix.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ matrix.java }}
     - name: Install Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       # We should install one Python that is higher than 3+ for SQL and Yarn because:
       # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
       # - Yarn has a Python specific test too, for example, YarnClusterSuite.
@@ -267,6 +268,10 @@ jobs:
           export TERM=vt100
           # Hive "other tests" test needs larger metaspace size based on experiment.
           if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
+          # SPARK-46283: should delete the following env replacement after SPARK 3.x EOL
+          if [[ "$MODULES_TO_TEST" == *"streaming-kinesis-asl"* ]] && [[ "${{ inputs.branch }}" =~ ^branch-3 ]]; then
+            MODULES_TO_TEST=${MODULES_TO_TEST//streaming-kinesis-asl, /}
+          fi
           export SERIAL_SBT_TESTS=1
           ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
     - name: Upload test results to report
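
The branch-3.x guard added above relies on bash pattern substitution. A small sketch of its effect, with an illustrative module list:

# ${var//pattern/} deletes every occurrence of the pattern; here it
# strips the kinesis entry together with its trailing comma and space.
MODULES_TO_TEST="streaming, sql-kafka-0-10, streaming-kinesis-asl, yarn"
MODULES_TO_TEST=${MODULES_TO_TEST//streaming-kinesis-asl, /}
echo "$MODULES_TO_TEST"   # streaming, sql-kafka-0-10, yarn
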
@@ -427,7 +432,7 @@ jobs:
           ./dev/free_disk_space_container
         fi
     - name: Install Java ${{ matrix.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ matrix.java }}
@@ -537,7 +542,7 @@ jobs:
           ./dev/free_disk_space_container
         fi
     - name: Install Java ${{ inputs.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ inputs.java }}
@@ -556,11 +561,10 @@ jobs:
         name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
         path: "**/target/test-reports/*.xml"
 
-  breaking-changes-buf:
+  buf:
     needs: [precondition]
-    if: (!cancelled()) && fromJson(needs.precondition.outputs.required).breaking-changes-buf == 'true'
-    # Change 'branch-3.5' to 'branch-4.0' in master branch after cutting branch-4.0 branch.
-    name: Breaking change detection with Buf (branch-3.5)
+    if: (!cancelled()) && fromJson(needs.precondition.outputs.required).buf == 'true'
+    name: Protobuf breaking change detection and Python CodeGen check
     runs-on: ubuntu-22.04
     steps:
     - name: Checkout Spark repository
@@ -579,12 +583,26 @@ jobs:
       uses: bufbuild/buf-setup-action@v1
       with:
         github_token: ${{ secrets.GITHUB_TOKEN }}
-    - name: Detect breaking changes
+    - name: Protocol Buffers Linter
+      uses: bufbuild/buf-lint-action@v1
+      with:
+        input: core/src/main/protobuf
+    # Change 'branch-3.5' to 'branch-4.0' in master branch after cutting branch-4.0 branch.
+    - name: Breaking change detection against branch-3.5
       uses: bufbuild/buf-breaking-action@v1
       with:
         input: connector/connect/common/src/main
         against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
-
+    - name: Install Python 3.9
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.9'
+    - name: Install dependencies for Python CodeGen check
+      run: |
+        python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==0.982' 'mypy-protobuf==3.3.0'
+        python3.9 -m pip list
+    - name: Python CodeGen check
+      run: ./dev/connect-check-protos.py
 
   # Static analysis, and documentation build
   lint:
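
Locally, the two Buf steps added to this job correspond roughly to the following CLI invocations (a sketch, assuming the buf binary is on PATH; the inputs mirror the workflow):

# Mirrors bufbuild/buf-lint-action: lint the protobuf sources.
buf lint core/src/main/protobuf
# Mirrors bufbuild/buf-breaking-action: compare the Spark Connect protos
# against the same directory on branch-3.5.
buf breaking connector/connect/common/src/main \
  --against 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
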
@@ -651,7 +669,7 @@ jobs:
           ./dev/free_disk_space_container
         fi
     - name: Install Java ${{ inputs.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ inputs.java }}
@@ -696,17 +714,19 @@ jobs:
         python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
     - name: Python linter
       run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
-    - name: Install dependencies for Python code generation check
-      if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
+    # Should delete this section after SPARK 3.5 EOL.
+    - name: Install dependencies for Python code generation check for branch-3.5
+      if: inputs.branch == 'branch-3.5'
       run: |
         # See more in "Installation" https://docs.buf.build/installation#tarball
         curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
         mkdir -p $HOME/buf
         tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
         rm buf-Linux-x86_64.tar.gz
         python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0'
-    - name: Python code generation check
-      if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
+    # Should delete this section after SPARK 3.5 EOL.
+    - name: Python code generation check for branch-3.5
+      if: inputs.branch == 'branch-3.5'
       run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
     # Should delete this section after SPARK 3.5 EOL.
     - name: Install JavaScript linter dependencies for branch-3.3, branch-3.4, branch-3.5
@@ -741,9 +761,7 @@ jobs:
         Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"
     - name: Install dependencies for documentation generation
       run: |
-        # Pin the MarkupSafe to 2.0.1 to resolve the CI error.
-        # See also https://issues.apache.org/jira/browse/SPARK-38279.
-        python3.9 -m pip install 'sphinx==4.2.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 'markupsafe==2.0.1' 'pyzmq<24.0.0'
+        python3.9 -m pip install 'sphinx==4.2.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0'
         python3.9 -m pip install ipython_genutils # See SPARK-38517
         python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
         python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
@@ -817,7 +835,7 @@ jobs:
         restore-keys: |
           java${{ matrix.java }}-maven-
     - name: Install Java ${{ matrix.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ matrix.java }}
@@ -872,7 +890,7 @@ jobs:
         restore-keys: |
           tpcds-coursier-
     - name: Install Java ${{ inputs.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ inputs.java }}
@@ -979,7 +997,7 @@ jobs:
         restore-keys: |
           docker-integration-coursier-
     - name: Install Java ${{ inputs.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ inputs.java }}
@@ -1039,7 +1057,7 @@ jobs:
         restore-keys: |
           k8s-integration-coursier-
     - name: Install Java ${{ inputs.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ inputs.java }}
@@ -1074,3 +1092,22 @@ jobs:
       with:
         name: spark-on-kubernetes-it-log
         path: "**/target/integration-tests.log"
+
+  ui:
+    needs: [precondition]
+    if: fromJson(needs.precondition.outputs.required).ui == 'true'
+    name: Run Spark UI tests
+    runs-on: ubuntu-22.04
+    timeout-minutes: 300
+    steps:
+    - uses: actions/checkout@v4
+    - name: Use Node.js
+      uses: actions/setup-node@v4
+      with:
+        node-version: 20
+        cache: 'npm'
+        cache-dependency-path: ui-test/package-lock.json
+    - run: |
+        cd ui-test
+        npm install --save-dev
+        node --experimental-vm-modules node_modules/.bin/jest
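
The last step of the new ui job can be replayed locally. A sketch, assuming Node 20 and the ui-test directory introduced elsewhere in this commit:

cd ui-test
# Install jest and the other devDependencies recorded in package-lock.json.
npm install --save-dev
# --experimental-vm-modules enables Jest's ES-module support under Node.
node --experimental-vm-modules node_modules/.bin/jest
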
(new workflow file)

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build using Maven (master, Scala 2.13, Hadoop 3, JDK 21)"
+
+on:
+  schedule:
+    - cron: '0 14 * * *'
+
+jobs:
+  run-build:
+    permissions:
+      packages: write
+    name: Run
+    uses: ./.github/workflows/maven_test.yml
+    if: github.repository == 'apache/spark'
+    with:
+      java: 21

.github/workflows/build_python.yml

Lines changed: 4 additions & 1 deletion
@@ -25,6 +25,9 @@ on:
 
 jobs:
   run-build:
+    strategy:
+      matrix:
+        pyversion: ["pypy3,python3.10", "python3.11,python3.12"]
     permissions:
       packages: write
     name: Run
@@ -36,7 +39,7 @@ jobs:
       hadoop: hadoop3
       envs: >-
         {
-          "PYTHON_TO_TEST": "pypy3,python3.10,python3.11,python3.12"
+          "PYTHON_TO_TEST": "${{ matrix.pyversion }}"
         }
       jobs: >-
         {
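
Each matrix entry reaches the reusable workflow as a comma-separated PYTHON_TO_TEST value, so the single serial build becomes two parallel ones. As an illustration only (not the actual dev/run-tests logic), such a value can be split in bash like this:

PYTHON_TO_TEST="pypy3,python3.10"
# Split on commas and probe each requested interpreter.
IFS=',' read -ra pythons <<< "$PYTHON_TO_TEST"
for py in "${pythons[@]}"; do
  "$py" --version
done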

.github/workflows/maven_test.yml

Lines changed: 2 additions & 2 deletions
@@ -150,12 +150,12 @@ jobs:
         restore-keys: |
           java${{ matrix.java }}-maven-
     - name: Install Java ${{ matrix.java }}
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: zulu
         java-version: ${{ matrix.java }}
     - name: Install Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       # We should install one Python that is higher than 3+ for SQL and Yarn because:
       # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
       # - Yarn has a Python specific test too, for example, YarnClusterSuite.

.github/workflows/notify_test_workflow.yml

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ jobs:
       checks: write
     steps:
       - name: "Notify test workflow"
-        uses: actions/github-script@v6
+        uses: actions/github-script@v7
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |

.github/workflows/publish_snapshot.yml

Lines changed: 2 additions & 2 deletions
@@ -53,13 +53,13 @@ jobs:
           snapshot-maven-
     - name: Install Java 8 for branch-3.x
       if: matrix.branch == 'branch-3.5' || matrix.branch == 'branch-3.4' || matrix.branch == 'branch-3.3'
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: temurin
         java-version: 8
     - name: Install Java 17
       if: matrix.branch != 'branch-3.5' && matrix.branch != 'branch-3.4' && matrix.branch != 'branch-3.3'
-      uses: actions/setup-java@v3
+      uses: actions/setup-java@v4
       with:
         distribution: temurin
         java-version: 17

.github/workflows/update_build_status.yml

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ jobs:
       checks: write
     steps:
       - name: "Update build status"
-        uses: actions/github-script@v6
+        uses: actions/github-script@v7
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 1 addition & 4 deletions
@@ -4103,10 +4103,7 @@ test_that("catalog APIs, listCatalogs, setCurrentCatalog, currentCatalog", {
   expect_equal(currentCatalog(), "spark_catalog")
   expect_error(setCurrentCatalog("spark_catalog"), NA)
   expect_error(setCurrentCatalog("zxwtyswklpf"),
-               paste0("Error in setCurrentCatalog : ",
-                      "org.apache.spark.sql.connector.catalog.CatalogNotFoundException: ",
-                      "Catalog 'zxwtyswklpf' plugin class not found: ",
-                      "spark.sql.catalog.zxwtyswklpf is not defined"))
+               "[CATALOG_NOT_FOUND]*`zxwtyswklpf`*")
   catalogs <- collect(listCatalogs())
 })
