diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index dfc6c505c8a1d..29702846b3d2d 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -14,51 +14,26 @@ jobs:
build:
runs-on: ubuntu-latest
strategy:
- max-parallel: 8
matrix:
include:
- # Spark 2.4.4, scala 2.11
- scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
- sparkVersion: "2.4.4"
flinkProfile: "flink1.13"
- # Spark 2.4.4, scala 2.12
- - scalaProfile: "scala-2.12"
+ - scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
- sparkVersion: "2.4.4"
flinkProfile: "flink1.14"
- # Spark 3.1.x
- - scalaProfile: "scala-2.12"
- sparkProfile: "spark3.1"
- sparkVersion: "3.1.0"
- flinkProfile: "flink1.13"
-
- scalaProfile: "scala-2.12"
- sparkProfile: "spark3.1"
- sparkVersion: "3.1.1"
+ sparkProfile: "spark2.4"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
- sparkVersion: "3.1.2"
flinkProfile: "flink1.14"
- - scalaProfile: "scala-2.12"
- sparkProfile: "spark3.1"
- sparkVersion: "3.1.3"
- flinkProfile: "flink1.14"
-
- # Spark 3.2.x
- - scalaProfile: "scala-2.12"
- sparkProfile: "spark3.2"
- sparkVersion: "3.2.0"
- flinkProfile: "flink1.13"
-
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.2"
- sparkVersion: "3.2.1"
flinkProfile: "flink1.14"
steps:
@@ -73,16 +48,14 @@ jobs:
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
- SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
run:
- mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V
+ mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V
- name: Quickstart Test
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
- SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
- if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x
+ if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip tests for Spark 3.2 until Hadoop is upgraded to 3.x
run:
- mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
+ mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
diff --git a/README.md b/README.md
index e646463bac992..b0e4564ad4551 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
```
+To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`.
+
To build the Javadoc for all Java and Scala classes:
```
# Javadoc generated under target/site/apidocs
@@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs
### Build with different Spark versions
-The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the
-corresponding profile
+The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions.
-| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes |
-|--|--|--|--|
-| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default |
-| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 |
-| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x |
-| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x |
-| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` |
-| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 |
-| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 |
+| Maven build options | Expected Spark bundle jar name | Notes |
+|:--------------------------|:---------------------------------------------|:-------------------------------------------------|
+| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) |
+| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) |
+| `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 |
+| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 |
+| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 |
+| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 |
+| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 |
For example,
```
-# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle)
-mvn clean package -DskipTests -Pspark3.2
+# Build against Spark 3.2.x
+mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12
# Build against Spark 3.1.x
-mvn clean package -DskipTests -Pspark3.1
+mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12
# Build against Spark 2.4.4 and Scala 2.12
-mvn clean package -DskipTests -Pspark2.4,scala-2.12
+mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12
```
-### What about "spark-avro" module?
+#### What about "spark-avro" module?
Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`
diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml
index 0803b1f143662..60f0af9d64f07 100644
--- a/packaging/hudi-utilities-slim-bundle/pom.xml
+++ b/packaging/hudi-utilities-slim-bundle/pom.xml
@@ -112,7 +112,6 @@
org.apache.httpcomponents:httpcore
org.apache.httpcomponents:fluent-hc
org.antlr:stringtemplate
- org.apache.parquet:parquet-avro
com.github.davidmoten:guava-mini
com.github.davidmoten:hilbert-curve
@@ -394,13 +393,6 @@
${project.version}
-
-
- org.apache.parquet
- parquet-avro
- compile
-
-
${hive.groupid}
diff --git a/pom.xml b/pom.xml
index 570259b65d2a3..7caff57f066b4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1570,8 +1570,6 @@
${scala12.version}
2.12
- true
- true
@@ -1613,6 +1611,9 @@
hudi-spark-datasource/hudi-spark2
hudi-spark-datasource/hudi-spark2-common
+
+ true
+
true
@@ -1631,6 +1632,7 @@
2.4
+ true