diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index dfc6c505c8a1d..29702846b3d2d 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -14,51 +14,26 @@ jobs: build: runs-on: ubuntu-latest strategy: - max-parallel: 8 matrix: include: - # Spark 2.4.4, scala 2.11 - scalaProfile: "scala-2.11" sparkProfile: "spark2.4" - sparkVersion: "2.4.4" flinkProfile: "flink1.13" - # Spark 2.4.4, scala 2.12 - - scalaProfile: "scala-2.12" + - scalaProfile: "scala-2.11" sparkProfile: "spark2.4" - sparkVersion: "2.4.4" flinkProfile: "flink1.14" - # Spark 3.1.x - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.1" - sparkVersion: "3.1.0" - flinkProfile: "flink1.13" - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.1" - sparkVersion: "3.1.1" + sparkProfile: "spark2.4" flinkProfile: "flink1.13" - scalaProfile: "scala-2.12" sparkProfile: "spark3.1" - sparkVersion: "3.1.2" flinkProfile: "flink1.14" - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.1" - sparkVersion: "3.1.3" - flinkProfile: "flink1.14" - - # Spark 3.2.x - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.2" - sparkVersion: "3.2.0" - flinkProfile: "flink1.13" - - scalaProfile: "scala-2.12" sparkProfile: "spark3.2" - sparkVersion: "3.2.1" flinkProfile: "flink1.14" steps: @@ -73,16 +48,14 @@ jobs: env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_VERSION: ${{ matrix.sparkVersion }} FLINK_PROFILE: ${{ matrix.flinkProfile }} run: - mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V + mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V - name: Quickstart Test env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_VERSION: ${{ matrix.sparkVersion }} FLINK_PROFILE: ${{ matrix.flinkProfile }} - if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x run: - mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark + mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark diff --git a/README.md b/README.md index e646463bac992..b0e4564ad4551 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' ``` +To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`. + To build the Javadoc for all Java and Scala classes: ``` # Javadoc generated under target/site/apidocs @@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs ### Build with different Spark versions -The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the -corresponding profile +The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions. -| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes | -|--|--|--|--| -| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default | -| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 | -| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x | -| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x | -| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` | -| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 | -| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 | +| Maven build options | Expected Spark bundle jar name | Notes | +|:--------------------------|:---------------------------------------------|:-------------------------------------------------| +| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) | +| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) | +| `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 | +| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 | +| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 | +| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 | +| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 | For example, ``` -# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle) -mvn clean package -DskipTests -Pspark3.2 +# Build against Spark 3.2.x +mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12 # Build against Spark 3.1.x -mvn clean package -DskipTests -Pspark3.1 +mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12 # Build against Spark 2.4.4 and Scala 2.12 -mvn clean package -DskipTests -Pspark2.4,scala-2.12 +mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12 ``` -### What about "spark-avro" module? +#### What about "spark-avro" module? Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages` diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml index 0803b1f143662..60f0af9d64f07 100644 --- a/packaging/hudi-utilities-slim-bundle/pom.xml +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -112,7 +112,6 @@ org.apache.httpcomponents:httpcore org.apache.httpcomponents:fluent-hc org.antlr:stringtemplate - org.apache.parquet:parquet-avro com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve @@ -394,13 +393,6 @@ ${project.version} - - - org.apache.parquet - parquet-avro - compile - - ${hive.groupid} diff --git a/pom.xml b/pom.xml index 570259b65d2a3..7caff57f066b4 100644 --- a/pom.xml +++ b/pom.xml @@ -1570,8 +1570,6 @@ ${scala12.version} 2.12 - true - true @@ -1613,6 +1611,9 @@ hudi-spark-datasource/hudi-spark2 hudi-spark-datasource/hudi-spark2-common + + true + true @@ -1631,6 +1632,7 @@ 2.4 + true