diff --git a/README.md b/README.md index 6a1abe36c34c1..4526b9517d8d2 100644 --- a/README.md +++ b/README.md @@ -66,9 +66,11 @@ git clone https://github.com/apache/hudi.git && cd hudi mvn clean package -DskipTests # Start command -spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ - --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \ +spark-3.2.3-bin-hadoop3.2/bin/spark-shell \ + --jars `ls packaging/hudi-spark-bundle/target/hudi-spark3.2-bundle_2.12-*.*.*-SNAPSHOT.jar` \ --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \ + --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \ + --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \ --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar' ``` @@ -82,29 +84,31 @@ mvn clean javadoc:aggregate -Pjavadocs ### Build with different Spark versions -The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, corresponding to `spark3` profile is 3.3.1. -Refer to the table below for building with different Spark and Scala versions. +The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, corresponding to the `spark3` profile, is +3.3.1. The default Scala version is 2.12, and a build with no Spark option targets Spark 3.2.x. Refer to the table below for building with different Spark and Scala versions. 
| Maven build options | Expected Spark bundle jar name | Notes | |:--------------------------|:---------------------------------------------|:-------------------------------------------------| -| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) | -| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) | -| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 | -| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 | -| `-Dspark3.3 -Dscala-2.12` | hudi-spark3.3-bundle_2.12 | For Spark 3.3.x and Scala 2.12 | +| (empty) | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 (default options) | +| `-Dspark2.4 -Dscala-2.11` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 | +| `-Dspark3.0` | hudi-spark3.0-bundle_2.12 | For Spark 3.0.x and Scala 2.12 | +| `-Dspark3.1` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 | +| `-Dspark3.2` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 (same as default) | +| `-Dspark3.3` | hudi-spark3.3-bundle_2.12 | For Spark 3.3.x and Scala 2.12 | +| `-Dspark2 -Dscala-2.11` | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 | +| `-Dspark2 -Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 | | `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.3.x and Scala 2.12 | -| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 | For example, ``` # Build against Spark 3.2.x -mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12 +mvn clean package -DskipTests # Build against Spark 3.1.x -mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12 +mvn clean package -DskipTests -Dspark3.1 # Build against Spark 2.4.4 and Scala 2.11 -mvn clean package -DskipTests -Dspark2.4 +mvn clean package -DskipTests -Dspark2.4 -Dscala-2.11 
``` #### What about "spark-avro" module? @@ -123,10 +127,10 @@ Refer to the table below for building with different Flink and Scala versions. | `-Dflink1.17` | hudi-flink1.17-bundle | For Flink 1.17 (same as default) | | `-Dflink1.16` | hudi-flink1.16-bundle | For Flink 1.16 | | `-Dflink1.15` | hudi-flink1.15-bundle | For Flink 1.15 | -| `-Dflink1.14 -Dscala-2.12` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.12 | -| `-Dflink1.14` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.11 | -| `-Dflink1.13 -Dscala-2.12` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.12 | -| `-Dflink1.13` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.11 | +| `-Dflink1.14` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.12 | +| `-Dflink1.14 -Dscala-2.11` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.11 | +| `-Dflink1.13` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.12 | +| `-Dflink1.13 -Dscala-2.11` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.11 | For example, ``` @@ -134,10 +138,10 @@ For example, mvn clean package -DskipTests -Dflink1.15 # Build against Flink 1.14.x and Scala 2.11 -mvn clean package -DskipTests -Dflink1.14 +mvn clean package -DskipTests -Dflink1.14 -Dscala-2.11 # Build against Flink 1.13.x and Scala 2.12 -mvn clean package -DskipTests -Dflink1.13 -Dscala-2.12 +mvn clean package -DskipTests -Dflink1.13 ``` ## Running Tests diff --git a/pom.xml b/pom.xml index 123f767e953f7..e22843a854e96 100644 --- a/pom.xml +++ b/pom.xml @@ -2383,6 +2383,7 @@ hudi-spark-datasource/hudi-spark3.2plus-common + true spark3.2