diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 41945fdfe1d46..0811c828e498d 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -31,7 +31,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' @@ -76,9 +76,9 @@ jobs: sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' @@ -112,6 +112,61 @@ jobs: run: mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + test-spark-java17: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.3" + sparkModules: "hudi-spark-datasource/hudi-spark3.3.x" + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.4" + sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'adopt' + architecture: x64 + - name: Build Project + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + run: + mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + architecture: x64 + - name: Quickstart Test + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + run: + mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS + - name: UT - Common & Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + - name: FT - Spark + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_MODULES: ${{ matrix.sparkModules }} + if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI + run: + mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + test-flink: runs-on: ubuntu-latest strategy: @@ -123,9 +178,9 @@ jobs: - flinkProfile: "flink1.16" - flinkProfile: "flink1.17" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' @@ -151,6 +206,34 @@ jobs: mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS + docker-java17-test: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - flinkProfile: 'flink1.17' + sparkProfile: 'spark3.4' + sparkRuntime: 'spark3.4.0' + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + 
java-version: '8' + distribution: 'adopt' + architecture: x64 + - name: UT/FT - Docker Test - OpenJDK 17 + env: + FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_RUNTIME: ${{ matrix.sparkRuntime }} + SCALA_PROFILE: 'scala-2.12' + if: ${{ env.SPARK_PROFILE >= 'spark3.4' }} # Only support Spark 3.4 for now + run: | + HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + ./packaging/bundle-validation/run_docker_java17.sh + validate-bundles: runs-on: ubuntu-latest strategy: @@ -181,9 +264,9 @@ jobs: sparkProfile: 'spark2.4' sparkRuntime: 'spark2.4.8' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' @@ -255,9 +338,9 @@ jobs: sparkProfile: 'spark2.4' sparkRuntime: 'spark2.4.8' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' @@ -294,9 +377,9 @@ jobs: - sparkProfile: 'spark2.4' sparkArchive: 'spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' diff --git a/.github/workflows/pr_compliance.yml b/.github/workflows/pr_compliance.yml index 542a0a54672da..3f58ceafcf3d1 100644 --- a/.github/workflows/pr_compliance.yml +++ b/.github/workflows/pr_compliance.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 - name: run script - run: python3 scripts/pr_compliance.py + run: python3 scripts/pr_compliance.py diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 9d2fafef852fa..4a1d02695e364 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -248,6 +248,13 @@ + + org.apache.spark + spark-streaming-kafka-0-10_${scala.binary.version} + test + ${spark.version} + + commons-io diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index d825080bf737e..4f92660e377c9 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -450,10 +450,8 @@ public void testGenerateProjectionSchema() { assertTrue(fieldNames1.contains("_row_key")); assertTrue(fieldNames1.contains("timestamp")); - assertEquals("Field fake_field not found in log schema. Query cannot proceed! Derived Schema Fields: " - + "[non_pii_col, _hoodie_commit_time, _row_key, _hoodie_partition_path, _hoodie_record_key, pii_col," - + " _hoodie_commit_seqno, _hoodie_file_name, timestamp]", - assertThrows(HoodieException.class, () -> - HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field"))).getMessage()); + assertTrue(assertThrows(HoodieException.class, () -> + HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field"))) + .getMessage().contains("Field fake_field not found in log schema. 
Query cannot proceed!")); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java index b3182498a579a..6d298c2edc448 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java @@ -26,11 +26,15 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; + +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import java.io.IOException; +import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; +import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; import static org.junit.jupiter.api.Assertions.assertEquals; class TestHoodieWrapperFileSystem { @@ -40,11 +44,23 @@ class TestHoodieWrapperFileSystem { private static MiniDFSCluster dfsCluster; @BeforeAll - public static void prepareFs() throws IOException { - hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf()); - dfsCluster = hdfsTestService.start(true); - fs = dfsCluster.getFileSystem(); - basePath = fs.getWorkingDirectory().toString(); + public static void setUp() throws IOException { + if (shouldUseExternalHdfs()) { + fs = useExternalHdfs(); + } else { + hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf()); + dfsCluster = hdfsTestService.start(true); + fs = dfsCluster.getFileSystem(); + } + basePath = fs.getWorkingDirectory() + "/TestHoodieWrapperFileSystem/"; + fs.mkdirs(new Path(basePath)); + } + + @AfterAll + public static void cleanUp() { + if (hdfsTestService != null) { + hdfsTestService.stop(); + } } @Test @@ -58,6 +74,6 @@ public void testCreateImmutableFileInPath() throws IOException { fs.createImmutableFileInPath(testFile, Option.of(testContent.getBytes())); assertEquals(1, fs.listStatus(new Path(basePath)).length, - "create same file twice should only have on file exists"); + "create same file twice should only have one file exists, files: " + fs.listStatus(new Path(basePath))); } -} \ No newline at end of file +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 5c946ec9c5a84..c613b52dd631f 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -73,6 +73,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.util.counters.BenchmarkCounter; + import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -104,6 +105,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion; +import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; +import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema; import static org.junit.jupiter.api.Assertions.assertEquals; import static 
org.junit.jupiter.api.Assertions.assertFalse; @@ -131,15 +135,21 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness { private String spillableBasePath; @BeforeAll - public static void setUpClass() throws IOException, InterruptedException { - // Append is not supported in LocalFileSystem. HDFS needs to be setup. - hdfsTestService = new HdfsTestService(); - fs = hdfsTestService.start(true).getFileSystem(); + public static void setUpClass() throws IOException { + if (shouldUseExternalHdfs()) { + fs = useExternalHdfs(); + } else { + // Append is not supported in LocalFileSystem. HDFS needs to be setup. + hdfsTestService = new HdfsTestService(); + fs = hdfsTestService.start(true).getFileSystem(); + } } @AfterAll public static void tearDownClass() { - hdfsTestService.stop(); + if (hdfsTestService != null) { + hdfsTestService.stop(); + } } @BeforeEach @@ -2539,7 +2549,10 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema( new HashMap() {{ put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported - put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_9() ? 1802 : 1809); + put(HoodieLogBlockType.PARQUET_DATA_BLOCK, + HoodieAvroUtils.gteqAvro1_9() + ? getJavaVersion() == 17 || getJavaVersion() == 11 ? 1803 : 1802 + : 1809); }}; List recordsRead = getRecords(dataBlockRead); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java index 309a3b04858ec..83a439c3ad126 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java @@ -38,7 +38,9 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.DataNode; + import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; @@ -53,6 +55,7 @@ import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; +import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -63,6 +66,9 @@ public class TestHoodieLogFormatAppendFailure { @BeforeAll public static void setUpClass() throws IOException { + // This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17 + Assumptions.assumeFalse(shouldUseExternalHdfs()); + // NOTE : The MiniClusterDFS leaves behind the directory under which the cluster was created baseDir = new File("/tmp/" + UUID.randomUUID()); FileUtil.fullyDelete(baseDir); @@ -78,6 +84,9 @@ public static void setUpClass() throws IOException { @AfterAll public static void tearDownClass() { + // This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17 + Assumptions.assumeFalse(shouldUseExternalHdfs()); + cluster.shutdown(true); // Force clean up the directory under which the cluster was created FileUtil.fullyDelete(baseDir); @@ -145,5 +154,4 @@ public void testFailedToGetAppendStreamFromHDFSNameNode() assertNotEquals(writer.getLogFile().getLogVersion(), 
logFileVersion); writer.close(); } - } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index 1e6e9642c6429..9dcd2851b4a0f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.testutils; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieFileFormat; @@ -43,6 +44,7 @@ import java.util.Objects; import java.util.Properties; import java.util.UUID; +import org.junit.jupiter.api.Assumptions; /** * A utility class for testing. @@ -218,4 +220,31 @@ public static void createCompactionCommitInMetadataTable( HoodieTestUtils.init(hadoopConf, metadataTableBasePath, HoodieTableType.MERGE_ON_READ); HoodieTestDataGenerator.createCommitFile(metadataTableBasePath, instantTime + "001", wrapperFs.getConf()); } + + public static int getJavaVersion() { + String version = System.getProperty("java.version"); + if (version.startsWith("1.")) { + version = version.substring(2, 3); + } else { + int dot = version.indexOf("."); + if (dot != -1) { + version = version.substring(0, dot); + } + } + return Integer.parseInt(version); + } + + public static boolean shouldUseExternalHdfs() { + return getJavaVersion() == 11 || getJavaVersion() == 17; + } + + public static DistributedFileSystem useExternalHdfs() throws IOException { + // For Java 17, this unit test has to run in Docker + // Need to set -Duse.external.hdfs=true in mvn command to run this test + Assumptions.assumeTrue(Boolean.valueOf(System.getProperty("use.external.hdfs", "false"))); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", "hdfs://localhost:9000"); + conf.set("dfs.replication", "3"); + return (DistributedFileSystem) DistributedFileSystem.get(conf); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java index 8a2cf0cff133c..4dd32d840b187 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; + import org.junit.Rule; import org.junit.contrib.java.lang.system.EnvironmentVariables; import org.junit.jupiter.api.AfterAll; @@ -39,6 +40,8 @@ import java.io.IOException; import java.io.PrintStream; +import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; +import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -60,10 +63,15 @@ public class TestDFSPropertiesConfiguration { @BeforeAll public static void initClass() throws Exception { - hdfsTestService = new HdfsTestService(); - dfsCluster = hdfsTestService.start(true); + if (shouldUseExternalHdfs()) { + dfs = useExternalHdfs(); + } else { + hdfsTestService = new 
HdfsTestService(); + dfsCluster = hdfsTestService.start(true); + dfs = dfsCluster.getFileSystem(); + } + // Create a temp folder as the base path - dfs = dfsCluster.getFileSystem(); dfsBasePath = dfs.getWorkingDirectory().toString(); dfs.mkdirs(new Path(dfsBasePath)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java index 5a2a407bc4b07..be938ef9a4816 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.List; +import static org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion; import static org.apache.hudi.common.util.ObjectSizeCalculator.getObjectSize; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -59,12 +60,29 @@ public void testGetObjectSize() { String name = "Alice Bob"; Person person = new Person(name); + if (getJavaVersion() == 11 || getJavaVersion() == 17) { + assertEquals(48, getObjectSize(string)); + assertEquals(168, getObjectSize(stringArray)); + assertEquals(144, getObjectSize(stringBuilder)); + assertEquals(72, getObjectSize(DayOfWeek.TUESDAY)); + assertEquals(HoodieAvroUtils.gteqAvro1_9() ? 1256 : 1176, + getObjectSize(Schema.create(Schema.Type.STRING))); + assertEquals(96, getObjectSize(person)); + } else { + assertEquals(56, getObjectSize(string)); + assertEquals(184, getObjectSize(stringArray)); + assertEquals(240, getObjectSize(stringBuilder)); + assertEquals(80, getObjectSize(DayOfWeek.TUESDAY)); + // Since avro 1.9, Schema use ConcurrentHashMap instead of LinkedHashMap to + // implement props, which will change the size of the object. + assertEquals(HoodieAvroUtils.gteqAvro1_9() ? 1320 : 1240, + getObjectSize(Schema.create(Schema.Type.STRING))); + assertEquals(104, getObjectSize(person)); + } + assertEquals(40, getObjectSize(emptyString)); - assertEquals(56, getObjectSize(string)); - assertEquals(184, getObjectSize(stringArray)); assertEquals(416, getObjectSize(anotherStringArray)); assertEquals(40, getObjectSize(stringList)); - assertEquals(240, getObjectSize(stringBuilder)); assertEquals(16, getObjectSize(maxIntPrimitive)); assertEquals(16, getObjectSize(minIntPrimitive)); assertEquals(16, getObjectSize(maxInteger)); @@ -72,16 +90,10 @@ public void testGetObjectSize() { assertEquals(24, getObjectSize(zeroLong)); assertEquals(24, getObjectSize(zeroDouble)); assertEquals(16, getObjectSize(booleanField)); - assertEquals(80, getObjectSize(DayOfWeek.TUESDAY)); assertEquals(16, getObjectSize(object)); assertEquals(32, getObjectSize(emptyClass)); assertEquals(40, getObjectSize(stringClass)); assertEquals(40, getObjectSize(payloadClass)); - // Since avro 1.9, Schema use ConcurrentHashMap instead of LinkedHashMap to - // implement props, which will change the size of the object. - assertEquals(HoodieAvroUtils.gteqAvro1_9() ? 
1320 : 1240,
-        getObjectSize(Schema.create(Schema.Type.STRING)));
-    assertEquals(104, getObjectSize(person));
   }
 
   class EmptyClass {
diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/hive/TestHiveClientUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/hive/TestHiveClientUtils.scala
index f74bd07eae702..814975ffff87b 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/hive/TestHiveClientUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/hive/TestHiveClientUtils.scala
@@ -17,17 +17,32 @@ package org.apache.spark.sql.hive
 
+import org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
-import org.junit.jupiter.api.Test
+import org.junit.Assume
+import org.junit.jupiter.api.TestInstance.Lifecycle
+import org.junit.jupiter.api.{BeforeAll, Test, TestInstance}
 
+@TestInstance(Lifecycle.PER_CLASS)
 class TestHiveClientUtils {
 
-  protected val spark: SparkSession = TestHive.sparkSession
-  protected val hiveContext: TestHiveContext = TestHive
-  protected val hiveClient: HiveClient =
-    spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+  protected var spark: SparkSession = null
+  protected var hiveContext: TestHiveContext = null
+  protected var hiveClient: HiveClient = null
+
+  @BeforeAll
+  def setUp(): Unit = {
+    // This test is not supported on Java 17 yet because MiniDFSCluster can't be initialized under Java 17.
+    // For Java 17 test coverage this test has been converted to a Scala script at:
+    // packaging/bundle-validation/docker_java17/TestHiveClientUtils.scala
+    Assume.assumeFalse(getJavaVersion == 11 || getJavaVersion == 17)
+
+    spark = TestHive.sparkSession
+    hiveContext = TestHive
+    hiveClient = spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+  }
 
   @Test
   def reuseHiveClientFromSparkSession(): Unit = {
diff --git a/packaging/bundle-validation/Dockerfile b/packaging/bundle-validation/Dockerfile
index 4b185d474b71f..995e11ef828b1 100644
--- a/packaging/bundle-validation/Dockerfile
+++ b/packaging/bundle-validation/Dockerfile
@@ -23,6 +23,7 @@ ADD . .
ENV HUDI_CONF_DIR=$WORKDIR/conf RUN if [[ $HIVE_HOME =~ 'hive-2' ]]; \ then mv conf/hive-site.hive2.xml conf/hive-site.xml; fi + RUN cp conf/hive-site.xml $HIVE_HOME/conf/ RUN cp conf/hive-site.xml $SPARK_HOME/conf/ RUN cp $DERBY_HOME/lib/derbyclient.jar $SPARK_HOME/jars/ @@ -31,3 +32,27 @@ RUN if [[ $SPARK_HOME =~ 'spark-3.2' ]] || [[ $SPARK_HOME =~ 'spark-3.3' ]]; \ then printf "\nspark.sql.catalog.spark_catalog org.apache.spark.sql.hudi.catalog.HoodieCatalog\n" >> $SPARK_HOME/conf/spark-defaults.conf; fi RUN printf "\ntaskmanager.numberOfTaskSlots: 2\n" >> $FLINK_HOME/conf/flink-conf.yaml RUN printf "\nlocalhost\n" >> $FLINK_HOME/conf/workers + +# install maven +RUN wget https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz +RUN mkdir -p /usr/share/maven +RUN tar xzvf apache-maven-3.6.3-bin.tar.gz -C /usr/share/maven +ENV MAVEN_HOME=/usr/share/maven/apache-maven-3.6.3 +ENV PATH=$MAVEN_HOME/bin:$PATH +RUN mvn --version + +# prepare directory for docker java17 test +RUN mkdir -p /opt/bundle-validation/docker-test +RUN mkdir -p /opt/bundle-validation/tmp-conf-dir +RUN cp conf/hdfs-site.xml /opt/bundle-validation/tmp-conf-dir/hdfs-site.xml +RUN cp conf/core-site.xml /opt/bundle-validation/tmp-conf-dir/core-site.xml + +# Hadoop env +ENV HDFS_NAMENODE_USER root +ENV HDFS_DATANODE_USER root +ENV HDFS_SECONDARYNAMENODE_USER root +ENV YARN_RESOURCEMANAGER_USER root +ENV YARN_NODEMANAGER_USER root + +# for RocksDb +RUN apk add --no-cache libstdc++ diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index 3e10a6c0c9a94..bfdf9a1f661b9 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -200,5 +200,8 @@ docker build \ . # run validation script in docker -docker run -v $TMP_JARS_DIR:/opt/bundle-validation/jars -v $TMP_DATA_DIR:/opt/bundle-validation/data \ +docker run --name hudi_docker \ + -v ${GITHUB_WORKSPACE}:/opt/bundle-validation/docker-test \ + -v $TMP_JARS_DIR:/opt/bundle-validation/jars \ + -v $TMP_DATA_DIR:/opt/bundle-validation/data \ -i hudi-ci-bundle-validation:$IMAGE_TAG bash validate.sh $JAVA_RUNTIME_VERSION diff --git a/packaging/bundle-validation/conf/core-site.xml b/packaging/bundle-validation/conf/core-site.xml new file mode 100644 index 0000000000000..1fd3bb942e408 --- /dev/null +++ b/packaging/bundle-validation/conf/core-site.xml @@ -0,0 +1,22 @@ + + + + + + fs.defaultFS + hdfs://localhost:9000 + + diff --git a/packaging/bundle-validation/conf/hdfs-site.xml b/packaging/bundle-validation/conf/hdfs-site.xml new file mode 100644 index 0000000000000..a88199ed7feca --- /dev/null +++ b/packaging/bundle-validation/conf/hdfs-site.xml @@ -0,0 +1,29 @@ + + + + + + dfs.replication + 3 + + + dfs.datanode.du.reserved + 3221225472 + 3GB of disk space reserved for non DFS usage. + This space will be left unconsumed by the Datanode. + + + diff --git a/packaging/bundle-validation/docker_java17/TestHiveClientUtils.scala b/packaging/bundle-validation/docker_java17/TestHiveClientUtils.scala new file mode 100644 index 0000000000000..40e040aca35df --- /dev/null +++ b/packaging/bundle-validation/docker_java17/TestHiveClientUtils.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.spark.sql.hive.HiveClientUtils +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +/* + * Converted from hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/hive/TestHiveClientUtils.scala + * as original test couldn't run on Java 17 + */ + +assert(spark.conf.get(CATALOG_IMPLEMENTATION.key) == "hive") +val hiveClient1 = HiveClientUtils.getSingletonClientForMetadata(spark) +val hiveClient2 = HiveClientUtils.getSingletonClientForMetadata(spark) +assert(hiveClient1 == hiveClient2) diff --git a/packaging/bundle-validation/docker_java17/docker_java17_test.sh b/packaging/bundle-validation/docker_java17/docker_java17_test.sh new file mode 100755 index 0000000000000..e668bc66de76d --- /dev/null +++ b/packaging/bundle-validation/docker_java17/docker_java17_test.sh @@ -0,0 +1,178 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
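+
+# docker_java17_test.sh runs inside the bundle-validation Docker image. At a high level it:
+#   1. builds the Hudi Spark bundle with the image's default Java 8 runtime,
+#   2. formats and starts HDFS inside the container (a namenode plus three datanodes),
+#   3. switches JAVA_HOME to OpenJDK 17 and runs the HDFS-dependent hudi-common tests
+#      against that cluster (-Duse.external.hdfs=true), followed by a spark-shell script test.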
+
+SPARK_PROFILE=$1
+SCALA_PROFILE=$2
+JAVA_RUNTIME_VERSION=openjdk17
+DEFAULT_JAVA_HOME=${JAVA_HOME}
+WORKDIR=/opt/bundle-validation
+JARS_DIR=${WORKDIR}/jars
+DOCKER_TEST_DIR=${WORKDIR}/docker-test
+
+##
+# Function to change Java runtime version by changing JAVA_HOME
+##
+change_java_runtime_version () {
+  if [[ ${JAVA_RUNTIME_VERSION} == 'openjdk11' ]]; then
+    echo "Change JAVA_HOME to /usr/lib/jvm/java-11-openjdk"
+    export JAVA_HOME=/usr/lib/jvm/java-11-openjdk
+  elif [[ ${JAVA_RUNTIME_VERSION} == 'openjdk17' ]]; then
+    echo "Change JAVA_HOME to /usr/lib/jvm/java-17-openjdk"
+    export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+  fi
+}
+
+##
+# Function to change Java runtime version back to the default Java 8
+##
+use_default_java_runtime () {
+  echo "Use default java runtime under ${DEFAULT_JAVA_HOME}"
+  export JAVA_HOME=${DEFAULT_JAVA_HOME}
+}
+
+start_datanode () {
+  DN=$1
+
+  echo "::warning::docker_java17_test.sh starting datanode: $DN"
+
+  cat $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+  cat $HADOOP_HOME/etc/hadoop/core-site.xml
+
+  DN_DIR_PREFIX=$DOCKER_TEST_DIR/additional_datanode/
+  PID_DIR=$DOCKER_TEST_DIR/pid/$DN
+
+  if [ ! -d "$DN_DIR_PREFIX" ]; then
+    mkdir -p $DN_DIR_PREFIX
+  fi
+
+  if [ ! -d "$PID_DIR" ]; then
+    mkdir -p $PID_DIR
+  fi
+
+  export HADOOP_PID_DIR=$PID_DIR
+  DN_CONF_OPTS="\
+  -Dhadoop.tmp.dir=$DN_DIR_PREFIX$DN\
+  -Ddfs.datanode.address=localhost:5001$DN \
+  -Ddfs.datanode.http.address=localhost:5008$DN \
+  -Ddfs.datanode.ipc.address=localhost:5002$DN"
+  $HADOOP_HOME/bin/hdfs --daemon start datanode $DN_CONF_OPTS
+  $HADOOP_HOME/bin/hdfs dfsadmin -report
+}
+
+setup_hdfs () {
+  echo "::warning::docker_java17_test.sh copying hadoop conf"
+  mv /opt/bundle-validation/tmp-conf-dir/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+  mv /opt/bundle-validation/tmp-conf-dir/core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
+
+  $HADOOP_HOME/bin/hdfs namenode -format
+  $HADOOP_HOME/bin/hdfs --daemon start namenode
+  echo "::warning::docker_java17_test.sh starting hadoop hdfs"
+  $HADOOP_HOME/sbin/start-dfs.sh
+
+  # start datanodes
+  for i in $(seq 1 3)
+  do
+    start_datanode $i
+  done
+
+  echo "::warning::docker_java17_test.sh starting hadoop hdfs, hdfs report"
+  $HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/root
+  $HADOOP_HOME/bin/hdfs dfs -ls /user/
+  if [ "$?" -ne 0 ]; then
+    echo "::error::docker_java17_test.sh Failed setting up HDFS!"
+    exit 1
+  fi
+}
+
+stop_hdfs() {
+  use_default_java_runtime
+  echo "::warning::docker_java17_test.sh stopping hadoop hdfs"
+  $HADOOP_HOME/sbin/stop-dfs.sh
+}
+
+build_hudi_java8 () {
+  use_default_java_runtime
+  mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true \
+  -e -ntp -B -V -Dgpg.skip -Djacoco.skip -Pwarn-log \
+  -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn \
+  -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn \
+  -pl packaging/hudi-spark-bundle -am
+
+  if [ "$?" -ne 0 ]; then
+    echo "::error::docker_java17_test.sh Failed building Hudi with Java 8!"
+    exit 1
+  fi
+
+  if [ ! -d $JARS_DIR ]; then
+    mkdir -p $JARS_DIR
+  fi
+
+  cp ./packaging/hudi-spark-bundle/target/hudi-spark*.jar $JARS_DIR/spark.jar
+}
+
+run_docker_tests() {
+  echo "::warning::docker_java17_test.sh run_docker_tests running Hudi maven tests on Docker"
+  change_java_runtime_version
+
+  mvn -e test -D$SPARK_PROFILE -D$SCALA_PROFILE -Djava17 -Duse.external.hdfs=true \
+  -Dtest=org.apache.hudi.common.functional.TestHoodieLogFormat,org.apache.hudi.common.util.TestDFSPropertiesConfiguration,org.apache.hudi.common.fs.TestHoodieWrapperFileSystem \
+  -DfailIfNoTests=false -pl hudi-common
+
+  if [ "$?" -ne 0 ]; then
+    echo "::error::docker_java17_test.sh Hudi maven tests failed"
+    exit 1
+  fi
+  echo "::warning::docker_java17_test.sh Hudi maven tests passed!"
+
+  echo "::warning::docker_java17_test.sh run_docker_tests running Hudi Scala script tests on Docker"
+  $SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/docker_java17/TestHiveClientUtils.scala
+  if [ "$?" -ne 0 ]; then
+    echo "::error::docker_java17_test.sh TestHiveClientUtils failed"
+    exit 1
+  fi
+  echo "::warning::docker_java17_test.sh run_docker_tests Hudi Scala script tests passed!"
+
+  echo "::warning::docker_java17_test.sh All Docker tests passed!"
+  use_default_java_runtime
+}
+
+############################
+# Execute tests
+############################
+cd $DOCKER_TEST_DIR
+# make sure system binaries such as ssh (used by the HDFS start/stop scripts) are on PATH
+export PATH=/usr/bin:$PATH
+
+echo "::warning::docker_java17_test.sh Building Hudi with Java 8"
+build_hudi_java8
+echo "::warning::docker_java17_test.sh Done building Hudi with Java 8"
+
+setup_hdfs
+
+echo "::warning::docker_java17_test.sh Running tests with Java 17"
+run_docker_tests
+if [ "$?" -ne 0 ]; then
+  exit 1
+fi
+echo "::warning::docker_java17_test.sh Done running tests with Java 17"
+
+stop_hdfs
diff --git a/packaging/bundle-validation/run_docker_java17.sh b/packaging/bundle-validation/run_docker_java17.sh
new file mode 100755
index 0000000000000..879b56367e0c0
--- /dev/null
+++ b/packaging/bundle-validation/run_docker_java17.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
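+
+# run_docker_java17.sh is the entry point invoked by the docker-java17-test GitHub Actions job:
+# it maps SPARK_RUNTIME to compatible Hadoop/Hive/Derby/Flink/Spark versions, builds the
+# bundle-validation image, and runs docker_java17/docker_java17_test.sh inside the container
+# with the checked-out repository mounted at /opt/bundle-validation/docker-test.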
+ +echo "SPARK_RUNTIME: $SPARK_RUNTIME SPARK_PROFILE (optional): $SPARK_PROFILE" +echo "SCALA_PROFILE: $SCALA_PROFILE" +CONTAINER_NAME=hudi_docker +DOCKER_TEST_DIR=/opt/bundle-validation/docker-test + +# choose versions based on build profiles +if [[ ${SPARK_RUNTIME} == 'spark2.4.8' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=2.3.9 + DERBY_VERSION=10.10.2.0 + FLINK_VERSION=1.13.6 + SPARK_VERSION=2.4.8 + SPARK_HADOOP_VERSION=2.7 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1136hive239spark248 +elif [[ ${SPARK_RUNTIME} == 'spark3.0.2' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.14.6 + SPARK_VERSION=3.0.2 + SPARK_HADOOP_VERSION=2.7 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1146hive313spark302 +elif [[ ${SPARK_RUNTIME} == 'spark3.1.3' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.13.6 + SPARK_VERSION=3.1.3 + SPARK_HADOOP_VERSION=2.7 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1136hive313spark313 +elif [[ ${SPARK_RUNTIME} == 'spark3.2.3' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.14.6 + SPARK_VERSION=3.2.3 + SPARK_HADOOP_VERSION=2.7 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1146hive313spark323 +elif [[ ${SPARK_RUNTIME} == 'spark3.3.1' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.15.3 + SPARK_VERSION=3.3.1 + SPARK_HADOOP_VERSION=2 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1153hive313spark331 +elif [[ ${SPARK_RUNTIME} == 'spark3.3.2' ]]; then + HADOOP_VERSION=2.7.7 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.17.0 + SPARK_VERSION=3.3.2 + SPARK_HADOOP_VERSION=2 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1170hive313spark332 +elif [[ ${SPARK_RUNTIME} == 'spark3.4.0' ]]; then + HADOOP_VERSION=3.3.5 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.17.0 + SPARK_VERSION=3.4.0 + SPARK_HADOOP_VERSION=3 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1170hive313spark340 +fi + +# build docker image +cd ${GITHUB_WORKSPACE}/packaging/bundle-validation || exit 1 +docker build \ +--build-arg HADOOP_VERSION=$HADOOP_VERSION \ +--build-arg HIVE_VERSION=$HIVE_VERSION \ +--build-arg DERBY_VERSION=$DERBY_VERSION \ +--build-arg FLINK_VERSION=$FLINK_VERSION \ +--build-arg SPARK_VERSION=$SPARK_VERSION \ +--build-arg SPARK_HADOOP_VERSION=$SPARK_HADOOP_VERSION \ +--build-arg CONFLUENT_VERSION=$CONFLUENT_VERSION \ +--build-arg KAFKA_CONNECT_HDFS_VERSION=$KAFKA_CONNECT_HDFS_VERSION \ +--build-arg IMAGE_TAG=$IMAGE_TAG \ +-t hudi-ci-bundle-validation:$IMAGE_TAG \ +. 
+ +# run Java 17 test script in docker +docker run --name $CONTAINER_NAME \ + -v ${GITHUB_WORKSPACE}:$DOCKER_TEST_DIR \ + -i hudi-ci-bundle-validation:$IMAGE_TAG bash docker_java17/docker_java17_test.sh $SPARK_PROFILE $SCALA_PROFILE diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh index 7ac90ae64c57d..75d4227c74a37 100755 --- a/packaging/bundle-validation/validate.sh +++ b/packaging/bundle-validation/validate.sh @@ -55,7 +55,7 @@ change_java_runtime_version () { # Function to change Java runtime version to default Java 8 ## use_default_java_runtime () { - echo "Use default java runtime under ${DEFAULT_JAVA_HOME}" + echo "::warning:: Use default java runtime under ${DEFAULT_JAVA_HOME}" export JAVA_HOME=${DEFAULT_JAVA_HOME} } @@ -189,7 +189,6 @@ test_flink_bundle() { echo "::warning::validate.sh done validating Flink bundle validation was successful." } - ## # Function to test the kafka-connect bundle. # It runs zookeeper, kafka broker, schema registry, and connector worker. diff --git a/pom.xml b/pom.xml index a1d1fc5363576..42794f596d414 100644 --- a/pom.xml +++ b/pom.xml @@ -156,7 +156,7 @@ flink-clients flink-connector-kafka flink-hadoop-compatibility_2.12 - 5.17.2 + 7.5.3 3.0.2 3.1.3 3.2.3 @@ -203,7 +203,7 @@ provided -Xmx2g - 0.8.5 + 0.8.8 compile org.apache.hudi. compile @@ -2614,6 +2614,18 @@ + + java17 + + -Xmx2g --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djol.magicFieldOffset=true + + + + java17 + + + +
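
For reference, a minimal standalone sketch (not part of the patch; the class name JavaVersionProbe is made up for illustration) of how the java.version parsing added in HoodieTestUtils.getJavaVersion() resolves typical version strings, which is what gates the external-HDFS and Java 17 code paths above:

public class JavaVersionProbe {
  // Same parsing as HoodieTestUtils.getJavaVersion(): legacy "1.x" strings
  // (Java 8 and earlier) yield the digit after "1.", newer runtimes yield the
  // leading major version number.
  static int majorVersion(String version) {
    if (version.startsWith("1.")) {
      version = version.substring(2, 3);
    } else {
      int dot = version.indexOf(".");
      if (dot != -1) {
        version = version.substring(0, dot);
      }
    }
    return Integer.parseInt(version);
  }

  public static void main(String[] args) {
    System.out.println(majorVersion("1.8.0_292")); // prints 8
    System.out.println(majorVersion("11.0.19"));   // prints 11
    System.out.println(majorVersion("17.0.8"));    // prints 17
    System.out.println(majorVersion(System.getProperty("java.version")));
  }
}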