Merged

47 commits
68f6b69
Add Hadoop conf to HiveConf for HiveSyncConfig
CTTY Jul 17, 2023
10f012b
Add GitHub CI for spark33/34 Java 17
CTTY Jul 6, 2023
b8035db
Testing docker java 17 test
CTTY Jul 15, 2023
e083529
minor
CTTY Jul 15, 2023
7a86dee
test
CTTY Jul 15, 2023
6bdcdac
All docker java17 tests passed, adding all other CIs back
CTTY Jul 16, 2023
5a07bf7
minor
CTTY Jul 16, 2023
e8cd0e2
trigger github ci
CTTY Jul 16, 2023
8d3cca1
fix java17 test flag
CTTY Jul 16, 2023
44d10bd
Rebase onto master. Migrate one new test
CTTY Jul 16, 2023
100b397
stabilize TestHoodieWrapperFileSystem
CTTY Jul 16, 2023
60eb622
trigger ci
CTTY Jul 17, 2023
8d76f7d
add ps in alpine image
CTTY Jul 17, 2023
3f8c949
minor
CTTY Jul 17, 2023
26777da
minor
CTTY Jul 17, 2023
1735065
isolate bundle validation issue
CTTY Jul 18, 2023
2cc01c6
Change option to use external hdfs, wrap external hdfs logic to util …
CTTY Jul 19, 2023
ec2e7cd
minor
CTTY Jul 19, 2023
c1da8c4
try removing hdfs-site
CTTY Jul 19, 2023
dbfec9a
combine docker java17 test and bundle validation
CTTY Jul 19, 2023
256b4b0
make new script executable
CTTY Jul 20, 2023
d34bfdd
fix script permission
CTTY Jul 20, 2023
f6e952f
add docker container name
CTTY Jul 20, 2023
6358c86
restart docker container
CTTY Jul 20, 2023
24d5ac0
fix path
CTTY Jul 20, 2023
cc1d428
minor
CTTY Jul 20, 2023
ddad307
check dir
CTTY Jul 20, 2023
de24436
fix path
CTTY Jul 20, 2023
323c1de
fixing ssh command
CTTY Jul 20, 2023
6a86262
only test for spark 3.4
CTTY Jul 20, 2023
8e54816
trigger ci again
CTTY Jul 20, 2023
5a0ed39
Have 6 datanodes
CTTY Jul 20, 2023
10f4cb2
10 datanodes
CTTY Jul 20, 2023
cdaff5f
have 8 datanodes
CTTY Jul 20, 2023
409e7d8
5 nodes
CTTY Jul 21, 2023
172b237
3 nodes
CTTY Jul 21, 2023
d018c71
run java 17 ci first
CTTY Jul 21, 2023
9bfcf7c
adjust ci
CTTY Jul 21, 2023
150f0be
minor
CTTY Jul 21, 2023
d496aed
run clean test
CTTY Jul 21, 2023
91a5dec
separate CIs
CTTY Jul 21, 2023
c8540ca
Fix core site
CTTY Jul 21, 2023
3723d50
Fix core site
CTTY Jul 21, 2023
9bc5072
fix core-site
CTTY Jul 21, 2023
91c5a05
try removing spark-streaming test dep
CTTY Jul 24, 2023
6b33d37
add spark streaming back
CTTY Jul 24, 2023
8b62b65
Revert "Add Hadoop conf to HiveConf for HiveSyncConfig"
CTTY Jul 25, 2023
105 changes: 94 additions & 11 deletions .github/workflows/bot.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -76,9 +76,9 @@ jobs:
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -112,6 +112,61 @@ jobs:
run:
mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS

test-spark-java17:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
architecture: x64
- name: Build Project
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
run:
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
java-version: '17'
distribution: 'adopt'
architecture: x64
- name: Quickstart Test
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
run:
mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS
- name: UT - Common & Spark
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
Reviewer comment (Contributor): nit: not required.

run:
mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
- name: FT - Spark
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
Reviewer comment (Contributor): nit: not required.

run:
mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS

test-flink:
runs-on: ubuntu-latest
strategy:
@@ -123,9 +178,9 @@ jobs:
- flinkProfile: "flink1.16"
- flinkProfile: "flink1.17"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -151,6 +206,34 @@ jobs:
mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS
mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS

docker-java17-test:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- flinkProfile: 'flink1.17'
sparkProfile: 'spark3.4'
sparkRuntime: 'spark3.4.0'

steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
architecture: x64
- name: UT/FT - Docker Test - OpenJDK 17
env:
FLINK_PROFILE: ${{ matrix.flinkProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_RUNTIME: ${{ matrix.sparkRuntime }}
SCALA_PROFILE: 'scala-2.12'
if: ${{ env.SPARK_PROFILE >= 'spark3.4' }} # Only support Spark 3.4 for now
Reviewer comment (Contributor): nit: not required.

run: |
HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
./packaging/bundle-validation/run_docker_java17.sh

validate-bundles:
runs-on: ubuntu-latest
strategy:
@@ -181,9 +264,9 @@ jobs:
sparkProfile: 'spark2.4'
sparkRuntime: 'spark2.4.8'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -255,9 +338,9 @@ jobs:
sparkProfile: 'spark2.4'
sparkRuntime: 'spark2.4.8'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -294,9 +377,9 @@ jobs:
- sparkProfile: 'spark2.4'
sparkArchive: 'spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
2 changes: 1 addition & 1 deletion .github/workflows/pr_compliance.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- name: run script
run: python3 scripts/pr_compliance.py
run: python3 scripts/pr_compliance.py



7 changes: 7 additions & 0 deletions hudi-common/pom.xml
@@ -248,6 +248,13 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<scope>test</scope>
<version>${spark.version}</version>
</dependency>

<!-- Force to use 2.11.0 since hbase-server requires 2.7+ -->
<dependency>
<groupId>commons-io</groupId>
@@ -450,10 +450,8 @@ public void testGenerateProjectionSchema() {
assertTrue(fieldNames1.contains("_row_key"));
assertTrue(fieldNames1.contains("timestamp"));

assertEquals("Field fake_field not found in log schema. Query cannot proceed! Derived Schema Fields: "
+ "[non_pii_col, _hoodie_commit_time, _row_key, _hoodie_partition_path, _hoodie_record_key, pii_col,"
+ " _hoodie_commit_seqno, _hoodie_file_name, timestamp]",
assertThrows(HoodieException.class, () ->
HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field"))).getMessage());
assertTrue(assertThrows(HoodieException.class, () ->
HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field")))
.getMessage().contains("Field fake_field not found in log schema. Query cannot proceed!"));
}
}
@@ -26,11 +26,15 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs;
import static org.junit.jupiter.api.Assertions.assertEquals;

class TestHoodieWrapperFileSystem {
@@ -40,11 +44,23 @@ class TestHoodieWrapperFileSystem {
private static MiniDFSCluster dfsCluster;

@BeforeAll
public static void prepareFs() throws IOException {
hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf());
dfsCluster = hdfsTestService.start(true);
fs = dfsCluster.getFileSystem();
basePath = fs.getWorkingDirectory().toString();
public static void setUp() throws IOException {
if (shouldUseExternalHdfs()) {
fs = useExternalHdfs();
} else {
hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf());
dfsCluster = hdfsTestService.start(true);
fs = dfsCluster.getFileSystem();
}
basePath = fs.getWorkingDirectory() + "/TestHoodieWrapperFileSystem/";
fs.mkdirs(new Path(basePath));
}

@AfterAll
public static void cleanUp() {
if (hdfsTestService != null) {
hdfsTestService.stop();
}
}

@Test
@@ -58,6 +74,6 @@ public void testCreateImmutableFileInPath() throws IOException {
fs.createImmutableFileInPath(testFile, Option.of(testContent.getBytes()));

assertEquals(1, fs.listStatus(new Path(basePath)).length,
"create same file twice should only have on file exists");
"create same file twice should only have one file exists, files: " + fs.listStatus(new Path(basePath)));
}
}
}
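Note: the setUp/cleanUp changes above, and the similar ones in the log-format tests below, rely on HoodieTestUtils.shouldUseExternalHdfs(), useExternalHdfs(), and getJavaVersion(), which this diff only imports. Below is a minimal sketch of what such helpers could look like. The method names follow the static imports in the diff; the HUDI_EXTERNAL_HDFS environment variable and the exact logic are assumptions for illustration, not the PR's actual implementation.

```java
// Hypothetical sketch of the HoodieTestUtils helpers referenced in this PR.
// Method names follow the static imports above; the HUDI_EXTERNAL_HDFS
// environment variable and the threshold logic are assumptions.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public final class HoodieTestUtilsSketch {

  // Parse the JVM major version, e.g. "1.8.0_312" -> 8, "17.0.7" -> 17.
  public static int getJavaVersion() {
    String version = System.getProperty("java.version");
    if (version.startsWith("1.")) {
      return Integer.parseInt(version.substring(2, 3));
    }
    int dot = version.indexOf('.');
    return Integer.parseInt(dot == -1 ? version : version.substring(0, dot));
  }

  // MiniDFSCluster cannot initialize under Java 17, so tests fall back to an
  // externally provisioned HDFS when one is advertised via the environment.
  public static boolean shouldUseExternalHdfs() {
    return getJavaVersion() >= 17 && System.getenv("HUDI_EXTERNAL_HDFS") != null;
  }

  // Connect to the external cluster instead of starting a MiniDFSCluster.
  public static FileSystem useExternalHdfs() throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", System.getenv("HUDI_EXTERNAL_HDFS"));
    return FileSystem.get(conf);
  }
}
```

Under this assumption, the docker-java17-test job would export the corresponding variable inside the container before invoking Maven, so these tests would talk to an external HDFS under Java 17 while still starting MiniDFSCluster unchanged on the JDK 8 runners.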
@@ -73,6 +73,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -104,6 +105,9 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion;
import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -131,15 +135,21 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
private String spillableBasePath;

@BeforeAll
public static void setUpClass() throws IOException, InterruptedException {
// Append is not supported in LocalFileSystem. HDFS needs to be setup.
hdfsTestService = new HdfsTestService();
fs = hdfsTestService.start(true).getFileSystem();
public static void setUpClass() throws IOException {
if (shouldUseExternalHdfs()) {
fs = useExternalHdfs();
} else {
// Append is not supported in LocalFileSystem. HDFS needs to be setup.
hdfsTestService = new HdfsTestService();
fs = hdfsTestService.start(true).getFileSystem();
}
}

@AfterAll
public static void tearDownClass() {
hdfsTestService.stop();
if (hdfsTestService != null) {
hdfsTestService.stop();
}
}

@BeforeEach
@@ -2539,7 +2549,10 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema(
new HashMap<HoodieLogBlockType, Integer>() {{
put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported
put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported
put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_9() ? 1802 : 1809);
put(HoodieLogBlockType.PARQUET_DATA_BLOCK,
HoodieAvroUtils.gteqAvro1_9()
? getJavaVersion() == 17 || getJavaVersion() == 11 ? 1803 : 1802
: 1809);
}};

List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
@@ -38,7 +38,9 @@
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
@@ -53,6 +55,7 @@
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;

import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

@@ -63,6 +66,9 @@ public class TestHoodieLogFormatAppendFailure {

@BeforeAll
public static void setUpClass() throws IOException {
// This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17
Assumptions.assumeFalse(shouldUseExternalHdfs());

// NOTE : The MiniClusterDFS leaves behind the directory under which the cluster was created
baseDir = new File("/tmp/" + UUID.randomUUID());
FileUtil.fullyDelete(baseDir);
@@ -78,6 +84,9 @@ public static void setUpClass() throws IOException {

@AfterAll
public static void tearDownClass() {
// This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17
Assumptions.assumeFalse(shouldUseExternalHdfs());

cluster.shutdown(true);
// Force clean up the directory under which the cluster was created
FileUtil.fullyDelete(baseDir);
@@ -145,5 +154,4 @@ public void testFailedToGetAppendStreamFromHDFSNameNode()
assertNotEquals(writer.getLogFile().getLogVersion(), logFileVersion);
writer.close();
}

}