Merged

47 commits
68f6b69
Add Hadoop conf to HiveConf for HiveSyncConfig
CTTY Jul 17, 2023
10f012b
Add GitHub CI for spark33/34 Java 17
CTTY Jul 6, 2023
b8035db
Testing docker java 17 test
CTTY Jul 15, 2023
e083529
minor
CTTY Jul 15, 2023
7a86dee
test
CTTY Jul 15, 2023
6bdcdac
All docker java17 tests passed, adding all other CIs back
CTTY Jul 16, 2023
5a07bf7
minor
CTTY Jul 16, 2023
e8cd0e2
trigger github ci
CTTY Jul 16, 2023
8d3cca1
fix java17 test flag
CTTY Jul 16, 2023
44d10bd
Rebase onto master. Migrate one new test
CTTY Jul 16, 2023
100b397
stabilize TestHoodieWrapperFileSystem
CTTY Jul 16, 2023
60eb622
trigger ci
CTTY Jul 17, 2023
8d76f7d
add ps in alpine image
CTTY Jul 17, 2023
3f8c949
minor
CTTY Jul 17, 2023
26777da
minor
CTTY Jul 17, 2023
1735065
isolate bundle validation issue
CTTY Jul 18, 2023
2cc01c6
Change option to use external hdfs, wrap external hdfs logic to util …
CTTY Jul 19, 2023
ec2e7cd
minor
CTTY Jul 19, 2023
c1da8c4
try removing hdfs-site
CTTY Jul 19, 2023
dbfec9a
combine docker java17 test and bundle validation
CTTY Jul 19, 2023
256b4b0
make new script executable
CTTY Jul 20, 2023
d34bfdd
fix script permission
CTTY Jul 20, 2023
f6e952f
add docker container name
CTTY Jul 20, 2023
6358c86
restart docker container
CTTY Jul 20, 2023
24d5ac0
fix path
CTTY Jul 20, 2023
cc1d428
minor
CTTY Jul 20, 2023
ddad307
check dir
CTTY Jul 20, 2023
de24436
fix path
CTTY Jul 20, 2023
323c1de
fixing ssh command
CTTY Jul 20, 2023
6a86262
only test for spark 3.4
CTTY Jul 20, 2023
8e54816
trigger ci again
CTTY Jul 20, 2023
5a0ed39
Have 6 datanodes
CTTY Jul 20, 2023
10f4cb2
10 datanodes
CTTY Jul 20, 2023
cdaff5f
have 8 datanodes
CTTY Jul 20, 2023
409e7d8
5 nodes
CTTY Jul 21, 2023
172b237
3 nodes
CTTY Jul 21, 2023
d018c71
run java 17 ci first
CTTY Jul 21, 2023
9bfcf7c
adjust ci
CTTY Jul 21, 2023
150f0be
minor
CTTY Jul 21, 2023
d496aed
run clean test
CTTY Jul 21, 2023
91a5dec
separate CIs
CTTY Jul 21, 2023
c8540ca
Fix core site
CTTY Jul 21, 2023
3723d50
Fix core site
CTTY Jul 21, 2023
9bc5072
fix core-site
CTTY Jul 21, 2023
91c5a05
try removing spark-streaming test dep
CTTY Jul 24, 2023
6b33d37
add spark streaming back
CTTY Jul 24, 2023
8b62b65
Revert "Add Hadoop conf to HiveConf for HiveSyncConfig"
CTTY Jul 25, 2023
105 changes: 94 additions & 11 deletions .github/workflows/bot.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -76,9 +76,9 @@ jobs:
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -112,6 +112,61 @@ jobs:
run:
mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS

test-spark-java17:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
architecture: x64
- name: Build Project
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
run:
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
java-version: '17'
distribution: 'adopt'
architecture: x64
- name: Quickstart Test
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
run:
mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS
- name: UT - Common & Spark
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
Reviewer comment (Contributor): nit: not required.

run:
mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
- name: FT - Spark
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
Reviewer comment (Contributor): nit: not required.

run:
mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS

test-flink:
runs-on: ubuntu-latest
strategy:
@@ -123,9 +178,9 @@ jobs:
- flinkProfile: "flink1.16"
- flinkProfile: "flink1.17"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -151,6 +206,34 @@ jobs:
mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS
mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS

docker-java17-test:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- flinkProfile: 'flink1.17'
sparkProfile: 'spark3.4'
sparkRuntime: 'spark3.4.0'

steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
architecture: x64
- name: UT/FT - Docker Test - OpenJDK 17
env:
FLINK_PROFILE: ${{ matrix.flinkProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_RUNTIME: ${{ matrix.sparkRuntime }}
SCALA_PROFILE: 'scala-2.12'
if: ${{ env.SPARK_PROFILE >= 'spark3.4' }} # Only support Spark 3.4 for now
Reviewer comment (Contributor): nit: not required.

run: |
HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
./packaging/bundle-validation/run_docker_java17.sh

validate-bundles:
runs-on: ubuntu-latest
strategy:
@@ -181,9 +264,9 @@ jobs:
sparkProfile: 'spark2.4'
sparkRuntime: 'spark2.4.8'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -255,9 +338,9 @@ jobs:
sparkProfile: 'spark2.4'
sparkRuntime: 'spark2.4.8'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
@@ -294,9 +377,9 @@ jobs:
- sparkProfile: 'spark2.4'
sparkArchive: 'spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz'
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up JDK 8
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
2 changes: 1 addition & 1 deletion .github/workflows/pr_compliance.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- name: run script
run: python3 scripts/pr_compliance.py
run: python3 scripts/pr_compliance.py



7 changes: 7 additions & 0 deletions hudi-common/pom.xml
@@ -248,6 +248,13 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<scope>test</scope>
<version>${spark.version}</version>
</dependency>

<!-- Force to use 2.11.0 since hbase-server requires 2.7+ -->
<dependency>
<groupId>commons-io</groupId>
@@ -450,10 +450,8 @@ public void testGenerateProjectionSchema() {
assertTrue(fieldNames1.contains("_row_key"));
assertTrue(fieldNames1.contains("timestamp"));

assertEquals("Field fake_field not found in log schema. Query cannot proceed! Derived Schema Fields: "
+ "[non_pii_col, _hoodie_commit_time, _row_key, _hoodie_partition_path, _hoodie_record_key, pii_col,"
+ " _hoodie_commit_seqno, _hoodie_file_name, timestamp]",
assertThrows(HoodieException.class, () ->
HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field"))).getMessage());
assertTrue(assertThrows(HoodieException.class, () ->
HoodieAvroUtils.generateProjectionSchema(originalSchema, Arrays.asList("_row_key", "timestamp", "fake_field")))
.getMessage().contains("Field fake_field not found in log schema. Query cannot proceed!"));
}
}
@@ -26,11 +26,15 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs;
import static org.junit.jupiter.api.Assertions.assertEquals;

class TestHoodieWrapperFileSystem {
@@ -40,11 +44,23 @@ class TestHoodieWrapperFileSystem {
private static MiniDFSCluster dfsCluster;

@BeforeAll
public static void prepareFs() throws IOException {
hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf());
dfsCluster = hdfsTestService.start(true);
fs = dfsCluster.getFileSystem();
basePath = fs.getWorkingDirectory().toString();
public static void setUp() throws IOException {
if (shouldUseExternalHdfs()) {
fs = useExternalHdfs();
} else {
hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf());
dfsCluster = hdfsTestService.start(true);
fs = dfsCluster.getFileSystem();
}
basePath = fs.getWorkingDirectory() + "/TestHoodieWrapperFileSystem/";
fs.mkdirs(new Path(basePath));
}

@AfterAll
public static void cleanUp() {
if (hdfsTestService != null) {
hdfsTestService.stop();
}
}

@Test
@@ -58,6 +74,6 @@ public void testCreateImmutableFileInPath() throws IOException {
fs.createImmutableFileInPath(testFile, Option.of(testContent.getBytes()));

assertEquals(1, fs.listStatus(new Path(basePath)).length,
"create same file twice should only have on file exists");
"create same file twice should only have one file exists, files: " + fs.listStatus(new Path(basePath)));
}
}
}
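Note: the setUp/cleanUp changes above, and the similar ones in the log-format tests below, rely on HoodieTestUtils.shouldUseExternalHdfs(), useExternalHdfs(), and getJavaVersion(), which this diff only imports. Below is a minimal sketch of what such helpers could look like. The method names follow the static imports in the diff; the HUDI_EXTERNAL_HDFS environment variable and the exact logic are assumptions for illustration, not the PR's actual implementation.

```java
// Hypothetical sketch of the HoodieTestUtils helpers referenced in this PR.
// Method names follow the static imports above; the HUDI_EXTERNAL_HDFS
// environment variable and the threshold logic are assumptions.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public final class HoodieTestUtilsSketch {

  // Parse the JVM major version, e.g. "1.8.0_312" -> 8, "17.0.7" -> 17.
  public static int getJavaVersion() {
    String version = System.getProperty("java.version");
    if (version.startsWith("1.")) {
      return Integer.parseInt(version.substring(2, 3));
    }
    int dot = version.indexOf('.');
    return Integer.parseInt(dot == -1 ? version : version.substring(0, dot));
  }

  // MiniDFSCluster cannot initialize under Java 17, so tests fall back to an
  // externally provisioned HDFS when one is advertised via the environment.
  public static boolean shouldUseExternalHdfs() {
    return getJavaVersion() >= 17 && System.getenv("HUDI_EXTERNAL_HDFS") != null;
  }

  // Connect to the external cluster instead of starting a MiniDFSCluster.
  public static FileSystem useExternalHdfs() throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", System.getenv("HUDI_EXTERNAL_HDFS"));
    return FileSystem.get(conf);
  }
}
```

Under this assumption, the docker-java17-test job would export the corresponding variable inside the container before invoking Maven, so these tests would talk to an external HDFS under Java 17 while still starting MiniDFSCluster unchanged on the JDK 8 runners.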
@@ -73,6 +73,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -104,6 +105,9 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion;
import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -131,15 +135,21 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
private String spillableBasePath;

@BeforeAll
public static void setUpClass() throws IOException, InterruptedException {
// Append is not supported in LocalFileSystem. HDFS needs to be setup.
hdfsTestService = new HdfsTestService();
fs = hdfsTestService.start(true).getFileSystem();
public static void setUpClass() throws IOException {
if (shouldUseExternalHdfs()) {
fs = useExternalHdfs();
} else {
// Append is not supported in LocalFileSystem. HDFS needs to be setup.
hdfsTestService = new HdfsTestService();
fs = hdfsTestService.start(true).getFileSystem();
}
}

@AfterAll
public static void tearDownClass() {
hdfsTestService.stop();
if (hdfsTestService != null) {
hdfsTestService.stop();
}
}

@BeforeEach
@@ -2539,7 +2549,10 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema(
new HashMap<HoodieLogBlockType, Integer>() {{
put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported
put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported
put(HoodieLogBlockType.PARQUET_DATA_BLOCK, HoodieAvroUtils.gteqAvro1_9() ? 1802 : 1809);
put(HoodieLogBlockType.PARQUET_DATA_BLOCK,
HoodieAvroUtils.gteqAvro1_9()
? getJavaVersion() == 17 || getJavaVersion() == 11 ? 1803 : 1802
: 1809);
}};

List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
@@ -38,7 +38,9 @@
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
@@ -53,6 +55,7 @@
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;

import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

@@ -63,6 +66,9 @@ public class TestHoodieLogFormatAppendFailure {

@BeforeAll
public static void setUpClass() throws IOException {
// This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17
Assumptions.assumeFalse(shouldUseExternalHdfs());

// NOTE : The MiniClusterDFS leaves behind the directory under which the cluster was created
baseDir = new File("/tmp/" + UUID.randomUUID());
FileUtil.fullyDelete(baseDir);
@@ -78,6 +84,9 @@ public static void setUpClass() throws IOException {

@AfterAll
public static void tearDownClass() {
// This test is not supported yet for Java 17 due to MiniDFSCluster can't initialize under Java 17
Assumptions.assumeFalse(shouldUseExternalHdfs());

cluster.shutdown(true);
// Force clean up the directory under which the cluster was created
FileUtil.fullyDelete(baseDir);
@@ -145,5 +154,4 @@ public void testFailedToGetAppendStreamFromHDFSNameNode()
assertNotEquals(writer.getLogFile().getLogVersion(), logFileVersion);
writer.close();
}

}