diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java index c531eea35ebd9..1dc925b47ecb9 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java @@ -25,6 +25,7 @@ import org.apache.hudi.cli.TableHeader; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Reader; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; @@ -63,7 +64,7 @@ public String showArchivedCommits( throws IOException { System.out.println("===============> Showing only " + limit + " archived commits <==============="); String basePath = HoodieCLI.getTableMetaClient().getBasePath(); - Path archivePath = new Path(basePath + "/.hoodie/.commits_.archive*"); + Path archivePath = new Path(HoodieCLI.getTableMetaClient().getArchivePath() + "/.commits_.archive*"); if (folder != null && !folder.isEmpty()) { archivePath = new Path(basePath + "/.hoodie/" + folder); } @@ -138,9 +139,11 @@ public String showCommits( throws IOException { System.out.println("===============> Showing only " + limit + " archived commits <==============="); - String basePath = HoodieCLI.getTableMetaClient().getBasePath(); + HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); + String basePath = metaClient.getBasePath(); + Path archivePath = new Path(metaClient.getArchivePath() + "/.commits_.archive*"); FileStatus[] fsStatuses = - FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(new Path(basePath + "/.hoodie/.commits_.archive*")); + FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); List allCommits = new ArrayList<>(); for (FileStatus fs : fsStatuses) { // read the archived file diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java index 2db49a25a5165..015743d2f299f 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java @@ -179,7 +179,7 @@ private List convertBootstrapSourceFileMapping(List rows = new ArrayList<>(); for (BootstrapFileMapping mapping : mappingList) { rows.add(new Comparable[] {mapping.getPartitionPath(), mapping.getFileId(), - mapping.getBootstrapBasePath(), mapping.getBootstrapPartitionPath(), mapping.getBoostrapFileStatus().getPath().getUri()}); + mapping.getBootstrapBasePath(), mapping.getBootstrapPartitionPath(), mapping.getBootstrapFileStatus().getPath().getUri()}); } return rows; } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java index 7d5cee6939e86..f69e3d3ec9ac4 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java @@ -54,7 +54,7 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest { private String tablePath; @BeforeEach - public void init() throws IOException { + public void init() throws Exception { initDFS(); jsc.hadoopConfiguration().addResource(dfs.getConf()); HoodieCLI.conf = dfs.getConf(); @@ -156,7 +156,7 @@ public void testShowArchivedCommits() { * Test for command: show archived commits. */ @Test - public void testShowCommits() throws IOException { + public void testShowCommits() throws Exception { CommandResult cr = getShell().executeCommand("show archived commits"); assertTrue(cr.isSuccess()); final List rows = new ArrayList<>(); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java index 3da718964b49f..2311aaa22f3fa 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java @@ -62,7 +62,7 @@ public class TestCleansCommand extends AbstractShellIntegrationTest { private URL propsFilePath; @BeforeEach - public void init() throws IOException { + public void init() throws Exception { HoodieCLI.conf = jsc.hadoopConfiguration(); String tableName = "test_table"; diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCommitsCommand.java index 44e2b8097e5f8..c2509b12d4a0a 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCommitsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCommitsCommand.java @@ -76,17 +76,19 @@ public void init() throws IOException { "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); } - private LinkedHashMap generateData() { + private LinkedHashMap generateData() throws Exception { // generate data and metadata LinkedHashMap data = new LinkedHashMap<>(); data.put("102", new Integer[] {15, 10}); data.put("101", new Integer[] {20, 10}); data.put("100", new Integer[] {15, 15}); - data.forEach((key, value) -> { + for (Map.Entry entry : data.entrySet()) { + String key = entry.getKey(); + Integer[] value = entry.getValue(); HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, key, jsc.hadoopConfiguration(), Option.of(value[0]), Option.of(value[1])); - }); + } metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); assertEquals(3, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), @@ -138,7 +140,7 @@ private String generateExpectData(int records, Map data) thro * Test case of 'commits show' command. */ @Test - public void testShowCommits() throws IOException { + public void testShowCommits() throws Exception { Map data = generateData(); CommandResult cr = getShell().executeCommand("commits show"); @@ -154,7 +156,7 @@ public void testShowCommits() throws IOException { * Test case of 'commits showarchived' command. */ @Test - public void testShowArchivedCommits() throws IOException { + public void testShowArchivedCommits() throws Exception { // Generate archive HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath) .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) @@ -168,10 +170,12 @@ public void testShowArchivedCommits() throws IOException { data.put("102", new Integer[] {25, 45}); data.put("101", new Integer[] {35, 15}); - data.forEach((key, value) -> { + for (Map.Entry entry : data.entrySet()) { + String key = entry.getKey(); + Integer[] value = entry.getValue(); HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, key, jsc.hadoopConfiguration(), Option.of(value[0]), Option.of(value[1])); - }); + } // archive metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); @@ -198,7 +202,7 @@ public void testShowArchivedCommits() throws IOException { * Test case of 'commit showpartitions' command. */ @Test - public void testShowCommitPartitions() { + public void testShowCommitPartitions() throws Exception { Map data = generateData(); String commitInstant = "101"; @@ -235,7 +239,7 @@ public void testShowCommitPartitions() { * Test case of 'commit showfiles' command. */ @Test - public void testShowCommitFiles() { + public void testShowCommitFiles() throws Exception { Map data = generateData(); String commitInstant = "101"; @@ -270,7 +274,7 @@ public void testShowCommitFiles() { * Test case of 'commits compare' command. */ @Test - public void testCompareCommits() throws IOException { + public void testCompareCommits() throws Exception { Map data = generateData(); String tableName2 = "test_table2"; @@ -278,10 +282,12 @@ public void testCompareCommits() throws IOException { HoodieTestUtils.init(jsc.hadoopConfiguration(), tablePath2, getTableType()); data.remove("102"); - data.forEach((key, value) -> { + for (Map.Entry entry : data.entrySet()) { + String key = entry.getKey(); + Integer[] value = entry.getValue(); HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath2, key, jsc.hadoopConfiguration(), Option.of(value[0]), Option.of(value[1])); - }); + } CommandResult cr = getShell().executeCommand(String.format("commits compare --path %s", tablePath2)); assertTrue(cr.isSuccess()); @@ -298,7 +304,7 @@ public void testCompareCommits() throws IOException { * Test case of 'commits sync' command. */ @Test - public void testSyncCommits() throws IOException { + public void testSyncCommits() throws Exception { Map data = generateData(); String tableName2 = "test_table2"; @@ -306,10 +312,12 @@ public void testSyncCommits() throws IOException { HoodieTestUtils.init(jsc.hadoopConfiguration(), tablePath2, getTableType(), tableName2); data.remove("102"); - data.forEach((key, value) -> { + for (Map.Entry entry : data.entrySet()) { + String key = entry.getKey(); + Integer[] value = entry.getValue(); HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath2, key, jsc.hadoopConfiguration(), Option.of(value[0]), Option.of(value[1])); - }); + } CommandResult cr = getShell().executeCommand(String.format("commits sync --path %s", tablePath2)); assertTrue(cr.isSuccess()); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java index e7a4603019aa4..10197a32518fd 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java @@ -26,12 +26,12 @@ import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest; import org.apache.hudi.client.HoodieWriteClient; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; -import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; @@ -41,14 +41,18 @@ import org.junit.jupiter.api.Test; import org.springframework.shell.core.CommandResult; -import java.io.File; import java.io.IOException; +import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Stream; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -59,39 +63,37 @@ public class TestRollbacksCommand extends AbstractShellIntegrationTest { @BeforeEach - public void init() throws IOException { + public void init() throws Exception { String tableName = "test_table"; - String tablePath = basePath + File.separator + tableName; + String tablePath = Paths.get(basePath, tableName).toString(); new TableCommand().createTable( tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); - + metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); //Create some commits files and parquet files - String commitTime1 = "100"; - String commitTime2 = "101"; - String commitTime3 = "102"; - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath); - - // two commit files - HoodieTestUtils.createCommitFiles(tablePath, commitTime1, commitTime2); - // one .inflight commit file - HoodieTestUtils.createInflightCommitFiles(tablePath, commitTime3); - - // generate commit files for commits - for (String commitTime : Arrays.asList(commitTime1, commitTime2, commitTime3)) { - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, commitTime, "file-1"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, commitTime, "file-2"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, commitTime, "file-3"); - } - + Map partitionAndFileId = new HashMap() { + { + put(DEFAULT_FIRST_PARTITION_PATH, "file-1"); + put(DEFAULT_SECOND_PARTITION_PATH, "file-2"); + put(DEFAULT_THIRD_PARTITION_PATH, "file-3"); + } + }; + HoodieTestTable.of(metaClient) + .withPartitionMetaFiles(DEFAULT_PARTITION_PATHS) + .addCommit("100") + .withBaseFilesInPartitions(partitionAndFileId) + .addCommit("101") + .withBaseFilesInPartitions(partitionAndFileId) + .addInflightCommit("102") + .withBaseFilesInPartitions(partitionAndFileId); // generate two rollback HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(tablePath) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); try (HoodieWriteClient client = getHoodieWriteClient(config)) { // Rollback inflight commit3 and commit2 - client.rollback(commitTime3); - client.rollback(commitTime2); + client.rollback("102"); + client.rollback("101"); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java index 35e5b47da368a..cd4f1960232b0 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java @@ -73,17 +73,19 @@ public void init() throws IOException { * Test case for command 'stats wa'. */ @Test - public void testWriteAmplificationStats() { + public void testWriteAmplificationStats() throws Exception { // generate data and metadata Map data = new LinkedHashMap<>(); data.put("100", new Integer[] {15, 10}); data.put("101", new Integer[] {20, 10}); data.put("102", new Integer[] {15, 15}); - data.forEach((key, value) -> { - HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, key, jsc.hadoopConfiguration(), - Option.of(value[0]), Option.of(value[1])); - }); + for (Map.Entry entry : data.entrySet()) { + String k = entry.getKey(); + Integer[] v = entry.getValue(); + HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, k, jsc.hadoopConfiguration(), + Option.of(v[0]), Option.of(v[1])); + } CommandResult cr = getShell().executeCommand("stats wa"); assertTrue(cr.isSuccess()); @@ -93,7 +95,7 @@ public void testWriteAmplificationStats() { DecimalFormat df = new DecimalFormat("#.00"); data.forEach((key, value) -> { // there are two partitions, so need to *2 - rows.add(new Comparable[]{key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1])}); + rows.add(new Comparable[] {key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1])}); }); int totalWrite = data.values().stream().map(integers -> integers[0] * 2).mapToInt(s -> s).sum(); int totalUpdate = data.values().stream().map(integers -> integers[1] * 2).mapToInt(s -> s).sum(); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java index 2334f12b4aead..b9aa3f7310c3a 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java @@ -27,8 +27,7 @@ import org.apache.hudi.common.table.HoodieTableVersion; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.FileCreateUtils; -import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -36,11 +35,14 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import java.io.File; import java.io.IOException; -import java.util.Arrays; +import java.nio.file.Paths; import java.util.Properties; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -51,45 +53,31 @@ public class TestUpgradeDowngradeCommand extends AbstractShellIntegrationTest { private String tablePath; @BeforeEach - public void init() throws IOException { + public void init() throws Exception { String tableName = "test_table"; - tablePath = basePath + File.separator + tableName; + tablePath = Paths.get(basePath, tableName).toString(); new TableCommand().createTable( tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); - + metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); //Create some commits files and parquet files - String commitTime1 = "100"; - String commitTime2 = "101"; - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath); - - // one commit file - HoodieTestUtils.createCommitFiles(tablePath, commitTime1); - // one .inflight commit file - HoodieTestUtils.createInflightCommitFiles(tablePath, commitTime2); - - // generate commit files for commit 100 - for (String commitTime : Arrays.asList(commitTime1)) { - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, commitTime, "file-1"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, commitTime, "file-2"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, commitTime, "file-3"); - } - - // generate commit and marker files for inflight commit 101 - for (String commitTime : Arrays.asList(commitTime2)) { - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, commitTime, "file-1"); - FileCreateUtils.createMarkerFile(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, commitTime, "file-1", IOType.MERGE); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, commitTime, "file-2"); - FileCreateUtils.createMarkerFile(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, commitTime, "file-2", IOType.MERGE); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, commitTime, "file-3"); - FileCreateUtils.createMarkerFile(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, commitTime, "file-3", IOType.MERGE); - } + HoodieTestTable.of(metaClient) + .withPartitionMetaFiles(DEFAULT_PARTITION_PATHS) + .addCommit("100") + .withBaseFilesInPartition(DEFAULT_FIRST_PARTITION_PATH, "file-1") + .withBaseFilesInPartition(DEFAULT_SECOND_PARTITION_PATH, "file-2") + .withBaseFilesInPartition(DEFAULT_THIRD_PARTITION_PATH, "file-3") + .addInflightCommit("101") + .withBaseFilesInPartition(DEFAULT_FIRST_PARTITION_PATH, "file-1") + .withBaseFilesInPartition(DEFAULT_SECOND_PARTITION_PATH, "file-2") + .withBaseFilesInPartition(DEFAULT_THIRD_PARTITION_PATH, "file-3") + .withMarkerFile(DEFAULT_FIRST_PARTITION_PATH, "file-1", IOType.MERGE) + .withMarkerFile(DEFAULT_SECOND_PARTITION_PATH, "file-2", IOType.MERGE) + .withMarkerFile(DEFAULT_THIRD_PARTITION_PATH, "file-3", IOType.MERGE); } @Test public void testDowngradeCommand() throws Exception { - metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); - // update hoodie.table.version to 1 metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE); try (FSDataOutputStream os = metaClient.getFs().create(new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE), true)) { @@ -98,7 +86,7 @@ public void testDowngradeCommand() throws Exception { metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); // verify marker files for inflight commit exists - for (String partitionPath : HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS) { + for (String partitionPath : DEFAULT_PARTITION_PATHS) { assertEquals(1, FileCreateUtils.getTotalMarkerFileCount(tablePath, partitionPath, "101", IOType.MERGE)); } @@ -112,7 +100,7 @@ public void testDowngradeCommand() throws Exception { assertTableVersionFromPropertyFile(); // verify marker files are non existant - for (String partitionPath : HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS) { + for (String partitionPath : DEFAULT_PARTITION_PATHS) { assertEquals(0, FileCreateUtils.getTotalMarkerFileCount(tablePath, partitionPath, "101", IOType.MERGE)); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java index aa87cdd1a861a..adc61457a2d30 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java @@ -26,17 +26,21 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; -import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.shell.core.CommandResult; -import java.io.File; import java.io.IOException; -import java.util.Arrays; - +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -48,43 +52,45 @@ */ public class ITTestCommitsCommand extends AbstractShellIntegrationTest { - private String tablePath; - @BeforeEach public void init() throws IOException { - String tableName = "test_table"; - tablePath = basePath + File.separator + tableName; + String tableName = "test_table_" + ITTestCommitsCommand.class.getName(); + String tablePath = Paths.get(basePath, tableName).toString(); HoodieCLI.conf = jsc.hadoopConfiguration(); // Create table and connect new TableCommand().createTable( tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); + metaClient.setBasePath(tablePath); + metaClient = HoodieTableMetaClient.reload(metaClient); } /** * Test case of 'commit rollback' command. */ @Test - public void testRollbackCommit() throws IOException { + public void testRollbackCommit() throws Exception { //Create some commits files and parquet files - String commitTime1 = "100"; - String commitTime2 = "101"; - String commitTime3 = "102"; - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath); - - // three commit files - HoodieTestUtils.createCommitFiles(tablePath, commitTime1, commitTime2, commitTime3); - - // generate commit files for commits - for (String commitTime : Arrays.asList(commitTime1, commitTime2, commitTime3)) { - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, commitTime, "file-1"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, commitTime, "file-2"); - HoodieTestUtils.createDataFile(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, commitTime, "file-3"); - } + Map partitionAndFileId = new HashMap() { + { + put(DEFAULT_FIRST_PARTITION_PATH, "file-1"); + put(DEFAULT_SECOND_PARTITION_PATH, "file-2"); + put(DEFAULT_THIRD_PARTITION_PATH, "file-3"); + } + }; + final String rollbackCommit = "102"; + HoodieTestTable.of(metaClient) + .withPartitionMetaFiles(DEFAULT_PARTITION_PATHS) + .addCommit("100") + .withBaseFilesInPartitions(partitionAndFileId) + .addCommit("101") + .withBaseFilesInPartitions(partitionAndFileId) + .addCommit(rollbackCommit) + .withBaseFilesInPartitions(partitionAndFileId); CommandResult cr = getShell().executeCommand(String.format("commit rollback --commit %s --sparkMaster %s --sparkMemory %s", - commitTime3, "local", "4G")); + rollbackCommit, "local", "4G")); assertTrue(cr.isSuccess()); metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index b86e7483879bd..f4d8019be0599 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -23,18 +23,14 @@ import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.testutils.HoodieTestUtils; -import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieIOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; @@ -42,6 +38,9 @@ import java.util.Map; import java.util.UUID; +import static org.apache.hudi.common.testutils.FileCreateUtils.baseFileName; +import static org.apache.hudi.common.util.CollectionUtils.createImmutableList; + /** * Class to be used in tests to keep generating test inserts and updates against a corpus. */ @@ -62,67 +61,53 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { /** * Create a commit file with default CommitMetadata. */ - public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration) { + public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration) throws Exception { createCommitFileWithMetadata(basePath, commitTime, configuration, Option.empty(), Option.empty()); } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, - Option writes, Option updates) { + Option writes, Option updates) throws Exception { createCommitFileWithMetadata(basePath, commitTime, configuration, UUID.randomUUID().toString(), UUID.randomUUID().toString(), writes, updates); } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, - String fileId1, String fileId2, Option writes, Option updates) { - Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime), - HoodieTimeline.makeRequestedCommitFileName(commitTime)) - .forEach(f -> { - Path commitFile = new Path( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f); - FSDataOutputStream os = null; - try { - FileSystem fs = FSUtils.getFs(basePath, configuration); - os = fs.create(commitFile, true); - // Generate commitMetadata - HoodieCommitMetadata commitMetadata = - generateCommitMetadata(basePath, commitTime, fileId1, fileId2, writes, updates); - // Write empty commit metadata - os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - } catch (IOException ioe) { - throw new HoodieIOException(ioe.getMessage(), ioe); - } finally { - if (null != os) { - try { - os.close(); - } catch (IOException e) { - throw new HoodieIOException(e.getMessage(), e); - } - } - } - }); + String fileId1, String fileId2, Option writes, Option updates) throws Exception { + List commitFileNames = Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime), + HoodieTimeline.makeRequestedCommitFileName(commitTime)); + for (String name : commitFileNames) { + Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + name); + try (FSDataOutputStream os = FSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { + // Generate commitMetadata + HoodieCommitMetadata commitMetadata = + generateCommitMetadata(basePath, commitTime, fileId1, fileId2, writes, updates); + // Write empty commit metadata + os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + } + } } /** * Generate commitMetadata in path. */ - public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime) throws IOException { + public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime) throws Exception { return generateCommitMetadata(basePath, commitTime, Option.empty(), Option.empty()); } public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime, - Option writes, Option updates) throws IOException { + Option writes, Option updates) throws Exception { return generateCommitMetadata(basePath, commitTime, UUID.randomUUID().toString(), UUID.randomUUID().toString(), writes, updates); } public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime, String fileId1, - String fileId2, Option writes, Option updates) throws IOException { - String file1P0C0 = HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime, fileId1); - String file1P1C0 = HoodieTestUtils.createDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime, fileId2); + String fileId2, Option writes, Option updates) throws Exception { + FileCreateUtils.createBaseFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime, fileId1); + FileCreateUtils.createBaseFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime, fileId2); return generateCommitMetadata(new HashMap>() { { - put(DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0)); - put(DEFAULT_SECOND_PARTITION_PATH, CollectionUtils.createImmutableList(file1P1C0)); + put(DEFAULT_FIRST_PARTITION_PATH, createImmutableList(baseFileName(DEFAULT_FIRST_PARTITION_PATH, fileId1))); + put(DEFAULT_SECOND_PARTITION_PATH, createImmutableList(baseFileName(DEFAULT_SECOND_PARTITION_PATH, fileId2))); } }, writes, updates); } diff --git a/hudi-client/src/main/java/org/apache/hudi/metrics/Metrics.java b/hudi-client/src/main/java/org/apache/hudi/metrics/Metrics.java index 2b524a74ca4bd..a9cf14e69f405 100644 --- a/hudi-client/src/main/java/org/apache/hudi/metrics/Metrics.java +++ b/hudi-client/src/main/java/org/apache/hudi/metrics/Metrics.java @@ -97,8 +97,7 @@ public static void registerGauge(String metricName, final long value) { guage.setValue(value); } catch (Exception e) { // Here we catch all exception, so the major upsert pipeline will not be affected if the - // metrics system - // has some issues. + // metrics system has some issues. LOG.error("Failed to send metrics: ", e); } } diff --git a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java index ba4ffb4731e23..c9a03c74d6ba5 100644 --- a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java @@ -33,7 +33,7 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.testutils.RawTripTestPayload; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; @@ -53,20 +53,22 @@ import org.apache.hudi.table.action.commit.WriteHelper; import org.apache.hudi.testutils.HoodieClientTestBase; import org.apache.hudi.testutils.HoodieClientTestUtils; +import org.apache.hudi.testutils.HoodieWriteableTestTable; import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -102,6 +104,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase { private static final Logger LOG = LogManager.getLogger(TestHoodieClientOnCopyOnWriteStorage.class); + private HoodieTestTable testTable; + + @BeforeEach + public void setUpTestTable() { + testTable = HoodieWriteableTestTable.of(metaClient); + } /** * Test Auto Commit behavior for HoodieWriteClient insert API. @@ -170,10 +178,10 @@ private void testAutoCommit(Function3, HoodieWriteClient, J JavaRDD result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn, isPrepped, false, numRecords); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, newCommitTime), + assertFalse(testTable.commitExists(newCommitTime), "If Autocommit is false, then commit should not be made automatically"); assertTrue(client.commit(newCommitTime, result), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, newCommitTime), + assertTrue(testTable.commitExists(newCommitTime), "After explicit commit, commit file should be created"); } } @@ -985,7 +993,7 @@ public void testCommitWritesRelativePaths() throws Exception { JavaRDD result = client.bulkInsert(writeRecords, instantTime); assertTrue(client.commit(instantTime, result), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(testTable.commitExists(instantTime), "After explicit commit, commit file should be created"); // Get parquet file paths from commit metadata @@ -998,16 +1006,14 @@ public void testCommitWritesRelativePaths() throws Exception { Collection commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values(); // Read from commit file - String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); - FileInputStream inputStream = new FileInputStream(filename); - String everything = FileIOUtils.readAsUTFString(inputStream); - HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class); - HashMap paths = metadata.getFileIdAndFullPaths(basePath); - inputStream.close(); - - // Compare values in both to make sure they are equal. - for (String pathName : paths.values()) { - assertTrue(commitPathNames.contains(pathName)); + try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime))) { + String everything = FileIOUtils.readAsUTFString(inputStream); + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class); + HashMap paths = metadata.getFileIdAndFullPaths(basePath); + // Compare values in both to make sure they are equal. + for (String pathName : paths.values()) { + assertTrue(commitPathNames.contains(pathName)); + } } } } @@ -1017,65 +1023,61 @@ public void testCommitWritesRelativePaths() throws Exception { */ @Test public void testMetadataStatsOnCommit() throws Exception { - HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); HoodieWriteClient client = getHoodieWriteClient(cfg); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath); - - String instantTime = "000"; - client.startCommitWithTime(instantTime); - List records = dataGen.generateInserts(instantTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); + String instantTime0 = "000"; + client.startCommitWithTime(instantTime0); - JavaRDD result = client.bulkInsert(writeRecords, instantTime); + List records0 = dataGen.generateInserts(instantTime0, 200); + JavaRDD writeRecords0 = jsc.parallelize(records0, 1); + JavaRDD result0 = client.bulkInsert(writeRecords0, instantTime0); - assertTrue(client.commit(instantTime, result), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(client.commit(instantTime0, result0), "Commit should succeed"); + assertTrue(testTable.commitExists(instantTime0), "After explicit commit, commit file should be created"); // Read from commit file - String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); - FileInputStream inputStream = new FileInputStream(filename); - String everything = FileIOUtils.readAsUTFString(inputStream); - HoodieCommitMetadata metadata = - HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class); - int inserts = 0; - for (Map.Entry> pstat : metadata.getPartitionToWriteStats().entrySet()) { - for (HoodieWriteStat stat : pstat.getValue()) { - inserts += stat.getNumInserts(); + try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime0))) { + String everything = FileIOUtils.readAsUTFString(inputStream); + HoodieCommitMetadata metadata = + HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class); + int inserts = 0; + for (Map.Entry> pstat : metadata.getPartitionToWriteStats().entrySet()) { + for (HoodieWriteStat stat : pstat.getValue()) { + inserts += stat.getNumInserts(); + } } + assertEquals(200, inserts); } - assertEquals(200, inserts); // Update + Inserts such that they just expand file1 - instantTime = "001"; - client.startCommitWithTime(instantTime); + String instantTime1 = "001"; + client.startCommitWithTime(instantTime1); - records = dataGen.generateUpdates(instantTime, records); - writeRecords = jsc.parallelize(records, 1); - result = client.upsert(writeRecords, instantTime); + List records1 = dataGen.generateUpdates(instantTime1, records0); + JavaRDD writeRecords1 = jsc.parallelize(records1, 1); + JavaRDD result1 = client.upsert(writeRecords1, instantTime1); - assertTrue(client.commit(instantTime, result), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(client.commit(instantTime1, result1), "Commit should succeed"); + assertTrue(testTable.commitExists(instantTime1), "After explicit commit, commit file should be created"); // Read from commit file - filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); - inputStream = new FileInputStream(filename); - everything = FileIOUtils.readAsUTFString(inputStream); - metadata = HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class); - inserts = 0; - int upserts = 0; - for (Map.Entry> pstat : metadata.getPartitionToWriteStats().entrySet()) { - for (HoodieWriteStat stat : pstat.getValue()) { - inserts += stat.getNumInserts(); - upserts += stat.getNumUpdateWrites(); + try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime1))) { + String everything = FileIOUtils.readAsUTFString(inputStream); + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class); + int inserts = 0; + int upserts = 0; + for (Map.Entry> pstat : metadata.getPartitionToWriteStats().entrySet()) { + for (HoodieWriteStat stat : pstat.getValue()) { + inserts += stat.getNumInserts(); + upserts += stat.getNumUpdateWrites(); + } } + assertEquals(0, inserts); + assertEquals(200, upserts); } - assertEquals(0, inserts); - assertEquals(200, upserts); - } /** @@ -1095,13 +1097,13 @@ public void testConsistencyCheckDuringFinalize(boolean enableOptimisticConsisten metaClient.getFs().delete(result.getKey(), false); if (!enableOptimisticConsistencyGuard) { assertTrue(client.commit(instantTime, result.getRight()), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(testTable.commitExists(instantTime), "After explicit commit, commit file should be created"); // Marker directory must be removed assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); } else { // with optimistic, first client.commit should have succeeded. - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(testTable.commitExists(instantTime), "After explicit commit, commit file should be created"); // Marker directory must be removed assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); @@ -1124,13 +1126,13 @@ private void testRollbackAfterConsistencyCheckFailureUsingFileList(boolean rollb if (!enableOptimisticConsistencyGuard) { // Rollback of this commit should succeed with FailSafeCG client.rollback(instantTime); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertFalse(testTable.commitExists(instantTime), "After explicit rollback, commit file should not be present"); // Marker directory must be removed after rollback assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); } else { // if optimistic CG is enabled, commit should have succeeded. - assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertTrue(testTable.commitExists(instantTime), "With optimistic CG, first commit should succeed. commit file should be present"); // Marker directory must be removed after rollback assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); @@ -1145,7 +1147,7 @@ private void testRollbackAfterConsistencyCheckFailureUsingFileList(boolean rollb } else { // rollback of a completed commit should succeed if using list based rollback client.rollback(instantTime); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, instantTime), + assertFalse(testTable.commitExists(instantTime), "After explicit rollback, commit file should not be present"); } } @@ -1216,7 +1218,7 @@ public void testMultiOperationsPerCommit() throws IOException { JavaRDD writeRecords = jsc.parallelize(dataGen.generateInserts(firstInstantTime, numRecords), 1); JavaRDD result = client.bulkInsert(writeRecords, firstInstantTime); assertTrue(client.commit(firstInstantTime, result), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, firstInstantTime), + assertTrue(testTable.commitExists(firstInstantTime), "After explicit commit, commit file should be created"); // Check the entire dataset has all records still @@ -1235,7 +1237,7 @@ public void testMultiOperationsPerCommit() throws IOException { JavaRDD inserts = client.bulkInsert(insertRecords, nextInstantTime); JavaRDD upserts = client.upsert(updateRecords, nextInstantTime); assertTrue(client.commit(nextInstantTime, inserts.union(upserts)), "Commit should succeed"); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, firstInstantTime), + assertTrue(testTable.commitExists(firstInstantTime), "After explicit commit, commit file should be created"); int totalRecords = 2 * numRecords; assertEquals(totalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(), diff --git a/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java index c1b5296b5243d..a4c7b5b7f4cb4 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -559,13 +559,13 @@ public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throw : cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file"); if (enableBootstrapSourceClean) { HoodieFileStatus fstatus = - bootstrapMapping.get(p0).get(0).getBoostrapFileStatus(); + bootstrapMapping.get(p0).get(0).getBootstrapFileStatus(); // This ensures full path is recorded in metadata. assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()), "Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles() + " but did not contain " + fstatus.getPath().getUri()); assertFalse(Files.exists(Paths.get(bootstrapMapping.get( - p0).get(0).getBoostrapFileStatus().getPath().getUri()))); + p0).get(0).getBootstrapFileStatus().getPath().getUri()))); } cleanStat = getCleanStat(hoodieCleanStatsTwo, p1); String file2P0C1 = partitionAndFileId002.get(p0); @@ -579,13 +579,13 @@ public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throw : cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file"); if (enableBootstrapSourceClean) { HoodieFileStatus fstatus = - bootstrapMapping.get(p1).get(0).getBoostrapFileStatus(); + bootstrapMapping.get(p1).get(0).getBootstrapFileStatus(); // This ensures full path is recorded in metadata. assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()), "Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles() + " but did not contain " + fstatus.getPath().getUri()); assertFalse(Files.exists(Paths.get(bootstrapMapping.get( - p1).get(0).getBoostrapFileStatus().getPath().getUri()))); + p1).get(0).getBootstrapFileStatus().getPath().getUri()))); } // make next commit, with 2 updates to existing files, and 1 insert @@ -928,7 +928,7 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3)); if (enableBootstrapSourceClean) { assertFalse(Files.exists(Paths.get(bootstrapMapping.get( - p0).get(0).getBoostrapFileStatus().getPath().getUri()))); + p0).get(0).getBootstrapFileStatus().getPath().getUri()))); } // No cleaning on partially written file, with no commit. @@ -968,7 +968,7 @@ private Map> generateBootstrapIndexAndSourceD for (Map.Entry> entry : bootstrapMapping.entrySet()) { new File(sourcePath.toString() + "/" + entry.getKey()).mkdirs(); - assertTrue(new File(entry.getValue().get(0).getBoostrapFileStatus().getPath().getUri()).createNewFile()); + assertTrue(new File(entry.getValue().get(0).getBootstrapFileStatus().getPath().getUri()).createNewFile()); } return bootstrapMapping; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 7dc0f69d28840..d4a77b0822847 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -391,7 +391,7 @@ public void close() { } /** - * Boostrap Index Writer to build bootstrap index. + * Bootstrap Index Writer to build bootstrap index. */ public static class HFileBootstrapIndexWriter extends BootstrapIndex.IndexWriter { @@ -443,7 +443,7 @@ private void writeNextPartition(String partitionPath, String bootstrapPartitionP bootstrapPartitionMetadata.setPartitionPath(partitionPath); bootstrapPartitionMetadata.setFileIdToBootstrapFile( bootstrapFileMappings.stream().map(m -> Pair.of(m.getFileId(), - m.getBoostrapFileStatus())).collect(Collectors.toMap(Pair::getKey, Pair::getValue))); + m.getBootstrapFileStatus())).collect(Collectors.toMap(Pair::getKey, Pair::getValue))); Option bytes = TimelineMetadataUtils.serializeAvroMetadata(bootstrapPartitionMetadata, HoodieBootstrapPartitionMetadata.class); if (bytes.isPresent()) { indexByPartitionWriter @@ -459,14 +459,14 @@ private void writeNextPartition(String partitionPath, String bootstrapPartitionP /** * Write next source file to hudi file-id. Entries are expected to be appended in hudi file-group id * order. - * @param mapping boostrap source file mapping. + * @param mapping bootstrap source file mapping. */ private void writeNextSourceFileMapping(BootstrapFileMapping mapping) { try { HoodieBootstrapFilePartitionInfo srcFilePartitionInfo = new HoodieBootstrapFilePartitionInfo(); srcFilePartitionInfo.setPartitionPath(mapping.getPartitionPath()); srcFilePartitionInfo.setBootstrapPartitionPath(mapping.getBootstrapPartitionPath()); - srcFilePartitionInfo.setBootstrapFileStatus(mapping.getBoostrapFileStatus()); + srcFilePartitionInfo.setBootstrapFileStatus(mapping.getBootstrapFileStatus()); KeyValue kv = new KeyValue(getFileGroupKey(mapping.getFileGroupId()).getBytes(), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, TimelineMetadataUtils.serializeAvroMetadata(srcFilePartitionInfo, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java b/hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java index a9642c71b3ac2..428d41104e3a3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java @@ -29,17 +29,17 @@ public class BootstrapFileMapping implements Serializable, Comparable addFilesToView(FileStatus[] statuses) { if (!isPartitionAvailableInStore(partition)) { if (bootstrapIndex.useIndex()) { try (BootstrapIndex.IndexReader reader = bootstrapIndex.createReader()) { - LOG.info("Boostrap Index available for partition " + partition); + LOG.info("Bootstrap Index available for partition " + partition); List sourceFileMappings = reader.getSourceFileMappingForPartition(partition); addBootstrapBaseFileMapping(sourceFileMappings.stream() .map(s -> new BootstrapBaseFileMapping(new HoodieFileGroupId(s.getPartitionPath(), - s.getFileId()), s.getBoostrapFileStatus()))); + s.getFileId()), s.getBootstrapFileStatus()))); } } storePartitionView(partition, value); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java index 2c2f919cdf58a..756a48a3924bb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java @@ -51,43 +51,63 @@ */ public class BoundedInMemoryQueue implements Iterable { - // interval used for polling records in the queue. + /** Interval used for polling records in the queue. **/ public static final int RECORD_POLL_INTERVAL_SEC = 1; - // rate used for sampling records to determine avg record size in bytes. + + /** Rate used for sampling records to determine avg record size in bytes. **/ public static final int RECORD_SAMPLING_RATE = 64; - // maximum records that will be cached + + /** Maximum records that will be cached **/ private static final int RECORD_CACHING_LIMIT = 128 * 1024; + private static final Logger LOG = LogManager.getLogger(BoundedInMemoryQueue.class); - // It indicates number of records to cache. We will be using sampled record's average size to - // determine how many - // records we should cache and will change (increase/decrease) permits accordingly. + + /** + * It indicates number of records to cache. We will be using sampled record's average size to + * determine how many records we should cache and will change (increase/decrease) permits accordingly. + */ public final Semaphore rateLimiter = new Semaphore(1); - // used for sampling records with "RECORD_SAMPLING_RATE" frequency. + + /** Used for sampling records with "RECORD_SAMPLING_RATE" frequency. **/ public final AtomicLong samplingRecordCounter = new AtomicLong(-1); - // internal queue for records. + + /** Internal queue for records. **/ private final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); - // maximum amount of memory to be used for queueing records. + + /** Maximum amount of memory to be used for queueing records. **/ private final long memoryLimit; - // it holds the root cause of the exception in case either queueing records (consuming from - // inputIterator) fails or - // thread reading records from queue fails. + + /** + * it holds the root cause of the exception in case either queueing records + * (consuming from inputIterator) fails or thread reading records from queue fails. + */ private final AtomicReference hasFailed = new AtomicReference<>(null); - // used for indicating that all the records from queue are read successfully. + + /** Used for indicating that all the records from queue are read successfully. **/ private final AtomicBoolean isReadDone = new AtomicBoolean(false); - // used for indicating that all records have been enqueued + + /** used for indicating that all records have been enqueued. **/ private final AtomicBoolean isWriteDone = new AtomicBoolean(false); - // Function to transform the input payload to the expected output payload + + /** Function to transform the input payload to the expected output payload. **/ private final Function transformFunction; - // Payload Size Estimator + + /** Payload Size Estimator. **/ private final SizeEstimator payloadSizeEstimator; - // Singleton (w.r.t this instance) Iterator for this queue + + /** Singleton (w.r.t this instance) Iterator for this queue. **/ private final QueueIterator iterator; - // indicates rate limit (number of records to cache). it is updated whenever there is a change - // in avg record size. + + /** + * indicates rate limit (number of records to cache). it is updated + * whenever there is a change in avg record size. + */ public int currentRateLimit = 1; - // indicates avg record size in bytes. It is updated whenever a new record is sampled. + + /** Indicates avg record size in bytes. It is updated whenever a new record is sampled. **/ public long avgRecordSizeInBytes = 0; - // indicates number of samples collected so far. + + /** Indicates number of samples collected so far. **/ private long numSamples = 0; /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueueProducer.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueueProducer.java index 31c3a3cc1dc1b..ecea9f2193c76 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueueProducer.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueueProducer.java @@ -19,7 +19,7 @@ package org.apache.hudi.common.util.queue; /** - * Producer for BoundedInMemoryQueue. Memory Bounded Buffer supports multiple producers single consumer pattern. + * Producer for {@link BoundedInMemoryQueue}. Memory Bounded Buffer supports multiple producers single consumer pattern. * * @param Input type for buffer items produced */ diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java b/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java index ecfb59da8593b..bbe75cf893770 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java @@ -156,7 +156,7 @@ private void validateBootstrapIndex(Map> boot assertEquals(x.getFileId(), res.getFileId()); assertEquals(x.getPartitionPath(), res.getPartitionPath()); assertEquals(BOOTSTRAP_BASE_PATH, res.getBootstrapBasePath()); - assertEquals(x.getBoostrapFileStatus(), res.getBoostrapFileStatus()); + assertEquals(x.getBootstrapFileStatus(), res.getBootstrapFileStatus()); assertEquals(x.getBootstrapPartitionPath(), res.getBootstrapPartitionPath()); }); }); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index 7f00f5a1419e4..a1dbe086e5987 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -172,19 +172,6 @@ public static void createCommitFiles(String basePath, String... instantTimes) th } } - /** - * @deprecated Use {@link HoodieTestTable} instead. - */ - public static void createInflightCommitFiles(String basePath, String... instantTimes) throws IOException { - - for (String instantTime : instantTimes) { - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile(); - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeInflightCommitFileName( - instantTime)).createNewFile(); - } - } - public static void createPendingCleanFiles(HoodieTableMetaClient metaClient, String... instantTimes) { for (String instantTime : instantTimes) { Arrays.asList(HoodieTimeline.makeRequestedCleanerFileName(instantTime), @@ -257,22 +244,6 @@ public static void createCompactionRequest(HoodieTableMetaClient metaClient, Str TimelineMetadataUtils.serializeCompactionPlan(plan)); } - /** - * @deprecated Use {@link HoodieTestTable} instead. - */ - public static String getCommitFilePath(String basePath, String instantTime) { - return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION; - } - - /** - * @deprecated Use {@link HoodieTestTable} instead. - */ - public static boolean doesCommitExist(String basePath, String instantTime) { - return new File( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION) - .exists(); - } - public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath, String instantTime, Configuration configuration) throws IOException { diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java b/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java index 3a07cc597212b..4486a4286c43f 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java +++ b/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java @@ -35,6 +35,6 @@ public ExampleDataSchemaProvider(TypedProperties props, JavaSparkContext jssc) { @Override public Schema getSourceSchema() { - return org.apache.hudi.examples.common.HoodieExampleDataGenerator.avroSchema; + return HoodieExampleDataGenerator.avroSchema; } } diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java b/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java index 6f96fe966d77a..655e549d905a4 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java +++ b/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java @@ -51,8 +51,10 @@ * Simple examples of #{@link HoodieWriteClient}. * * To run this example, you should - * 1. For running in IDE, set VM options `-Dspark.master=local[2]` - * 2. For running in shell, using `spark-submit` + *
+ *   1. For running in IDE, set VM options `-Dspark.master=local[2]`;
+ *   2. For running in shell, using `spark-submit`.
+ *
* * Usage: HoodieWriteClientExample * and describe root path of hudi and table name diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java index aba1d54e3ffe2..e432f9dc423f5 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java @@ -132,6 +132,13 @@ public void testRunHoodieJavaApp(String command, String hiveTableName, String ta String hdfsPath = "/" + hiveTableName; String hdfsUrl = HDFS_BASE_URL + hdfsPath; + // Delete hdfs path if it exists + try { + executeCommandStringInDocker(ADHOC_1_CONTAINER, "hdfs dfs -rm -r " + hdfsUrl, true); + } catch (AssertionError ex) { + // Path not exists, pass + } + // Drop Table if it exists try { dropHiveTables(hiveTableName, tableType); diff --git a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java index b074a82752bfe..8936e03df8778 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java @@ -56,7 +56,7 @@ public static class DataGenerator { private static final String[] DEFAULT_PARTITION_PATHS = {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH}; static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ " - + "{\"name\": \"ts\",\"type\": \"double\"},{\"name\": \"uuid\", \"type\": \"string\"}," + + "{\"name\": \"ts\",\"type\": \"long\"},{\"name\": \"uuid\", \"type\": \"string\"}," + "{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"}," + "{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"}," + "{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"}," diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 450bd73e455f5..8cab7c16e6d99 100644 --- a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -29,8 +29,11 @@ import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.client.{HoodieWriteClient, WriteStatus} import org.apache.hudi.common.config.TypedProperties -import org.apache.hudi.common.model.{HoodieRecordPayload, HoodieTableType, WriteOperationType} -import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} +import org.apache.hudi.common.model.HoodieRecordPayload +import org.apache.hudi.common.model.HoodieTableType +import org.apache.hudi.common.model.WriteOperationType +import org.apache.hudi.common.table.HoodieTableConfig +import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.HoodieActiveTimeline import org.apache.hudi.common.util.ReflectionUtils import org.apache.hudi.config.HoodieBootstrapConfig.{BOOTSTRAP_BASE_PATH_PROP, BOOTSTRAP_INDEX_CLASS_PROP, DEFAULT_BOOTSTRAP_INDEX_CLASS} @@ -107,8 +110,10 @@ private[hudi] object HoodieSparkSqlWriter { handleSaveModes(mode, basePath, tableConfig, tblName, operation, fs) // Create the table if not present if (!tableExists) { + val archiveLogFolder = parameters.getOrElse( + HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, "archived") val tableMetaClient = HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, - HoodieTableType.valueOf(tableType), tblName, "archived", parameters(PAYLOAD_CLASS_OPT_KEY), + HoodieTableType.valueOf(tableType), tblName, archiveLogFolder, parameters(PAYLOAD_CLASS_OPT_KEY), null.asInstanceOf[String]) tableConfig = tableMetaClient.getTableConfig } @@ -244,8 +249,10 @@ private[hudi] object HoodieSparkSqlWriter { } if (!tableExists) { + val archiveLogFolder = parameters.getOrElse( + HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, "archived") HoodieTableMetaClient.initTableTypeWithBootstrap(sparkContext.hadoopConfiguration, path, - HoodieTableType.valueOf(tableType), tableName, "archived", parameters(PAYLOAD_CLASS_OPT_KEY), + HoodieTableType.valueOf(tableType), tableName, archiveLogFolder, parameters(PAYLOAD_CLASS_OPT_KEY), null, bootstrapIndexClass, bootstrapBasePath) } diff --git a/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java b/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java index ad4244362570d..14f36d469d855 100644 --- a/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java +++ b/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java @@ -309,7 +309,7 @@ public void testMetadataBootstrapWithUpdatesMOR() throws Exception { } @Test - public void testFullBoostrapOnlyCOW() throws Exception { + public void testFullBootstrapOnlyCOW() throws Exception { testBootstrapCommon(true, false, EffectiveMode.FULL_BOOTSTRAP_MODE); } @@ -319,7 +319,7 @@ public void testFullBootstrapWithUpdatesMOR() throws Exception { } @Test - public void testMetaAndFullBoostrapCOW() throws Exception { + public void testMetaAndFullBootstrapCOW() throws Exception { testBootstrapCommon(true, false, EffectiveMode.MIXED_BOOTSTRAP_MODE); } diff --git a/scripts/checkout_pr.sh b/scripts/checkout_pr.sh new file mode 100755 index 0000000000000..fd1878b1f8d04 --- /dev/null +++ b/scripts/checkout_pr.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Usage: ./scripts/checkout_pr.sh +# +# Checkout a PR given the PR number into a local branch. PR branches are named +# using the convention "pull/", to enable pr_push_command.sh to work +# in tandem. +# +set -eou pipefail + +function printUsage { + echo "Usage: $(basename "${0}") [-r REMOTE] [-f] " 2>&1 + echo ' -r REMOTE remote to grab PR from (default: apache)' + echo ' -f force overwrite of local branch (default: fail if exists)' + exit 1 +} + +if [[ ${#} -eq 0 ]]; then + printUsage +fi + +REMOTE="origin" +FORCE="" +while getopts ":r:f" arg; do + case "${arg}" in + r) + REMOTE="${OPTARG}" + ;; + f) + FORCE="--force" + ;; + ?) + printUsage + ;; + esac +done +shift "$(($OPTIND -1))" + +# Debug output +PR_NUM=$1 + +# Checkout the PR into a local branch. +git fetch ${REMOTE} pull/${PR_NUM}/head:pull/${PR_NUM} ${FORCE} +git checkout pull/${PR_NUM} diff --git a/scripts/pr_push_command.sh b/scripts/pr_push_command.sh new file mode 100755 index 0000000000000..433f5ef3acc32 --- /dev/null +++ b/scripts/pr_push_command.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Usage: ./scripts/pr_push_command.sh +# +# When run from a PR branch checked out by checkout_pr.sh, provides a command +# that can push local PR branch to its corresponding remote. +# NOTE: Always double check correctness of command, before pushing. +# + +set -eou pipefail + +CURR_BRANCH=$(git status | grep "On branch" | awk '{print $NF}') +REMOTE="apache" + +# Get PR number from branch +if [[ ${CURR_BRANCH} = pull/* ]] +then + PR_NUM=$(echo "${CURR_BRANCH}" | awk -F'/' '{print $NF}') +else + echo "Not on a PR branch?" + exit 1 +fi + +# Parse the pr's remote, branch & add a remote if needed. +PR_RESP=$(curl https://api.github.com/repos/${REMOTE}/hudi/pulls/${PR_NUM}) +if ! echo ${PR_RESP} | jq '.url' | grep "hudi/pulls/${PR_NUM}" ; then + echo "Unable to find PR number ${PR_NUM} in remote ${REMOTE}" + exit 1 +fi + +PR_SSH_URL=$(echo ${PR_RESP} | jq -r '.head.repo.ssh_url') +PR_REMOTE_BRANCH=$(echo ${PR_RESP} | jq -r '.head.ref') +PR_REMOTE=$(echo ${PR_RESP} | jq -r '.head.repo.owner.login') + +# Add a new remote, if needed. +if ! git remote -v | grep ${PR_REMOTE} ; then + echo "Adding new remote ${PR_REMOTE} with ssh url ${PR_SSH_URL}" + git remote add ${PR_REMOTE} ${PR_SSH_URL} +fi + +# Push local branch to PR remote/branch +echo "If you are sure, execute the following command to push" +echo " git push ${PR_REMOTE} ${CURR_BRANCH}:${PR_REMOTE_BRANCH}"