diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java
index cdd3fa526b914..79f96d829b27e 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java
@@ -22,6 +22,7 @@
 import org.apache.hudi.avro.model.HoodieCleanMetadata;
 import org.apache.hudi.avro.model.HoodieCleanerPlan;
 import org.apache.hudi.avro.model.HoodieRollbackMetadata;
+import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.client.utils.MetadataConversionUtils;
 import org.apache.hudi.common.HoodieCleanStat;
 import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
@@ -96,6 +97,7 @@ public void testArchiveEmptyTable() throws IOException {
     HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
         .withParallelism(2, 2).forTable("test-trip-table").build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
     HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
@@ -109,7 +111,7 @@ public void testArchiveTableWithArchival() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 4).build())
         .forTable("test-trip-table").build();
-    HoodieTestUtils.init(hadoopConf, basePath);
+    initMetadataTable(cfg);
     // Requested Compaction
     HoodieTestDataGenerator.createCompactionAuxiliaryMetadata(basePath,
         new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100"), wrapperFs.getConf());
@@ -235,6 +237,7 @@ public void testArchiveTableWithNoArchival() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
     HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
@@ -301,6 +304,7 @@ public void testArchiveCommitSafety() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
     HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
@@ -328,7 +332,7 @@ public void testArchiveCommitSavepointNoHole() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
         .build();
-
+    initMetadataTable(cfg);
     HoodieTestDataGenerator.createCommitFile(basePath, "100", wrapperFs.getConf());
     HoodieTestDataGenerator.createCommitFile(basePath, "101", wrapperFs.getConf());
     HoodieTestDataGenerator.createSavepointFile(basePath, "101", wrapperFs.getConf());
@@ -336,6 +340,7 @@ public void testArchiveCommitSavepointNoHole() throws IOException {
     HoodieTestDataGenerator.createCommitFile(basePath, "103", wrapperFs.getConf());
     HoodieTestDataGenerator.createCommitFile(basePath, "104", wrapperFs.getConf());
     HoodieTestDataGenerator.createCommitFile(basePath, "105", wrapperFs.getConf());
+
     HoodieTable table = HoodieSparkTable.create(cfg, context);
     HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
@@ -359,6 +364,7 @@ public void testArchiveRollbacks() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
         .build();
+    initMetadataTable(cfg);
     createCommitAndRollbackFile("100", "101", false);
     createCommitAndRollbackFile("102", "103", false);
@@ -388,6 +394,8 @@ public void testArchiveCommitCompactionNoHole() throws IOException {
         .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
         .build();
+    initMetadataTable(cfg);
+
     HoodieTestDataGenerator.createCommitFile(basePath, "100", wrapperFs.getConf());
     HoodieTestDataGenerator.createCompactionRequestedFile(basePath, "101", wrapperFs.getConf());
     HoodieTestDataGenerator.createCompactionAuxiliaryMetadata(basePath,
@@ -436,6 +444,7 @@ public void testArchiveCommitTimeline() throws IOException {
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     HoodieTestDataGenerator.createCommitFile(basePath, "1", wrapperFs.getConf());
@@ -488,6 +497,7 @@ public void testArchiveCompletedClean() throws IOException {
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     createCleanMetadata("10", false);
@@ -513,6 +523,7 @@ public void testArchiveCompletedRollback() throws IOException {
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     createCommitAndRollbackFile("6", "10", false);
@@ -543,6 +554,7 @@ public void testArchiveCompletedShouldRetainMinInstantsIfInstantsGreaterThanMaxt
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minInstants, maxInstants).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     for (int i = 0; i < maxInstants + 2; i++) {
       createCleanMetadata(i + "", false);
@@ -564,6 +576,7 @@ public void testArchiveCompletedShouldNotArchiveIfInstantsLessThanMaxtoKeep() th
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minInstants, maxInstants).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     for (int i = 0; i < maxInstants; i++) {
       createCleanMetadata(i + "", false);
@@ -585,6 +598,7 @@ public void testArchiveCompletedRollbackAndClean() throws IOException {
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minInstantsToKeep, maxInstantsToKeep).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     int startInstant = 1;
@@ -618,6 +632,7 @@ public void testArchiveInflightClean() throws IOException {
         .withParallelism(2, 2).forTable("test-trip-table")
         .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
         .build();
+    initMetadataTable(cfg);
     metaClient = HoodieTableMetaClient.reload(metaClient);
     createCleanMetadata("10", false);
@@ -677,4 +692,11 @@ private HoodieInstant createRollbackMetadata(String rollbackTime, String commitT
     }
     return new HoodieInstant(inflight, "rollback", rollbackTime);
   }
+
+  private void initMetadataTable(HoodieWriteConfig writeConfig) {
+    // Init the metadata table, so that commits/instants created externally can get synced later. Without
+    // the instants getting synced, archival of those instants will not be triggered.
+    SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig);
+    client.syncTableMetadata();
+  }
 }
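All of the hunks above follow one pattern: because file listings are now served by the metadata table, each test bootstraps it (via the new initMetadataTable helper) before instants are written directly to the file system, so that those instants can be synced and later picked up by archival. A minimal sketch of that pattern, stitched together from the hunks; the final archiveIfRequired(context) call is assumed from the surrounding test code and is not part of this patch:

    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
        .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2).forTable("test-trip-table").build();
    initMetadataTable(cfg);                                 // bootstrap the metadata table so instants created below can be synced
    metaClient = HoodieTableMetaClient.reload(metaClient);  // reload to see the newly initialized metadata table
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
    archiveLog.archiveIfRequired(context);                  // assumed call: archival only considers instants that were synced
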
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
index 56f4eeef3402a..fc5885677a744 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
@@ -19,6 +19,7 @@
 package org.apache.hudi.io.storage.row;

 import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
@@ -168,11 +169,15 @@ public void testGlobalFailure() throws Exception {

   @Test
   public void testInstantiationFailure() throws IOException {
     // init config and table
-    HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).withPath("/dummypath/abc/").build();
+    HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder("/dummypath/abc/")
+        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
+            .enable(false)
+            .build())
+        .build();
     HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
     try {
-      new HoodieRowCreateHandle(table, cfg, " def", UUID.randomUUID().toString(), "001", RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
+      new HoodieRowCreateHandle(table, cfg, "def", UUID.randomUUID().toString(), "001", RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
       fail("Should have thrown exception");
     } catch (HoodieInsertException ioe) {
       // expected
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
index 5bc293a4630ce..6ab66a663987d 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
@@ -1079,8 +1079,15 @@ public void testLogFileCountsAfterCompaction(boolean populateMetaFields) throws
     HoodieTable table = HoodieSparkTable.create(config, context, metaClient);
     HoodieSparkWriteableTestTable.of(table, HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS)
         .withLogAppends(updatedRecords);
-    // In writeRecordsToLogFiles, no commit files are getting added, so resetting file-system view state
-    ((SyncableFileSystemView) (table.getSliceView())).reset();
+
+    // Mark 2nd delta-instant as completed
+    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT,
+        HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime));
+    metaClient.getActiveTimeline().saveAsComplete(
+        new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
+
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+    table = HoodieSparkTable.create(config, context, metaClient);

     // Verify that all data file has one log file
     for (String partitionPath : dataGen.getPartitionPaths()) {
@@ -1091,12 +1098,6 @@ public void testLogFileCountsAfterCompaction(boolean populateMetaFields) throws
       }
     }

-    // Mark 2nd delta-instant as completed
-    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT,
-        HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime));
-    metaClient.getActiveTimeline().saveAsComplete(
-        new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
-
     // Do a compaction
     String compactionInstantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
     JavaRDD result = (JavaRDD) writeClient.compact(compactionInstantTime);
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
index 11a75b6c4dfab..7f010fe6227a5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
@@ -40,7 +40,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
   // Enable the internal Metadata Table which saves file listings
   public static final ConfigProperty METADATA_ENABLE_PROP = ConfigProperty
       .key(METADATA_PREFIX + ".enable")
-      .defaultValue(false)
+      .defaultValue(true)
      .sinceVersion("0.7.0")
      .withDocumentation("Enable the internal metadata table which serves table metadata like level file listings");
@@ -51,8 +51,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
      .sinceVersion("0.7.0")
      .withDocumentation("Validate contents of metadata table on each access; e.g against the actual listings from lake storage");

-  public static final boolean DEFAULT_METADATA_ENABLE_FOR_READERS = false;
-
   // Enable metrics for internal Metadata Table
   public static final ConfigProperty METADATA_METRICS_ENABLE_PROP = ConfigProperty
       .key(METADATA_PREFIX + ".metrics.enable")
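The hunk above is the core of the change: hoodie.metadata.enable now defaults to true, and the separate reader-side constant DEFAULT_METADATA_ENABLE_FOR_READERS (previously false) is removed, so readers and writers pick up the same ConfigProperty default. A job that wants the old file-system listing behavior now has to opt out explicitly; a minimal sketch using the same builder calls that appear elsewhere in this patch (the basePath value is a placeholder):

    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(false)  // explicitly disable the metadata table now that the default is true
            .build())
        .forTable("test-trip-table")
        .build();
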
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index d94018b88546f..167a519503e67 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -18,6 +18,7 @@
 package org.apache.hudi.hadoop;

+import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.config.SerializableConfiguration;
 import org.apache.hudi.common.engine.HoodieLocalEngineContext;
 import org.apache.hudi.common.fs.FSUtils;
@@ -29,7 +30,6 @@
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
-import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;

 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -176,7 +176,7 @@ public boolean accept(Path path) {
       }

       fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
-          metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf()));
+          metaClient, HoodieMetadataConfig.newBuilder().build());
       String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
       List latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
       // populate the cache
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java
index f4bddbd392855..4c363ea294ef9 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java
@@ -70,10 +70,6 @@
 import java.util.function.Function;
 import java.util.stream.Collectors;

-import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
-import static org.apache.hudi.common.config.HoodieMetadataConfig.METADATA_ENABLE_PROP;
-import static org.apache.hudi.common.config.HoodieMetadataConfig.METADATA_VALIDATE_PROP;
-
 public class HoodieInputFormatUtils {

   // These positions have to be deterministic across all tables
@@ -416,13 +412,6 @@ public static Map> groupSnapshotPathsByMetaCli
     return grouped;
   }

-  public static HoodieMetadataConfig buildMetadataConfig(Configuration conf) {
-    return HoodieMetadataConfig.newBuilder()
-        .enable(conf.getBoolean(METADATA_ENABLE_PROP.key(), DEFAULT_METADATA_ENABLE_FOR_READERS))
-        .validate(conf.getBoolean(METADATA_VALIDATE_PROP.key(), METADATA_VALIDATE_PROP.defaultValue()))
-        .build();
-  }
-
   public static List filterFileStatusForSnapshotMode(JobConf job, Map tableMetaClientMap,
       List snapshotPaths) throws IOException {
     HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(job);
@@ -443,7 +432,8 @@ public static List filterFileStatusForSnapshotMode(JobConf job, Map<
       HoodieTimeline timeline = HoodieHiveUtils.getTableTimeline(metaClient.getTableConfig().getTableName(), job, metaClient);
       HoodieTableFileSystemView fsView = fsViewCache.computeIfAbsent(metaClient, tableMetaClient ->
-          FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(engineContext, tableMetaClient, buildMetadataConfig(job), timeline));
+          FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(engineContext, tableMetaClient,
+              HoodieMetadataConfig.newBuilder().build(), timeline));
       List filteredBaseFiles = new ArrayList<>();
       for (Path p : entry.getValue()) {
         String relativePartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), p);
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
index f84e3440516dd..0ab2a962df518 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
@@ -18,6 +18,7 @@
 package org.apache.hudi.hadoop.utils;

+import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.engine.HoodieLocalEngineContext;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.FileSlice;
@@ -105,7 +106,7 @@ public static InputSplit[] getRealtimeSplits(Configuration conf, Stream getPartitions(Option partitionsLimit) throws IOE
       // calls in metrics as they are not part of normal HUDI operation.
       HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
       List partitionPaths = FSUtils.getAllPartitionPaths(engineContext, metaClient.getBasePath(),
-          HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS, HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false);
+          HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(), HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false);
       // Sort partition so we can pick last N partitions by default
       Collections.sort(partitionPaths);
       if (!partitionPaths.isEmpty()) {
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
index f94d228c39b31..a5d81de69e958 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
@@ -124,7 +124,7 @@ case class HoodieFileIndex(
     // would be able to run SET hoodie.metadata.enable=true in the spark sql session to enable metadata listing.
     properties.put(HoodieMetadataConfig.METADATA_ENABLE_PROP,
       sqlConf.getConfString(HoodieMetadataConfig.METADATA_ENABLE_PROP.key(),
-        HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS.toString))
+        HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue().toString))
     properties.put(HoodieMetadataConfig.METADATA_VALIDATE_PROP,
       sqlConf.getConfString(HoodieMetadataConfig.METADATA_VALIDATE_PROP.key(),
         HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue().toString))
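For Spark SQL reads that go through HoodieFileIndex, the comment in the hunk above notes that listing behavior can also be toggled per session with SET hoodie.metadata.enable=...; the key is HoodieMetadataConfig.METADATA_ENABLE_PROP.key(). A minimal sketch, assuming an existing SparkSession named spark:

    // session-level override, read back by HoodieFileIndex through sqlConf.getConfString(...)
    spark.sql("SET hoodie.metadata.enable=false");  // or "=true" to force metadata-based listing
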
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java
index 9ae6a05c73bfe..8d5d011217e54 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java
@@ -373,7 +373,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     reloadInputFormats();
     List records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES, false, new ArrayList<>());
@@ -392,7 +392,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     seenKeys = new HashSet<>();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, false, new ArrayList<>());
@@ -409,7 +409,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     reloadInputFormats();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES,
@@ -427,7 +427,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     seenKeys = new HashSet<>();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, true,
@@ -443,7 +443,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     reloadInputFormats();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, roJobConf, false, schema, TRIP_HIVE_COLUMN_TYPES, true,
@@ -461,7 +461,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     seenKeys = new HashSet<>();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         jsc.hadoopConfiguration(),
-        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        FSUtils.getAllPartitionPaths(context, basePath, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
            HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue(), false).stream()
            .map(f -> basePath + "/" + f).collect(Collectors.toList()),
        basePath, rtJobConf, true, schema, TRIP_HIVE_COLUMN_TYPES, true,
diff --git a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncConfig.java b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncConfig.java
index 3527966833cfb..a8af46a7bce4e 100644
--- a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncConfig.java
+++ b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncConfig.java
@@ -71,7 +71,7 @@ public class DLASyncConfig implements Serializable {
   public Boolean useDLASyncHiveStylePartitioning = false;

   @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
-  public Boolean useFileListingFromMetadata = HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
+  public Boolean useFileListingFromMetadata = HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue();

   @Parameter(names = {"--verify-metadata-file-listing"}, description = "Verify file listing from Hudi's metadata against file system")
   public Boolean verifyMetadataFileListing = HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue();
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
index d9d833d393336..95bbeb04b6d54 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
@@ -87,7 +87,7 @@ public class HiveSyncConfig implements Serializable {
   public Boolean skipROSuffix = false;

   @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
-  public Boolean useFileListingFromMetadata = HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
+  public Boolean useFileListingFromMetadata = HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue();

   @Parameter(names = {"--verify-metadata-file-listing"}, description = "Verify file listing from Hudi's metadata against file system")
   public Boolean verifyMetadataFileListing = HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue();
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
index d3281ad880c2e..a52ad802b23c1 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
@@ -71,7 +71,7 @@ static class Config implements Serializable {
     boolean shouldAssumeDatePartitioning = false;

     @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
-    public Boolean useFileListingFromMetadata = HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
+    public Boolean useFileListingFromMetadata = HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue();

     @Parameter(names = {"--verify-metadata-file-listing"}, description = "Verify file listing from Hudi's metadata against file system")
     public Boolean verifyMetadataFileListing = HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue();
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
index eb5f2a803ea2d..5983a05f2cb34 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
@@ -306,7 +306,7 @@ public static class Config implements Serializable {
     public Boolean waitForManualQueries = false;

     @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
-    public Boolean useFileListingFromMetadata = HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
+    public Boolean useFileListingFromMetadata = HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue();

     @Parameter(names = {"--verify-metadata-file-listing"}, description = "Verify file listing from Hudi's metadata against file system")
     public Boolean verifyMetadataFileListing = HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue();
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java
index 9aa03e2e46d22..9c36a3909689f 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java
@@ -69,7 +69,7 @@ public void testEmptySnapshotCopy() throws IOException {
     // Do the snapshot
     HoodieSnapshotCopier copier = new HoodieSnapshotCopier();
     copier.snapshot(jsc(), basePath, outputPath, true,
-        HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+        HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
         HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue());

     // Nothing changed; we just bail out
@@ -123,7 +123,7 @@ public void testSnapshotCopy() throws Exception {

     // Do a snapshot copy
     HoodieSnapshotCopier copier = new HoodieSnapshotCopier();
-    copier.snapshot(jsc(), basePath, outputPath, false, HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS,
+    copier.snapshot(jsc(), basePath, outputPath, false, HoodieMetadataConfig.METADATA_ENABLE_PROP.defaultValue(),
         HoodieMetadataConfig.METADATA_VALIDATE_PROP.defaultValue());

     // Check results
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
index 133205cb8a77b..41439f01087c9 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
@@ -200,6 +200,8 @@ public void testExportDatasetWithNoCommit() throws IOException {
   public void testExportDatasetWithNoPartition() throws IOException {
     // delete all source data
     dfs().delete(new Path(sourcePath + "/" + PARTITION_PATH), true);
+    // delete the metadata folder if present as metadata will be referring to deleted data
+    dfs().delete(new Path(sourcePath + "/.hoodie/metadata"), true);

     // export
     final Throwable thrown = assertThrows(HoodieSnapshotExporterException.class, () -> {