-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-53] Implementation of a native DFS based index based on the metadata table. #5581
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| import org.apache.hudi.client.transaction.TransactionManager; | ||
| import org.apache.hudi.client.utils.TransactionUtils; | ||
| import org.apache.hudi.common.HoodiePendingRollbackInfo; | ||
| import org.apache.hudi.common.data.HoodieData; | ||
| import org.apache.hudi.common.engine.HoodieEngineContext; | ||
| import org.apache.hudi.common.model.HoodieCommitMetadata; | ||
| import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; | ||
|
|
@@ -172,13 +173,14 @@ public boolean commit(String instantTime, O writeStatuses, Option<Map<String, St | |
| public abstract boolean commit(String instantTime, O writeStatuses, Option<Map<String, String>> extraMetadata, | ||
| String commitActionType, Map<String, List<String>> partitionToReplacedFileIds); | ||
|
|
||
| public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Option<Map<String, String>> extraMetadata, | ||
| String commitActionType) { | ||
| return commitStats(instantTime, stats, extraMetadata, commitActionType, Collections.emptyMap()); | ||
| public boolean commitStats(String instantTime, HoodieData<WriteStatus> writeStatuses, List<HoodieWriteStat> stats, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just clarifying - WriteStatus contains only deflated HoodieRecord - right? Making sure we don't hold on to the data in memory until we update the metadata table. |
||
| Option<Map<String, String>> extraMetadata, String commitActionType) { | ||
| return commitStats(instantTime, writeStatuses, stats, extraMetadata, commitActionType, Collections.emptyMap()); | ||
| } | ||
|
|
||
| public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Option<Map<String, String>> extraMetadata, | ||
| String commitActionType, Map<String, List<String>> partitionToReplaceFileIds) { | ||
| public boolean commitStats(String instantTime, HoodieData<WriteStatus> writeStatuses, List<HoodieWriteStat> stats, | ||
| Option<Map<String, String>> extraMetadata, String commitActionType, | ||
| Map<String, List<String>> partitionToReplaceFileIds) { | ||
| // Skip the empty commit if not allowed | ||
| if (!config.allowEmptyCommit() && stats.isEmpty()) { | ||
| return true; | ||
|
|
@@ -194,7 +196,7 @@ public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Opti | |
| lastCompletedTxnAndMetadata.isPresent() ? Option.of(lastCompletedTxnAndMetadata.get().getLeft()) : Option.empty()); | ||
| try { | ||
| preCommit(inflightInstant, metadata); | ||
| commit(table, commitActionType, instantTime, metadata, stats); | ||
| commit(table, commitActionType, instantTime, metadata, stats, writeStatuses); | ||
| postCommit(table, metadata, instantTime, extraMetadata); | ||
| LOG.info("Committed " + instantTime); | ||
| releaseResources(); | ||
|
|
@@ -217,22 +219,18 @@ public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Opti | |
| } | ||
|
|
||
| protected void commit(HoodieTable table, String commitActionType, String instantTime, HoodieCommitMetadata metadata, | ||
| List<HoodieWriteStat> stats) throws IOException { | ||
| List<HoodieWriteStat> stats, HoodieData<WriteStatus> writeStatuses) throws IOException { | ||
| LOG.info("Committing " + instantTime + " action " + commitActionType); | ||
| HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); | ||
| // Finalize write | ||
| finalizeWrite(table, instantTime, stats); | ||
| // update Metadata table | ||
| writeTableMetadata(table, instantTime, commitActionType, metadata); | ||
| writeTableMetadata(table, instantTime, commitActionType, metadata, writeStatuses); | ||
| activeTimeline.saveAsComplete(new HoodieInstant(true, commitActionType, instantTime), | ||
| Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); | ||
| } | ||
|
|
||
| protected HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf) { | ||
| return createTable(config, hadoopConf, false); | ||
| } | ||
|
|
||
| protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf, boolean refreshTimeline); | ||
| protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf); | ||
|
|
||
| void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) { | ||
| try { | ||
|
|
@@ -265,9 +263,10 @@ protected void preCommit(HoodieInstant inflightInstant, HoodieCommitMetadata met | |
| * @param actionType action type of the commit. | ||
| * @param metadata instance of {@link HoodieCommitMetadata}. | ||
| */ | ||
| protected void writeTableMetadata(HoodieTable table, String instantTime, String actionType, HoodieCommitMetadata metadata) { | ||
| protected void writeTableMetadata(HoodieTable table, String instantTime, String actionType, HoodieCommitMetadata metadata, | ||
| HoodieData<WriteStatus> writeStatuses) { | ||
| context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table"); | ||
| table.getMetadataWriter(instantTime).ifPresent(w -> ((HoodieTableMetadataWriter) w).update(metadata, instantTime, | ||
| table.getMetadataWriter(instantTime).ifPresent(w -> ((HoodieTableMetadataWriter) w).update(metadata, writeStatuses, instantTime, | ||
| table.isTableServiceAction(actionType))); | ||
| } | ||
|
|
||
|
|
@@ -297,7 +296,7 @@ public void bootstrap(Option<Map<String, String>> extraMetadata) { | |
| */ | ||
| public void rollbackFailedBootstrap() { | ||
| LOG.info("Rolling back pending bootstrap if present"); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction(); | ||
| Option<String> instant = Option.fromJavaOptional( | ||
| inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst()); | ||
|
|
@@ -465,7 +464,7 @@ protected void postCommit(HoodieTable<T, I, K, O> table, HoodieCommitMetadata me | |
|
|
||
| protected void runTableServicesInline(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) { | ||
| if (config.inlineTableServices()) { | ||
| if (config.isMetadataTableEnabled()) { | ||
| if (table.getMetaClient().getTableConfig().isMetadataTableEnabled()) { | ||
| table.getHoodieView().sync(); | ||
| } | ||
| // Do an inline compaction if enabled | ||
|
|
@@ -529,7 +528,7 @@ protected void autoCleanOnCommit() { | |
| * Run any pending compactions. | ||
| */ | ||
| public void runAnyPendingCompactions() { | ||
| runAnyPendingCompactions(createTable(config, hadoopConf, config.isMetadataTableEnabled())); | ||
| runAnyPendingCompactions(createTable(config, hadoopConf)); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -539,7 +538,7 @@ public void runAnyPendingCompactions() { | |
| * @param comment - Comment for the savepoint | ||
| */ | ||
| public void savepoint(String user, String comment) { | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| if (table.getCompletedCommitsTimeline().empty()) { | ||
| throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty"); | ||
| } | ||
|
|
@@ -563,7 +562,7 @@ public void savepoint(String user, String comment) { | |
| * @param comment - Comment for the savepoint | ||
| */ | ||
| public void savepoint(String instantTime, String user, String comment) { | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| table.savepoint(context, instantTime, user, comment); | ||
| } | ||
|
|
||
|
|
@@ -575,7 +574,7 @@ public void savepoint(String instantTime, String user, String comment) { | |
| * @return true if the savepoint was deleted successfully | ||
| */ | ||
| public void deleteSavepoint(String savepointTime) { | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| SavepointHelpers.deleteSavepoint(table, savepointTime); | ||
| } | ||
|
|
||
|
|
@@ -590,7 +589,7 @@ public void deleteSavepoint(String savepointTime) { | |
| * @return true if the savepoint was restored to successfully | ||
| */ | ||
| public void restoreToSavepoint(String savepointTime) { | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| SavepointHelpers.validateSavepointPresence(table, savepointTime); | ||
| restoreToInstant(savepointTime); | ||
| SavepointHelpers.validateSavepointRestore(table, savepointTime); | ||
|
|
@@ -673,7 +672,7 @@ public HoodieRestoreMetadata restoreToInstant(final String instantTime) throws H | |
| final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime(); | ||
| Timer.Context timerContext = metrics.getRollbackCtx(); | ||
| try { | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); | ||
| HoodieTable<T, I, K, O> table = createTable(config, hadoopConf); | ||
| HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime); | ||
| if (timerContext != null) { | ||
| final long durationInMs = metrics.getDurationInMs(timerContext.stop()); | ||
|
|
@@ -1091,17 +1090,17 @@ private Option<String> scheduleTableServiceInternal(String instantTime, Option<M | |
| switch (tableServiceType) { | ||
| case CLUSTER: | ||
| LOG.info("Scheduling clustering at instant time :" + instantTime); | ||
| Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled()) | ||
| Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf) | ||
| .scheduleClustering(context, instantTime, extraMetadata); | ||
| return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty(); | ||
| case COMPACT: | ||
| LOG.info("Scheduling compaction at instant time :" + instantTime); | ||
| Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled()) | ||
| Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf) | ||
| .scheduleCompaction(context, instantTime, extraMetadata); | ||
| return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty(); | ||
| case CLEAN: | ||
| LOG.info("Scheduling cleaning at instant time :" + instantTime); | ||
| Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled()) | ||
| Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf) | ||
| .scheduleCleaning(context, instantTime, extraMetadata); | ||
| return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty(); | ||
| default: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -434,6 +434,15 @@ public class HoodieWriteConfig extends HoodieConfig { | |
| .sinceVersion("0.10.0") | ||
| .withDocumentation("File Id Prefix provider class, that implements `org.apache.hudi.fileid.FileIdPrefixProvider`"); | ||
|
|
||
| /** | ||
| * If a valid location is specified, a copy of the write config is saved before each operation. | ||
| */ | ||
| public static final ConfigProperty<String> CONFIG_EXPORT_DIR = ConfigProperty | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not clear where this is used? Write configs are persisted after each operation? |
||
| .key("hoodie.write.config.save.dir") | ||
| .defaultValue("/user/hudi/runtime_configs/0.10") | ||
| .sinceVersion("0.10.0") | ||
| .withDocumentation("The directory where write configs are saved before each operation."); | ||
|
|
||
| private ConsistencyGuardConfig consistencyGuardConfig; | ||
|
|
||
| // Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled | ||
|
|
@@ -1634,9 +1643,13 @@ public boolean getPushGatewayRandomJobNameSuffix() { | |
| } | ||
|
|
||
| public String getMetricReporterMetricsNamePrefix() { | ||
| return getStringOrDefault(HoodieMetricsConfig.METRICS_REPORTER_PREFIX); | ||
| String prefix = getStringOrDefault(HoodieMetricsConfig.METRICS_REPORTER_PREFIX); | ||
| if (prefix.isEmpty()) { | ||
| prefix = getTableName(); | ||
| } | ||
| return prefix; | ||
| } | ||
|
|
||
| /** | ||
| * memory configs. | ||
| */ | ||
|
|
@@ -1839,6 +1852,36 @@ public String getFileIdPrefixProviderClassName() { | |
| return getString(FILEID_PREFIX_PROVIDER_CLASS); | ||
| } | ||
|
|
||
| /** | ||
| * Record index configs. | ||
| */ | ||
| public boolean createRecordIndex() { | ||
| return metadataConfig.createRecordIndex(); | ||
| } | ||
|
|
||
| public int getRecordIndexMinFileGroupCount() { | ||
| return metadataConfig.getRecordIndexMinFileGroupCount(); | ||
| } | ||
|
|
||
| public int getRecordIndexMaxFileGroupCount() { | ||
| return metadataConfig.getRecordIndexMaxFileGroupCount(); | ||
| } | ||
|
|
||
| public float getRecordIndexGrowthFactor() { | ||
| return metadataConfig.getRecordIndexGrowthFactor(); | ||
| } | ||
|
|
||
| public long getMaxMetadataFileGroupSizeBytes() { | ||
| return metadataConfig.getMaxFileGroupSizeBytes(); | ||
| } | ||
|
|
||
| /** | ||
| * Directory where write config should be exported before each operation. | ||
| */ | ||
| public String getConfigExportDir() { | ||
| return getString(CONFIG_EXPORT_DIR); | ||
| } | ||
|
|
||
| public static class Builder { | ||
|
|
||
| protected final HoodieWriteConfig writeConfig = new HoodieWriteConfig(); | ||
|
|
@@ -2195,6 +2238,11 @@ public Builder withProperties(Properties properties) { | |
| return this; | ||
| } | ||
|
|
||
| public Builder withConfigExportDir(String dir) { | ||
| writeConfig.setValue(CONFIG_EXPORT_DIR, dir); | ||
| return this; | ||
| } | ||
|
|
||
| protected void setDefaults() { | ||
| writeConfig.setDefaultValue(MARKERS_TYPE, getDefaultMarkersType(engineType)); | ||
| // Check for mandatory properties | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is init being removed?