-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-1740] Fix insert-overwrite API archival #2784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
80a46cb
d3d5b40
49cfe0d
5572b9f
e2e7870
b2c037a
e491d1c
917fa7c
09ac4b4
cc724b8
ab27482
de8fbc5
d938e0a
7d05046
af963ac
2327417
a285561
39c062d
6bfc9fb
de884c6
e47c38f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
| import com.fasterxml.jackson.databind.DeserializationFeature; | ||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
| import java.io.IOException; | ||
|
|
||
| import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; | ||
| import org.apache.hudi.avro.model.HoodieCompactionPlan; | ||
| import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; | ||
|
|
@@ -37,8 +38,8 @@ | |
| import org.apache.hudi.common.table.timeline.HoodieTimeline; | ||
| import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; | ||
| import org.apache.hudi.common.util.CleanerUtils; | ||
| import org.apache.hudi.common.util.ClusteringUtils; | ||
| import org.apache.hudi.common.util.CompactionUtils; | ||
| import org.apache.hudi.common.util.Option; | ||
|
|
||
| /** | ||
| * Helper class to convert between different action related payloads and {@link HoodieArchivedMetaEntry}. | ||
|
|
@@ -72,10 +73,21 @@ public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInst | |
| HoodieReplaceCommitMetadata replaceCommitMetadata = HoodieReplaceCommitMetadata | ||
| .fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieReplaceCommitMetadata.class); | ||
| archivedMetaWrapper.setHoodieReplaceCommitMetadata(ReplaceArchivalHelper.convertReplaceCommitMetadata(replaceCommitMetadata)); | ||
| } else if (hoodieInstant.isInflight()) { | ||
| // inflight replacecommit files have the same meta data body as HoodieCommitMetadata | ||
ssdong marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // so we can reuse it without creating a separate inflight extension. | ||
| // Alternatively, inflight replacecommit files can be empty, e.g. when clustering is used. | ||
| Option<HoodieCommitMetadata> inflightCommitMetadata = getInflightReplaceMetadata(metaClient, hoodieInstant); | ||
| if (inflightCommitMetadata.isPresent()) { | ||
| archivedMetaWrapper.setHoodieInflightReplaceMetadata(convertCommitMetadata(inflightCommitMetadata.get())); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is the setHoodieInflightReplaceMetadata function? |
||
| } | ||
| } else { | ||
| HoodieRequestedReplaceMetadata requestedReplaceMetadata = | ||
| ClusteringUtils.getRequestedReplaceMetadata(metaClient, hoodieInstant).get(); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using |
||
| archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata); | ||
| // we may have cases with empty HoodieRequestedReplaceMetadata e.g. insert_overwrite_table or insert_overwrite | ||
| // without clustering. However, we should revisit the requested commit file standardization | ||
| Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, hoodieInstant); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about having a replaceUtils? I think this may be a good time to decouple the replace commit logic from clustering.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| if (requestedReplaceMetadata.isPresent()) { | ||
| archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata.get()); | ||
| } | ||
| } | ||
| archivedMetaWrapper.setActionType(ActionType.replacecommit.name()); | ||
| break; | ||
|
|
@@ -107,14 +119,25 @@ public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInst | |
| return archivedMetaWrapper; | ||
| } | ||
|
|
||
| public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInstant, | ||
| HoodieCommitMetadata hoodieCommitMetadata) { | ||
| HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry(); | ||
| archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp()); | ||
| archivedMetaWrapper.setActionState(hoodieInstant.getState().name()); | ||
| archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(hoodieCommitMetadata)); | ||
| archivedMetaWrapper.setActionType(ActionType.commit.name()); | ||
| return archivedMetaWrapper; | ||
| public static Option<HoodieCommitMetadata> getInflightReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException { | ||
ssdong marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Option<byte[]> inflightContent = metaClient.getActiveTimeline().getInstantDetails(instant); | ||
| if (!inflightContent.isPresent() || inflightContent.get().length == 0) { | ||
| // inflight files can be empty in certain cases, e.g. when users opt in to clustering | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @satishkotha what is the original reason for this? |
||
| return Option.empty(); | ||
| } | ||
| return Option.of(HoodieCommitMetadata.fromBytes(inflightContent.get(), HoodieCommitMetadata.class)); | ||
| } | ||
|
|
||
| public static Option<HoodieRequestedReplaceMetadata> getRequestedReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException { | ||
ssdong marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Option<byte[]> requestedContent = metaClient.getActiveTimeline().getInstantDetails(instant); | ||
| if (!requestedContent.isPresent() || requestedContent.get().length == 0) { | ||
| // requested commit files can be empty in certain cases, e.g. insert_overwrite or insert_overwrite_table. | ||
| // However, it appears requested files are supposed to contain meta data and we should revisit the standardization | ||
| // of requested commit files | ||
| // TODO revisit requested commit file standardization https://issues.apache.org/jira/browse/HUDI-1739 | ||
| return Option.empty(); | ||
| } | ||
| return Option.of(TimelineMetadataUtils.deserializeRequestedReplaceMetadata(requestedContent.get())); | ||
| } | ||
|
|
||
| public static org.apache.hudi.avro.model.HoodieCommitMetadata convertCommitMetadata( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.